Add line numbers and remove unneeded dependency

This commit is contained in:
Gonçalo Valério 2022-03-29 21:39:03 +01:00
parent 373ed13ef5
commit 37d0217708
Signed by: dethos
GPG Key ID: DF557F2BDCC2445E
6 changed files with 92 additions and 110 deletions

View File

@ -1,4 +1,4 @@
from .lib import parse
__version__ = "0.0.3"
__version__ = "0.0.4"
__all__ = ["parse"]

View File

@ -5,7 +5,8 @@ this library. It makes use of the same tools available for those that
install the package in order to be used programmatically.
"""
from typing import List
import requests
from urllib.request import urlopen, Request
from urllib.error import URLError
import argparse
import inlinehashes
import json
@ -16,20 +17,18 @@ def build_output(
) -> str:
"""Build a JSON output from a list of Inline objects."""
snippet = "content" if full else "short_content"
out = [{"content": getattr(i, snippet), "hash": getattr(i, alg)} for i in inlines]
out = [
{
"content": getattr(i, snippet),
"hash": getattr(i, alg),
"line": i.line,
"position": i.position,
}
for i in inlines
]
return json.dumps(out, indent=2)
def write_to_file(path: str, content: str) -> None:
"""Writes the content to the specified file.
raises:
OSError: More than one subclass of OSError
"""
with open(path, "w") as f:
f.write(content)
def run_cli() -> None:
"""Entry point of the command line interface."""
parser = argparse.ArgumentParser()
@ -47,28 +46,35 @@ def run_cli() -> None:
help="Include full content in the output",
action="store_true",
)
parser.add_argument("-o", "--output", help="Store output in a file.")
parser.add_argument(
"-t",
"--target",
help="Target inline content to look for",
default="all",
choices=["all", "scripts", "styles"],
)
args = parser.parse_args()
path = args.source
target = args.target
try:
if path.startswith("http://") or path.startswith("https://"):
response = requests.get(path)
response.raise_for_status()
content = response.text
req = Request(
path,
headers={"User-Agent": f"Inlinehashes[{inlinehashes.__version__}]"},
)
with urlopen(req) as response:
content = response.read()
else:
with open(path, "r") as f:
content = f.read()
except (requests.RequestException, OSError):
print(f"Invalid source: {path}")
except (URLError, OSError) as error:
print(error)
print(f"Failed to get source: {path}")
exit(1)
inlines = inlinehashes.parse(content)
inlines = inlinehashes.parse(content, target)
out = build_output(inlines, args.alg, bool(args.full))
if args.output:
write_to_file(args.output, out)
else:
print(out)

View File

@ -29,6 +29,8 @@ class Inline:
"""
content: str
line: Optional[int] = None
position: Optional[int] = None
@cached_property
def short_content(self) -> str:
@ -53,7 +55,7 @@ class Inline:
return f"sha512-{h_b64}"
def __repr__(self) -> str:
return f"Inline(content='{self.content}')"
return f"Inline(content='{self.content}', line='{self.line}', position='{self.position}')"
def __str__(self) -> str:
return f"Inline(content='{self.short_content}...')"
@ -205,10 +207,10 @@ def parse(content: str, target: str = "all") -> List[Inline]:
for q in search_queries:
for tag in soup.find_all(q.search_function):
if q.attr_name:
inline = Inline(tag[q.attr_name])
inline = Inline(tag[q.attr_name], tag.sourceline, tag.sourcepos)
else:
if not tag.contents:
continue
inline = Inline(tag.contents[0])
inline = Inline(tag.contents[0], tag.sourceline, tag.sourcepos)
elements.append(inline)
return elements

80
poetry.lock generated
View File

@ -57,25 +57,6 @@ d = ["aiohttp (>=3.7.4)"]
jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
uvloop = ["uvloop (>=0.15.2)"]
[[package]]
name = "certifi"
version = "2021.10.8"
description = "Python package for providing Mozilla's CA Bundle."
category = "main"
optional = false
python-versions = "*"
[[package]]
name = "charset-normalizer"
version = "2.0.12"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
category = "main"
optional = false
python-versions = ">=3.5.0"
[package.extras]
unicode_backport = ["unicodedata2"]
[[package]]
name = "click"
version = "8.0.4"
@ -95,14 +76,6 @@ category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[[package]]
name = "idna"
version = "3.3"
description = "Internationalized Domain Names in Applications (IDNA)"
category = "main"
optional = false
python-versions = ">=3.5"
[[package]]
name = "more-itertools"
version = "8.12.0"
@ -220,24 +193,6 @@ wcwidth = "*"
checkqa-mypy = ["mypy (==v0.761)"]
testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"]
[[package]]
name = "requests"
version = "2.27.1"
description = "Python HTTP for Humans."
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
[package.dependencies]
certifi = ">=2017.4.17"
charset-normalizer = {version = ">=2.0.0,<2.1.0", markers = "python_version >= \"3\""}
idna = {version = ">=2.5,<4", markers = "python_version >= \"3\""}
urllib3 = ">=1.21.1,<1.27"
[package.extras]
socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
[[package]]
name = "soupsieve"
version = "2.3.1"
@ -262,19 +217,6 @@ category = "dev"
optional = false
python-versions = ">=3.6"
[[package]]
name = "urllib3"
version = "1.26.8"
description = "HTTP library with thread-safe connection pooling, file post, and more."
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"
[package.extras]
brotli = ["brotlipy (>=0.6.0)"]
secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"]
socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[[package]]
name = "wcwidth"
version = "0.2.5"
@ -286,7 +228,7 @@ python-versions = "*"
[metadata]
lock-version = "1.1"
python-versions = "^3.9"
content-hash = "5d3261ad45347af8277dd3042f42feb5beff0861143dee17cc632c823534a859"
content-hash = "4da6f6565f351fd70096e9f8bb34b6958cc7e223fb9d87f59eba607c7ec96264"
[metadata.files]
atomicwrites = [
@ -326,14 +268,6 @@ black = [
{file = "black-22.1.0-py3-none-any.whl", hash = "sha256:3524739d76b6b3ed1132422bf9d82123cd1705086723bc3e235ca39fd21c667d"},
{file = "black-22.1.0.tar.gz", hash = "sha256:a7c0192d35635f6fc1174be575cb7915e92e5dd629ee79fdaf0dcfa41a80afb5"},
]
certifi = [
{file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"},
{file = "certifi-2021.10.8.tar.gz", hash = "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872"},
]
charset-normalizer = [
{file = "charset-normalizer-2.0.12.tar.gz", hash = "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597"},
{file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"},
]
click = [
{file = "click-8.0.4-py3-none-any.whl", hash = "sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1"},
{file = "click-8.0.4.tar.gz", hash = "sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb"},
@ -342,10 +276,6 @@ colorama = [
{file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
{file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
]
idna = [
{file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"},
{file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"},
]
more-itertools = [
{file = "more-itertools-8.12.0.tar.gz", hash = "sha256:7dc6ad46f05f545f900dd59e8dfb4e84a4827b97b3cfecb175ea0c7d247f6064"},
{file = "more_itertools-8.12.0-py3-none-any.whl", hash = "sha256:43e6dd9942dffd72661a2c4ef383ad7da1e6a3e968a927ad7a6083ab410a688b"},
@ -407,10 +337,6 @@ pytest = [
{file = "pytest-5.4.3-py3-none-any.whl", hash = "sha256:5c0db86b698e8f170ba4582a492248919255fcd4c79b1ee64ace34301fb589a1"},
{file = "pytest-5.4.3.tar.gz", hash = "sha256:7979331bfcba207414f5e1263b5a0f8f521d0f457318836a7355531ed1a4c7d8"},
]
requests = [
{file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"},
{file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"},
]
soupsieve = [
{file = "soupsieve-2.3.1-py3-none-any.whl", hash = "sha256:1a3cca2617c6b38c0343ed661b1fa5de5637f257d4fe22bd9f1338010a1efefb"},
{file = "soupsieve-2.3.1.tar.gz", hash = "sha256:b8d49b1cd4f037c7082a9683dfa1801aa2597fb11c3a1155b7a5b94829b4f1f9"},
@ -423,10 +349,6 @@ typing-extensions = [
{file = "typing_extensions-4.1.1-py3-none-any.whl", hash = "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"},
{file = "typing_extensions-4.1.1.tar.gz", hash = "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42"},
]
urllib3 = [
{file = "urllib3-1.26.8-py2.py3-none-any.whl", hash = "sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed"},
{file = "urllib3-1.26.8.tar.gz", hash = "sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c"},
]
wcwidth = [
{file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"},
{file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"},

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "inlinehashes"
version = "0.0.3"
version = "0.0.4"
description = "Hash generator for HTML inline styles and scripts"
authors = ["Gonçalo Valério <gon@ovalerio.net>"]
homepage = "https://github.com/dethos/inlinehashes"
@ -22,7 +22,6 @@ classifiers = [
[tool.poetry.dependencies]
python = "^3.9"
beautifulsoup4 = "^4.10.0"
requests = "^2.27.1"
[tool.poetry.dev-dependencies]
pytest = "^5.2"

View File

@ -117,6 +117,8 @@ class TestParse:
inlines = parse(doc)
assert len(inlines) == 1
assert inlines[0].content == 'alert("hash this");'
assert inlines[0].line == 5
assert inlines[0].position == 8
def test_parse_detects_style_tags(self):
doc = """
@ -131,6 +133,8 @@ class TestParse:
inlines = parse(doc)
assert len(inlines) == 1
assert inlines[0].content == ".someclass { background:#142a3f; }"
assert inlines[0].line == 5
assert inlines[0].position == 10
def test_parse_detects_style_attributes(self):
doc = """
@ -144,6 +148,8 @@ class TestParse:
inlines = parse(doc)
assert len(inlines) == 1
assert inlines[0].content == "text-color: #000;"
assert inlines[0].line == 6
assert inlines[0].position == 8
@pytest.mark.parametrize("attr", _EVENT_HANDLER_ATTRS)
def test_parse_detect_attributes_with_js(self, attr):
@ -160,7 +166,54 @@ class TestParse:
inlines = parse(doc)
assert len(inlines) == 1
assert inlines[0].content == "alert(1);"
assert inlines[0].line == 6
assert inlines[0].position == 8
def test_parse_both_targets(self):
doc = """
<html>
<head>
<title>Some title</title>
<style>.someclass { background:#142a3f; }</style>
</head>
<body onclick="alert(1);">Some body</body>
</html>
"""
inlines = parse(doc)
assert len(inlines) == 2
def test_parse_only_script_targets(self):
doc = """
<html>
<head>
<title>Some title</title>
<style>.someclass { background:#142a3f; }</style>
</head>
<body onclick="alert(1);">Some body</body>
</html>
"""
inlines = parse(doc, "scripts")
assert len(inlines) == 1
assert inlines[0].content == "alert(1);"
assert inlines[0].line == 7
assert inlines[0].position == 8
def test_parse_only_style_targets(self):
doc = """
<html>
<head>
<title>Some title</title>
<style>.someclass { background:#142a3f; }</style>
</head>
<body onclick="alert(1);">Some body</body>
</html>
"""
inlines = parse(doc, "styles")
assert len(inlines) == 1
assert inlines[0].content == ".someclass { background:#142a3f; }"
assert inlines[0].line == 5
assert inlines[0].position == 10
def test_version():
assert __version__ == "0.0.3"
assert __version__ == "0.0.4"