From 37d0217708249db61102922a0b073f9f8079df4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gon=C3=A7alo?= Date: Tue, 29 Mar 2022 21:39:03 +0100 Subject: [PATCH] Add line numbers and removed unneeded dependency --- inlinehashes/__init__.py | 2 +- inlinehashes/app.py | 54 +++++++++++++------------ inlinehashes/lib.py | 8 ++-- poetry.lock | 80 +------------------------------------- pyproject.toml | 3 +- tests/test_inlinehashes.py | 55 +++++++++++++++++++++++++- 6 files changed, 92 insertions(+), 110 deletions(-) diff --git a/inlinehashes/__init__.py b/inlinehashes/__init__.py index 246df41..8f86ddf 100644 --- a/inlinehashes/__init__.py +++ b/inlinehashes/__init__.py @@ -1,4 +1,4 @@ from .lib import parse -__version__ = "0.0.3" +__version__ = "0.0.4" __all__ = ["parse"] diff --git a/inlinehashes/app.py b/inlinehashes/app.py index 2caf378..d7716fd 100644 --- a/inlinehashes/app.py +++ b/inlinehashes/app.py @@ -5,7 +5,8 @@ this library. It makes use of the same tools available for those that install the package in order to be used programatically. """ from typing import List -import requests +from urllib.request import urlopen, Request +from urllib.error import URLError import argparse import inlinehashes import json @@ -16,20 +17,18 @@ def build_output( ) -> str: """Build a JSON output from a list of Inline objects.""" snippet = "content" if full else "short_content" - out = [{"content": getattr(i, snippet), "hash": getattr(i, alg)} for i in inlines] + out = [ + { + "content": getattr(i, snippet), + "hash": getattr(i, alg), + "line": i.line, + "position": i.position, + } + for i in inlines + ] return json.dumps(out, indent=2) -def write_to_file(path: str, content: str) -> None: - """Writes the content to the specified file. 
- - raises: - OSError: More than one subclass of OSError - """ - with open(path, "w") as f: - f.write(content) - - def run_cli() -> None: """Entry point of the command line interface.""" parser = argparse.ArgumentParser() @@ -47,29 +46,36 @@ def run_cli() -> None: help="Include full content in the output", action="store_true", ) - parser.add_argument("-o", "--output", help="Store output in a file.") + parser.add_argument( + "-t", + "--target", + help="Target inline content to look for", + default="all", + choices=["all", "scripts", "styles"], + ) args = parser.parse_args() path = args.source + target = args.target try: if path.startswith("http://") or path.startswith("https://"): - response = requests.get(path) - response.raise_for_status() - content = response.text + req = Request( + path, + headers={"User-Agent": f"Inlinehashes[{inlinehashes.__version__}]"}, + ) + with urlopen(req) as response: + content = response.read() else: with open(path, "r") as f: content = f.read() - except (requests.RequestException, OSError): - print(f"Invalid source: {path}") + except (URLError, OSError) as error: + print(error) + print(f"Failed to get source: {path}") exit(1) - inlines = inlinehashes.parse(content) + inlines = inlinehashes.parse(content, target) out = build_output(inlines, args.alg, bool(args.full)) - - if args.output: - write_to_file(args.output, out) - else: - print(out) + print(out) if __name__ == "__main__": diff --git a/inlinehashes/lib.py b/inlinehashes/lib.py index 24bf4b2..361e812 100644 --- a/inlinehashes/lib.py +++ b/inlinehashes/lib.py @@ -29,6 +29,8 @@ class Inline: """ content: str + line: Optional[int] = None + position: Optional[int] = None @cached_property def short_content(self) -> str: @@ -53,7 +55,7 @@ class Inline: return f"sha512-{h_b64}" def __repr__(self) -> str: - return f"Inline(content='{self.content}')" + return f"Inline(content='{self.content}', line='{self.line}', position='{self.position}')" def __str__(self) -> str: return 
f"Inline(content='{self.short_content}...')" @@ -205,10 +207,10 @@ def parse(content: str, target: str = "all") -> List[Inline]: for q in search_queries: for tag in soup.find_all(q.search_function): if q.attr_name: - inline = Inline(tag[q.attr_name]) + inline = Inline(tag[q.attr_name], tag.sourceline, tag.sourcepos) else: if not tag.contents: continue - inline = Inline(tag.contents[0]) + inline = Inline(tag.contents[0], tag.sourceline, tag.sourcepos) elements.append(inline) return elements diff --git a/poetry.lock b/poetry.lock index 7bb13ca..729a28b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -57,25 +57,6 @@ d = ["aiohttp (>=3.7.4)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] -[[package]] -name = "certifi" -version = "2021.10.8" -description = "Python package for providing Mozilla's CA Bundle." -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "charset-normalizer" -version = "2.0.12" -description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "main" -optional = false -python-versions = ">=3.5.0" - -[package.extras] -unicode_backport = ["unicodedata2"] - [[package]] name = "click" version = "8.0.4" @@ -95,14 +76,6 @@ category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -[[package]] -name = "idna" -version = "3.3" -description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" -optional = false -python-versions = ">=3.5" - [[package]] name = "more-itertools" version = "8.12.0" @@ -220,24 +193,6 @@ wcwidth = "*" checkqa-mypy = ["mypy (==v0.761)"] testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] -[[package]] -name = "requests" -version = "2.27.1" -description = "Python HTTP for Humans." 
-category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" - -[package.dependencies] -certifi = ">=2017.4.17" -charset-normalizer = {version = ">=2.0.0,<2.1.0", markers = "python_version >= \"3\""} -idna = {version = ">=2.5,<4", markers = "python_version >= \"3\""} -urllib3 = ">=1.21.1,<1.27" - -[package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] -use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"] - [[package]] name = "soupsieve" version = "2.3.1" @@ -262,19 +217,6 @@ category = "dev" optional = false python-versions = ">=3.6" -[[package]] -name = "urllib3" -version = "1.26.8" -description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" - -[package.extras] -brotli = ["brotlipy (>=0.6.0)"] -secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] -socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] - [[package]] name = "wcwidth" version = "0.2.5" @@ -286,7 +228,7 @@ python-versions = "*" [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "5d3261ad45347af8277dd3042f42feb5beff0861143dee17cc632c823534a859" +content-hash = "4da6f6565f351fd70096e9f8bb34b6958cc7e223fb9d87f59eba607c7ec96264" [metadata.files] atomicwrites = [ @@ -326,14 +268,6 @@ black = [ {file = "black-22.1.0-py3-none-any.whl", hash = "sha256:3524739d76b6b3ed1132422bf9d82123cd1705086723bc3e235ca39fd21c667d"}, {file = "black-22.1.0.tar.gz", hash = "sha256:a7c0192d35635f6fc1174be575cb7915e92e5dd629ee79fdaf0dcfa41a80afb5"}, ] -certifi = [ - {file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"}, - {file = "certifi-2021.10.8.tar.gz", hash = "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872"}, -] -charset-normalizer = [ - {file = 
"charset-normalizer-2.0.12.tar.gz", hash = "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597"}, - {file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"}, -] click = [ {file = "click-8.0.4-py3-none-any.whl", hash = "sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1"}, {file = "click-8.0.4.tar.gz", hash = "sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb"}, @@ -342,10 +276,6 @@ colorama = [ {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, ] -idna = [ - {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, - {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, -] more-itertools = [ {file = "more-itertools-8.12.0.tar.gz", hash = "sha256:7dc6ad46f05f545f900dd59e8dfb4e84a4827b97b3cfecb175ea0c7d247f6064"}, {file = "more_itertools-8.12.0-py3-none-any.whl", hash = "sha256:43e6dd9942dffd72661a2c4ef383ad7da1e6a3e968a927ad7a6083ab410a688b"}, @@ -407,10 +337,6 @@ pytest = [ {file = "pytest-5.4.3-py3-none-any.whl", hash = "sha256:5c0db86b698e8f170ba4582a492248919255fcd4c79b1ee64ace34301fb589a1"}, {file = "pytest-5.4.3.tar.gz", hash = "sha256:7979331bfcba207414f5e1263b5a0f8f521d0f457318836a7355531ed1a4c7d8"}, ] -requests = [ - {file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"}, - {file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"}, -] soupsieve = [ {file = "soupsieve-2.3.1-py3-none-any.whl", hash = "sha256:1a3cca2617c6b38c0343ed661b1fa5de5637f257d4fe22bd9f1338010a1efefb"}, {file 
= "soupsieve-2.3.1.tar.gz", hash = "sha256:b8d49b1cd4f037c7082a9683dfa1801aa2597fb11c3a1155b7a5b94829b4f1f9"}, @@ -423,10 +349,6 @@ typing-extensions = [ {file = "typing_extensions-4.1.1-py3-none-any.whl", hash = "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"}, {file = "typing_extensions-4.1.1.tar.gz", hash = "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42"}, ] -urllib3 = [ - {file = "urllib3-1.26.8-py2.py3-none-any.whl", hash = "sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed"}, - {file = "urllib3-1.26.8.tar.gz", hash = "sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c"}, -] wcwidth = [ {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"}, diff --git a/pyproject.toml b/pyproject.toml index d55a089..e597aa2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "inlinehashes" -version = "0.0.3" +version = "0.0.4" description = "Hash generator for HTML inline styles and scripts" authors = ["Gonçalo Valério "] homepage = "https://github.com/dethos/inlinehashes" @@ -22,7 +22,6 @@ classifiers = [ [tool.poetry.dependencies] python = "^3.9" beautifulsoup4 = "^4.10.0" -requests = "^2.27.1" [tool.poetry.dev-dependencies] pytest = "^5.2" diff --git a/tests/test_inlinehashes.py b/tests/test_inlinehashes.py index 43ae391..e2b076f 100644 --- a/tests/test_inlinehashes.py +++ b/tests/test_inlinehashes.py @@ -117,6 +117,8 @@ class TestParse: inlines = parse(doc) assert len(inlines) == 1 assert inlines[0].content == 'alert("hash this");' + assert inlines[0].line == 5 + assert inlines[0].position == 8 def test_parse_detects_style_tags(self): doc = """ @@ -131,6 +133,8 @@ class TestParse: inlines = parse(doc) assert len(inlines) == 1 assert inlines[0].content == 
".someclass { background:#142a3f; }" + assert inlines[0].line == 5 + assert inlines[0].position == 10 def test_parse_detects_style_attributes(self): doc = """ @@ -144,6 +148,8 @@ class TestParse: inlines = parse(doc) assert len(inlines) == 1 assert inlines[0].content == "text-color: #000;" + assert inlines[0].line == 6 + assert inlines[0].position == 8 @pytest.mark.parametrize("attr", _EVENT_HANDLER_ATTRS) def test_parse_detect_attributes_with_js(self, attr): @@ -160,7 +166,54 @@ class TestParse: inlines = parse(doc) assert len(inlines) == 1 assert inlines[0].content == "alert(1);" + assert inlines[0].line == 6 + assert inlines[0].position == 8 + + def test_parse_both_targets(self): + doc = """ + + + Some title + + + Some body + + """ + inlines = parse(doc) + assert len(inlines) == 2 + + def test_parse_only_script_targets(self): + doc = """ + + + Some title + + + Some body + + """ + inlines = parse(doc, "scripts") + assert len(inlines) == 1 + assert inlines[0].content == "alert(1);" + assert inlines[0].line == 7 + assert inlines[0].position == 8 + + def test_parse_only_style_targets(self): + doc = """ + + + Some title + + + Some body + + """ + inlines = parse(doc, "styles") + assert len(inlines) == 1 + assert inlines[0].content == ".someclass { background:#142a3f; }" + assert inlines[0].line == 5 + assert inlines[0].position == 10 def test_version(): - assert __version__ == "0.0.3" + assert __version__ == "0.0.4"