Add the corresponding CSP directive (script-src / style-src) to each inline

This commit is contained in:
Gonçalo Valério 2023-01-25 18:56:11 +00:00
parent 65889f83a3
commit 9bf236b60d
Signed by: dethos
GPG Key ID: DF557F2BDCC2445E
5 changed files with 34 additions and 18 deletions

View File

@ -65,6 +65,7 @@ Here is an example of the output:
{ {
"content": "\n html {\n height: 100%;\n }\n ", "content": "\n html {\n height: 100%;\n }\n ",
"hash": "sha384-Ku20lQH5qbr4EDPzXD2rf25rEHJNswNYRUNMPjYl7jCe0eHJYDe0gFdQpnKkFUTv", "hash": "sha384-Ku20lQH5qbr4EDPzXD2rf25rEHJNswNYRUNMPjYl7jCe0eHJYDe0gFdQpnKkFUTv",
"directive": "style-src",
"line": 12, "line": 12,
"position": 0 "position": 0
} }
@ -83,11 +84,7 @@ Here is the same example, but using python's shell:
>>> content = requests.get("https://ovalerio.net").text >>> content = requests.get("https://ovalerio.net").text
>>> inlines = inlinehashes.parse(content) >>> inlines = inlinehashes.parse(content)
>>> inlines >>> inlines
[Inline(content=' [Inline(line='17', position='0')]
html {
height: 100%;
}
...')]
>>> first = inlines[0] >>> first = inlines[0]
>>> first.short_content >>> first.short_content
'\n html {\n height: 100%;\n }\n ' '\n html {\n height: 100%;\n }\n '

View File

@ -22,6 +22,7 @@ def build_output(
{ {
"content": getattr(i, snippet), "content": getattr(i, snippet),
"hash": getattr(i, alg), "hash": getattr(i, alg),
"directive": i.directive,
"line": i.line, "line": i.line,
"position": i.position, "position": i.position,
} }
@ -52,7 +53,7 @@ def run_cli() -> None:
"--target", "--target",
help="Target inline content to look for", help="Target inline content to look for",
default="all", default="all",
choices=["all", "scripts", "styles"], choices=["all", "script-src", "style-src"],
) )
args = parser.parse_args() args = parser.parse_args()
path = args.source path = args.source

View File

@ -18,6 +18,7 @@ from bs4 import BeautifulSoup, Tag # type: ignore
class SearchQuery: class SearchQuery:
search_function: Callable search_function: Callable
attr_name: Optional[str] attr_name: Optional[str]
directive: str
@dataclass(frozen=True) @dataclass(frozen=True)
@ -29,6 +30,7 @@ class Inline:
""" """
content: str content: str
directive: Optional[str] = None
line: Optional[int] = None line: Optional[int] = None
position: Optional[int] = None position: Optional[int] = None
@ -58,7 +60,7 @@ class Inline:
return f"Inline(line='{self.line}', position='{self.position}')" return f"Inline(line='{self.line}', position='{self.position}')"
def __str__(self) -> str: def __str__(self) -> str:
return f"Inline(content='{self.short_content}...')" return self.content
def matches_attribute(tag: Tag, attribute_name: str) -> bool: def matches_attribute(tag: Tag, attribute_name: str) -> bool:
@ -178,16 +180,20 @@ _EVENT_HANDLER_ATTRS = [
] ]
_VALID_TARGETS = { _VALID_TARGETS = {
"scripts": [ "script-src": [
SearchQuery(partial(matches_name, name="script"), None), SearchQuery(partial(matches_name, name="script"), None, "script-src"),
*[ *[
SearchQuery(partial(matches_attribute, attribute_name=attr), attr) SearchQuery(
partial(matches_attribute, attribute_name=attr), attr, "script-src"
)
for attr in _EVENT_HANDLER_ATTRS for attr in _EVENT_HANDLER_ATTRS
], ],
], ],
"styles": [ "style-src": [
SearchQuery(partial(matches_name, name="style"), None), SearchQuery(partial(matches_name, name="style"), None, "style-src"),
SearchQuery(partial(matches_attribute, attribute_name="style"), "style"), SearchQuery(
partial(matches_attribute, attribute_name="style"), "style", "style-src"
),
], ],
} }
@ -207,10 +213,20 @@ def parse(content: str, target: str = "all") -> List[Inline]:
for q in search_queries: for q in search_queries:
for tag in soup.find_all(q.search_function): for tag in soup.find_all(q.search_function):
if q.attr_name: if q.attr_name:
inline = Inline(tag[q.attr_name], tag.sourceline, tag.sourcepos) inline = Inline(
tag[q.attr_name],
q.directive,
tag.sourceline,
tag.sourcepos,
)
else: else:
if not tag.contents: if not tag.contents:
continue continue
inline = Inline(tag.contents[0], tag.sourceline, tag.sourcepos) inline = Inline(
tag.contents[0],
q.directive,
tag.sourceline,
tag.sourcepos,
)
elements.append(inline) elements.append(inline)
return elements return elements

2
poetry.lock generated
View File

@ -304,4 +304,4 @@ files = [
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.11" python-versions = "^3.11"
content-hash = "0df57fe5eed6559a07fde2ce7470a34e0b6e0e0c897a1a848daa92287217037f" content-hash = "81f9d3306e76f9f7c39291bd7972216d852f39aba794b43afe2b9a0d4a7a2829"

View File

@ -191,11 +191,12 @@ class TestParse:
<body onclick="alert(1);">Some body</body> <body onclick="alert(1);">Some body</body>
</html> </html>
""" """
inlines = parse(doc, "scripts") inlines = parse(doc, "script-src")
assert len(inlines) == 1 assert len(inlines) == 1
assert inlines[0].content == "alert(1);" assert inlines[0].content == "alert(1);"
assert inlines[0].line == 7 assert inlines[0].line == 7
assert inlines[0].position == 8 assert inlines[0].position == 8
assert inlines[0].directive == "script-src"
def test_parse_only_style_targets(self): def test_parse_only_style_targets(self):
doc = """ doc = """
@ -207,11 +208,12 @@ class TestParse:
<body onclick="alert(1);">Some body</body> <body onclick="alert(1);">Some body</body>
</html> </html>
""" """
inlines = parse(doc, "styles") inlines = parse(doc, "style-src")
assert len(inlines) == 1 assert len(inlines) == 1
assert inlines[0].content == ".someclass { background:#142a3f; }" assert inlines[0].content == ".someclass { background:#142a3f; }"
assert inlines[0].line == 5 assert inlines[0].line == 5
assert inlines[0].position == 10 assert inlines[0].position == 10
assert inlines[0].directive == "style-src"
def test_version(): def test_version():