mirror of https://github.com/dethos/inlinehashes
calculate hashes of js and css code in element attributes
This commit is contained in:
parent
fa40e7323d
commit
bba3e6024b
|
@ -4,23 +4,20 @@ This small module helps you to parse HTML documents and extract all the inline
|
||||||
content that must be specifically allowed in the Content-Security-Policy in
|
content that must be specifically allowed in the Content-Security-Policy in
|
||||||
order to work (assuming "unsafe-inline" is not present).
|
order to work (assuming "unsafe-inline" is not present).
|
||||||
"""
|
"""
|
||||||
from typing import List
|
from typing import List, Callable, Optional
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from functools import cached_property
|
from functools import cached_property, partial
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
import hashlib
|
import hashlib
|
||||||
import base64
|
import base64
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup, Tag
|
||||||
|
|
||||||
@dataclass(frozen=True)
class SearchQuery:
    """Describe one way of locating inline content in a parsed document.

    ``search_function`` is a predicate that receives a candidate tag and
    returns ``True`` when that tag should be inspected.  ``attr_name`` names
    the attribute whose value is the inline code; leave it as ``None`` when
    the inline code is the tag's own contents rather than an attribute value.
    """

    # Predicate used to filter tags; it is called with a single tag.
    search_function: Callable[..., bool]
    # Attribute holding the inline code, or None for tag contents.
    attr_name: Optional[str] = None
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
|
@ -62,6 +59,137 @@ class Inline:
|
||||||
return f"Inline(content='{self.short_content}...')"
|
return f"Inline(content='{self.short_content}...')"
|
||||||
|
|
||||||
|
|
||||||
|
def matches_attribute(tag: "Tag", attribute_name: str) -> bool:
    """Tell whether *tag* carries an attribute called *attribute_name*."""
    is_present = tag.has_attr(attribute_name)
    return is_present
|
||||||
|
|
||||||
|
|
||||||
|
def matches_name(tag: "Tag", name: str) -> bool:
    """Tell whether the element name of *tag* is exactly *name*."""
    tag_name = tag.name
    return tag_name == name
|
||||||
|
|
||||||
|
|
||||||
|
# Names of HTML attributes whose value is executed as JavaScript (event
# handler content attributes).  The list mixes standard handlers with legacy
# and vendor-specific ones; it is kept in alphabetical order so additions and
# duplicates are easy to spot.
_EVENT_HANDLER_ATTRS = [
    "onabort",
    "onafterprint",
    "onafterscriptexecute",
    "onanimationcancel",
    "onanimationend",
    "onanimationiteration",
    "onanimationstart",
    "onauxclick",
    "onbeforecopy",
    "onbeforecut",
    "onbeforeprint",
    "onbeforescriptexecute",
    "onbeforeunload",
    "onbegin",
    "onblur",
    "onbounce",
    "oncancel",
    "oncanplay",
    "oncanplaythrough",
    "onchange",
    "onclick",
    "onclose",
    "oncontextmenu",
    "oncopy",
    "oncuechange",
    "oncut",
    "ondblclick",
    "ondrag",
    "ondragend",
    "ondragenter",
    "ondragleave",
    "ondragover",
    "ondragstart",
    "ondrop",
    "ondurationchange",
    "onemptied",
    "onend",
    "onended",
    "onerror",
    "onfocus",
    "onfocusin",
    "onfocusout",
    "onformdata",
    "onfullscreenchange",
    "onhashchange",
    "oninput",
    "oninvalid",
    "onkeydown",
    "onkeypress",
    "onkeyup",
    "onlanguagechange",
    "onload",
    "onloadeddata",
    "onloadedmetadata",
    "onloadend",
    "onloadstart",
    "onmessage",
    "onmessageerror",
    "onmousedown",
    "onmouseenter",
    "onmouseleave",
    "onmousemove",
    "onmouseout",
    "onmouseover",
    "onmouseup",
    "onmousewheel",
    "onmozfullscreenchange",
    "onoffline",
    "ononline",
    "onpagehide",
    "onpageshow",
    "onpaste",
    "onpause",
    "onplay",
    "onplaying",
    "onpointercancel",
    "onpointerdown",
    "onpointerenter",
    "onpointerleave",
    "onpointermove",
    "onpointerout",
    "onpointerover",
    "onpointerrawupdate",
    "onpointerup",
    "onpopstate",
    "onprogress",
    "onratechange",
    "onrejectionhandled",
    "onrepeat",
    "onreset",
    "onresize",
    "onscroll",
    "onsearch",
    "onseeked",
    "onseeking",
    "onselect",
    "onselectionchange",
    "onselectstart",
    "onshow",
    "onstalled",
    "onstart",
    "onstorage",
    "onsubmit",
    "onsuspend",
    "ontimeupdate",
    "ontoggle",
    "ontouchcancel",
    "ontouchend",
    "ontouchmove",
    "ontouchstart",
    "ontransitioncancel",
    "ontransitionend",
    "ontransitionrun",
    "ontransitionstart",
    "onunhandledrejection",
    "onunload",
    "onvolumechange",
    "onwaiting",
    "onwebkitanimationend",
    "onwebkitanimationiteration",
    "onwebkitanimationstart",
    "onwebkittransitionend",
    "onwheel",
]
|
||||||
|
|
||||||
|
# Search queries grouped by the kind of inline content they locate.
# "scripts": the <script> element itself, followed by one attribute query per
# event handler name.  "styles": the <style> element and the "style"
# attribute.
_VALID_TARGETS = {
    "scripts": [SearchQuery(partial(matches_name, name="script"), None)]
    + [
        SearchQuery(partial(matches_attribute, attribute_name=handler), handler)
        for handler in _EVENT_HANDLER_ATTRS
    ],
    "styles": [
        SearchQuery(partial(matches_name, name="style"), None),
        SearchQuery(partial(matches_attribute, attribute_name="style"), "style"),
    ],
}
|
||||||
|
|
||||||
|
|
||||||
def parse(content: str, target: str = "all") -> List[Inline]:
|
def parse(content: str, target: str = "all") -> List[Inline]:
|
||||||
"""Parses an HTML document and extracts."""
|
"""Parses an HTML document and extracts."""
|
||||||
soup = BeautifulSoup(content, "html.parser")
|
soup = BeautifulSoup(content, "html.parser")
|
||||||
|
@ -75,6 +203,12 @@ def parse(content: str, target: str = "all") -> List[Inline]:
|
||||||
|
|
||||||
elements = []
|
elements = []
|
||||||
for q in search_queries:
|
for q in search_queries:
|
||||||
elements += soup.find_all(**q)
|
for tag in soup.find_all(q.search_function):
|
||||||
|
if q.attr_name:
|
||||||
return [Inline(e.contents[0]) for e in elements if e.contents]
|
inline = Inline(tag[q.attr_name])
|
||||||
|
else:
|
||||||
|
if not tag.contents:
|
||||||
|
continue
|
||||||
|
inline = Inline(tag.contents[0])
|
||||||
|
elements.append(inline)
|
||||||
|
return elements
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
from inlinehashes import __version__
|
from inlinehashes import __version__, parse
|
||||||
from inlinehashes.lib import Inline
|
from inlinehashes.lib import Inline, _EVENT_HANDLER_ATTRS
|
||||||
|
|
||||||
|
|
||||||
class TestInline:
|
class TestInline:
|
||||||
|
@ -105,22 +105,62 @@ class TestInline:
|
||||||
|
|
||||||
|
|
||||||
class TestParse:
    """Checks that ``parse`` reports inline code found in tags and attributes."""

    def test_parse_detects_script_tags(self):
        """The text of a ``<script>`` element is returned as a single inline."""
        html = """
        <html>
            <head><title>Some title</title></head>
            <body>Some body
                <script>alert("hash this");</script>
            </body>
        </html>
        """
        found = parse(html)
        assert [item.content for item in found] == ['alert("hash this");']

    def test_parse_detects_style_tags(self):
        """The text of a ``<style>`` element is returned as a single inline."""
        html = """
        <html>
            <head>
                <title>Some title</title>
                <style>.someclass { background:#142a3f; }</style>
            </head>
            <body>Some body</body>
        </html>
        """
        found = parse(html)
        assert [item.content for item in found] == [
            ".someclass { background:#142a3f; }"
        ]

    def test_parse_detects_style_attributes(self):
        """The value of a ``style`` attribute is returned as a single inline."""
        html = """
        <html>
            <head>
                <title>Some title</title>
            </head>
            <body style="text-color: #000;">Some body</body>
        </html>
        """
        found = parse(html)
        assert [item.content for item in found] == ["text-color: #000;"]

    @pytest.mark.parametrize("attr", _EVENT_HANDLER_ATTRS)
    def test_parse_detect_attributes_with_js(self, attr):
        """Every known event handler attribute is picked up by ``parse``."""
        # Just to test they are detected even though some of them are
        # not valid for all elements
        html = f"""
        <html>
            <head>
                <title>Some title</title>
            </head>
            <body {attr}="alert(1);">Some body</body>
        </html>
        """
        found = parse(html)
        assert [item.content for item in found] == ["alert(1);"]
|
||||||
|
|
||||||
|
|
||||||
def test_version():
    """The package reports the release number this test suite targets."""
    expected_release = "0.0.2"
    assert __version__ == expected_release
|
||||||
|
|
Loading…
Reference in New Issue