mirror of https://github.com/dethos/inlinehashes
calculate hashes of js and css code in element attributes
parent
fa40e7323d
commit
bba3e6024b
|
@ -4,23 +4,20 @@ This small module helps you to parse HTML documents and extract all the inline
|
|||
content that must be specifically allowed in the Content-Security-Policy in
|
||||
order to work (assuming "unsafe-inline" is not present).
|
||||
"""
|
||||
from typing import List
|
||||
from typing import List, Callable, Optional
|
||||
from dataclasses import dataclass
|
||||
from functools import cached_property
|
||||
from functools import cached_property, partial
|
||||
from itertools import chain
|
||||
import hashlib
|
||||
import base64
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
|
||||
_VALID_TARGETS = {
|
||||
"scripts": [
|
||||
{"name": "script"},
|
||||
],
|
||||
"styles": [
|
||||
{"name": "style"},
|
||||
],
|
||||
}
|
||||
|
||||
@dataclass(frozen=True)
class SearchQuery:
    """Immutable description of one lookup to run against a parsed document.

    Instances pair a predicate with the place where the matching tag keeps
    its inline content.
    """

    # Predicate called with a tag; it decides whether the tag is a match.
    search_function: Callable
    # Name of the attribute holding the inline content, or ``None`` when the
    # content is the body of the tag itself.
    attr_name: Optional[str]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
|
@ -62,6 +59,137 @@ class Inline:
|
|||
return f"Inline(content='{self.short_content}...')"
|
||||
|
||||
|
||||
def matches_attribute(tag: Tag, attribute_name: str) -> bool:
    """Tell whether ``tag`` carries an attribute named ``attribute_name``.

    Args:
        tag: The element being inspected.
        attribute_name: Attribute to look for on the element.

    Returns:
        The result of ``tag.has_attr(attribute_name)``.
    """
    attribute_present = tag.has_attr(attribute_name)
    return attribute_present
|
||||
|
||||
|
||||
def matches_name(tag: Tag, name: str) -> bool:
    """Tell whether the element's tag name is exactly ``name``.

    Args:
        tag: The element being inspected.
        name: Tag name to compare against ``tag.name``.

    Returns:
        ``True`` when ``tag.name`` equals ``name``, ``False`` otherwise.
    """
    tag_name = tag.name
    return tag_name == name
|
||||
|
||||
|
||||
# Names of the HTML attributes whose value is executed as JavaScript (inline
# event handlers). Each entry becomes one attribute search for the "scripts"
# target, so a handler that is missing here is never reported.
#
# Kept as an alphabetically sorted list: the test-suite parametrizes over it.
# Besides the original entries it now also covers standard handlers that were
# previously missing (e.g. ``onfocus``, ``onabort``, ``ontimeupdate``,
# ``ononline``/``onoffline``, ``onstorage``, ``onpointercancel``).
_EVENT_HANDLER_ATTRS = [
    "onabort",
    "onafterprint",
    "onafterscriptexecute",
    "onanimationcancel",
    "onanimationend",
    "onanimationiteration",
    "onanimationstart",
    "onauxclick",
    "onbeforecopy",
    "onbeforecut",
    "onbeforeprint",
    "onbeforescriptexecute",
    "onbeforeunload",
    "onbegin",
    "onblur",
    "onbounce",
    "oncancel",
    "oncanplay",
    "oncanplaythrough",
    "onchange",
    "onclick",
    "onclose",
    "oncontextmenu",
    "oncopy",
    "oncuechange",
    "oncut",
    "ondblclick",
    "ondrag",
    "ondragend",
    "ondragenter",
    "ondragleave",
    "ondragover",
    "ondragstart",
    "ondrop",
    "ondurationchange",
    "onemptied",
    "onend",
    "onended",
    "onerror",
    "onfocus",
    "onfocusin",
    "onfocusout",
    "onfullscreenchange",
    "ongotpointercapture",
    "onhashchange",
    "oninput",
    "oninvalid",
    "onkeydown",
    "onkeypress",
    "onkeyup",
    "onlanguagechange",
    "onload",
    "onloadeddata",
    "onloadedmetadata",
    "onloadend",
    "onloadstart",
    "onlostpointercapture",
    "onmessage",
    "onmousedown",
    "onmouseenter",
    "onmouseleave",
    "onmousemove",
    "onmouseout",
    "onmouseover",
    "onmouseup",
    "onmousewheel",
    "onmozfullscreenchange",
    "onoffline",
    "ononline",
    "onpagehide",
    "onpageshow",
    "onpaste",
    "onpause",
    "onplay",
    "onplaying",
    "onpointercancel",
    "onpointerdown",
    "onpointerenter",
    "onpointerleave",
    "onpointermove",
    "onpointerout",
    "onpointerover",
    "onpointerrawupdate",
    "onpointerup",
    "onpopstate",
    "onprogress",
    "onratechange",
    "onrejectionhandled",
    "onrepeat",
    "onreset",
    "onresize",
    "onscroll",
    "onsearch",
    "onseeked",
    "onseeking",
    "onselect",
    "onselectionchange",
    "onselectstart",
    "onshow",
    "onstalled",
    "onstart",
    "onstorage",
    "onsubmit",
    "onsuspend",
    "ontimeupdate",
    "ontoggle",
    "ontouchcancel",
    "ontouchend",
    "ontouchmove",
    "ontouchstart",
    "ontransitioncancel",
    "ontransitionend",
    "ontransitionrun",
    "ontransitionstart",
    "onunhandledrejection",
    "onunload",
    "onvolumechange",
    "onwaiting",
    "onwebkitanimationend",
    "onwebkitanimationiteration",
    "onwebkitanimationstart",
    "onwebkittransitionend",
    "onwheel",
]
|
||||
|
||||
# Search queries to run for each supported target name.
#   "scripts": <script> elements, plus one attribute query per known inline
#              event handler attribute.
#   "styles":  <style> elements, plus the ``style`` attribute.
_VALID_TARGETS = {
    "scripts": [SearchQuery(partial(matches_name, name="script"), None)]
    + [
        SearchQuery(partial(matches_attribute, attribute_name=attr), attr)
        for attr in _EVENT_HANDLER_ATTRS
    ],
    "styles": [
        SearchQuery(partial(matches_name, name="style"), None),
        SearchQuery(partial(matches_attribute, attribute_name="style"), "style"),
    ],
}
|
||||
|
||||
|
||||
def parse(content: str, target: str = "all") -> List[Inline]:
|
||||
"""Parses an HTML document and extracts the inline content found in it."""
|
||||
soup = BeautifulSoup(content, "html.parser")
|
||||
|
@ -75,6 +203,12 @@ def parse(content: str, target: str = "all") -> List[Inline]:
|
|||
|
||||
elements = []
|
||||
for q in search_queries:
|
||||
elements += soup.find_all(**q)
|
||||
|
||||
return [Inline(e.contents[0]) for e in elements if e.contents]
|
||||
for tag in soup.find_all(q.search_function):
|
||||
if q.attr_name:
|
||||
inline = Inline(tag[q.attr_name])
|
||||
else:
|
||||
if not tag.contents:
|
||||
continue
|
||||
inline = Inline(tag.contents[0])
|
||||
elements.append(inline)
|
||||
return elements
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import pytest
|
||||
|
||||
|
||||
from inlinehashes import __version__
|
||||
from inlinehashes.lib import Inline
|
||||
from inlinehashes import __version__, parse
|
||||
from inlinehashes.lib import Inline, _EVENT_HANDLER_ATTRS
|
||||
|
||||
|
||||
class TestInline:
|
||||
|
@ -105,22 +105,62 @@ class TestInline:
|
|||
|
||||
|
||||
class TestParse:
|
||||
@pytest.mark.skip(reason="Add later")
|
||||
def test_parse_detects_script_tags(self):
|
||||
pass
|
||||
doc = """
|
||||
<html>
|
||||
<head><title>Some title</title></head>
|
||||
<body>Some body
|
||||
<script>alert("hash this");</script>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
inlines = parse(doc)
|
||||
assert len(inlines) == 1
|
||||
assert inlines[0].content == 'alert("hash this");'
|
||||
|
||||
@pytest.mark.skip(reason="Add later")
|
||||
def test_parse_detects_style_tags(self):
|
||||
pass
|
||||
doc = """
|
||||
<html>
|
||||
<head>
|
||||
<title>Some title</title>
|
||||
<style>.someclass { background:#142a3f; }</style>
|
||||
</head>
|
||||
<body>Some body</body>
|
||||
</html>
|
||||
"""
|
||||
inlines = parse(doc)
|
||||
assert len(inlines) == 1
|
||||
assert inlines[0].content == ".someclass { background:#142a3f; }"
|
||||
|
||||
@pytest.mark.skip(reason="Not Implemented yet")
|
||||
def test_parse_detects_style_attributes(self):
|
||||
pass
|
||||
doc = """
|
||||
<html>
|
||||
<head>
|
||||
<title>Some title</title>
|
||||
</head>
|
||||
<body style="text-color: #000;">Some body</body>
|
||||
</html>
|
||||
"""
|
||||
inlines = parse(doc)
|
||||
assert len(inlines) == 1
|
||||
assert inlines[0].content == "text-color: #000;"
|
||||
|
||||
@pytest.mark.skip(reason="Not Implemented yet")
|
||||
def test_parse_detect_attributes_with_js(self):
|
||||
pass
|
||||
@pytest.mark.parametrize("attr", _EVENT_HANDLER_ATTRS)
|
||||
def test_parse_detect_attributes_with_js(self, attr):
|
||||
# Just to test they are detected even though some of them are
|
||||
# not valid for all elements
|
||||
doc = f"""
|
||||
<html>
|
||||
<head>
|
||||
<title>Some title</title>
|
||||
</head>
|
||||
<body {attr}="alert(1);">Some body</body>
|
||||
</html>
|
||||
"""
|
||||
inlines = parse(doc)
|
||||
assert len(inlines) == 1
|
||||
assert inlines[0].content == "alert(1);"
|
||||
|
||||
|
||||
def test_version():
|
||||
assert __version__ == "0.0.1"
|
||||
assert __version__ == "0.0.2"
|
||||
|
|
Loading…
Reference in New Issue