Add hash implementation and basic HTML parsing. The CLI app should be usable.

Gonçalo Valério 2022-03-13 13:18:34 +00:00
parent 6fad4c43cc
commit 0d8f365fda
Signed by: dethos
GPG Key ID: DF557F2BDCC2445E
9 changed files with 756 additions and 4 deletions

.gitignore
@ -127,3 +127,6 @@ dmypy.json
# Pyre type checker
# Editors

@ -0,0 +1,27 @@
CLI app
All contributions and improvements are welcome.

@ -1 +1,4 @@
__version__ = '0.1.0'
from .lib import parse
__version__ = "0.0.1"
__all__ = ["parse"]

inlinehashes/ Normal file
@ -0,0 +1,69 @@
"""CLI App.
This file contains all the logic for the command line interface of
this library. It makes use of the same tools available for those that
install the package in order to be used programmatically.
from typing import List
import requests
import argparse
import inlinehashes
import json
# Names of the hash attributes that may be requested through ``alg``.
_SUPPORTED_ALGS = ("sha256", "sha384", "sha512")


def build_output(
    # Quoted so the annotation is not evaluated when the module is imported.
    inlines: "List[inlinehashes.lib.Inline]",
    alg: str,
    full: bool = False,
) -> str:
    """Build a JSON output from a list of Inline objects.

    Args:
        inlines: Objects exposing ``content``, ``short_content`` and one
            attribute per supported hash algorithm.
        alg: Name of the hash attribute to report; one of ``sha256``,
            ``sha384`` or ``sha512``.
        full: When true the whole ``content`` is included, otherwise only
            ``short_content``.

    Returns:
        A JSON array (indented with 2 spaces) of objects with the keys
        ``content`` and ``hash``.

    Raises:
        ValueError: If ``alg`` is not a supported hash algorithm.
    """
    # ``alg`` ends up in ``getattr``; without this check any attribute name
    # (e.g. "content") would be silently accepted as a "hash".
    if alg not in _SUPPORTED_ALGS:
        supported = ", ".join(_SUPPORTED_ALGS)
        raise ValueError(f"Unsupported algorithm: {alg!r} (expected one of: {supported})")

    snippet = "content" if full else "short_content"
    out = [
        {"content": getattr(item, snippet), "hash": getattr(item, alg)}
        for item in inlines
    ]
    return json.dumps(out, indent=2)
def write_to_file(path: str, content: str) -> None:
"""Writes the content to the specified file.
OSError: More than one subclass of OSError
with open(path, "w") as f:
def run_cli() -> None:
"""Entry point of the command line interface."""
parser = argparse.ArgumentParser()
parser.add_argument("source", help="URL or local HTML file to check")
"-a", "--alg", help="Hash algorithm to use (default: sha256)", default="sha256"
"-f", "--full", help="Include full content in the output", action="store_true"
parser.add_argument("-o", "--output", help="Store output in a file.")
args = parser.parse_args()
path = args.source
if path.startswith("http://") or path.startswith("https://"):
response = requests.get(path)
content = response.text
with open(path, "r") as f:
content =
except (requests.RequestException, OSError):
print(f"Invalid source: {path}")
inlines = inlinehashes.parse(content)
out = build_output(inlines, args.alg, bool(args.full))
if args.output:
write_to_file(args.output, out)
if __name__ == "__main__":

inlinehashes/ Normal file
@ -0,0 +1,74 @@
"""Inline Hashes - Helping with CSP when possible.
This small module helps you to parse HTML documents and extract all the inline
content that must be specifically allowed in the Content-Security-Policy in
order to work (assuming "unsafe-inline" is not present).
from typing import List
from dataclasses import dataclass
from functools import cached_property
from itertools import chain
import hashlib
import base64
from bs4 import BeautifulSoup
"scripts": [
{"name": "script"},
"styles": [
{"name": "style"},
@dataclass(frozen=True)
class Inline:
    """Represents a piece of content present in the HTML document.

    It can be the value of an element/node or the value of an attribute
    of a given element/node.

    Every hash property returns a string in the form
    ``<algorithm>-<base64 digest>``, computed over the UTF-8 encoding of
    ``content``. Values are computed on first access and then cached.
    """

    content: str

    def _hash(self, alg: str) -> str:
        """Return ``<alg>-<base64 digest>`` for the UTF-8 encoded content."""
        digest = hashlib.new(alg, self.content.encode("utf-8")).digest()
        encoded = base64.b64encode(digest).decode("utf-8")
        return f"{alg}-{encoded}"

    @cached_property
    def short_content(self) -> str:
        """The first 50 characters of the content."""
        return self.content[:50]

    @cached_property
    def sha256(self) -> str:
        """SHA-256 hash of the content, prefixed with ``sha256-``."""
        return self._hash("sha256")

    @cached_property
    def sha384(self) -> str:
        """SHA-384 hash of the content, prefixed with ``sha384-``."""
        return self._hash("sha384")

    @cached_property
    def sha512(self) -> str:
        """SHA-512 hash of the content, prefixed with ``sha512-``."""
        return self._hash("sha512")
def parse(content: str, target: str = "all") -> List[Inline]:
    """Parse an HTML document and extract its inline content.

    Args:
        content: The HTML document to inspect.
        target: Which group of elements to look for: ``"all"`` (default)
            or one of the keys of ``_VALID_TARGETS``.

    Returns:
        One ``Inline`` per matching element that has contents, built from
        the first child of that element.

    Raises:
        ValueError: If ``target`` is neither ``"all"`` nor a known target.
    """
    if target == "all":
        search_queries = chain.from_iterable(_VALID_TARGETS.values())
    elif target in _VALID_TARGETS:
        search_queries = _VALID_TARGETS[target]
    else:
        # Only an unknown target is an error; valid targets must not reach
        # this point.
        raise ValueError("Invalid Target")

    soup = BeautifulSoup(content, "html.parser")
    elements = []
    for query in search_queries:
        elements.extend(soup.find_all(**query))
    # Elements without children (e.g. empty tags) carry no inline content.
    return [Inline(e.contents[0]) for e in elements if e.contents]

@ -1,14 +1,35 @@
name = "inlinehashes"
version = "0.1.0"
description = ""
version = "0.0.1"
description = "Hash generator for HTML inline styles and scripts"
authors = ["Gonçalo Valério <>"]
homepage = ""
repository = ""
license = "MIT"
classifiers = [
"Development Status :: 3 - Alpha",
"Environment :: Console",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Topic :: Security",
"Topic :: Text Processing :: Markup :: HTML",
"Typing :: Typed"
python = "^3.9"
beautifulsoup4 = "^4.10.0"
requests = "^2.27.1"
pytest = "^5.2"
black = "^22.1.0"
mypy = "^0.940"
cli = ""
requires = ["poetry-core>=1.0.0"]

tests/ Normal file
@ -0,0 +1 @@
# Add later

@ -1,5 +1,126 @@
import pytest
from inlinehashes import __version__
from inlinehashes.lib import Inline
class TestInline:
def test_cant_be_changed(self):
("", ""),
("var a = 1", "var a = 1"),
("a" * 60, "a" * 50),
("a " * 100, "a " * 25),
def test_short_content_property(self, content, short_version):
inline = Inline(content=content)
assert inline.short_content == short_version
"// some random text just for the test",
"var someVar = { name: 'some object' };",
("var i = 1;", "sha256-1QhCpB/IFWw8Pb/g/IBzIBgErHWG5wrytauZib+UF+g="),
(" var i = 1; ", "sha256-JXsq/1KEtrnrlGozP1V228Z4rNL2pB7MlgpEBBbVnLA="),
def test_sha256_property(self, content, hash):
inline = Inline(content=content)
assert inline.sha256 == hash
"// some random text just for the test",
"var someVar = { name: 'some object' };",
"var i = 1;",
" var i = 1; ",
def test_sha384_property(self, content, hash):
inline = Inline(content=content)
assert inline.sha384 == hash
"// some random text just for the test",
"var someVar = { name: 'some object' };",
"var i = 1;",
" var i = 1; ",
def test_sha512_property(self, content, hash):
inline = Inline(content=content)
assert inline.sha512 == hash
class TestParse:
@pytest.mark.skip(reason="Add later")
def test_parse_detects_script_tags(self):
@pytest.mark.skip(reason="Add later")
def test_parse_detects_style_tags(self):
@pytest.mark.skip(reason="Not Implemented yet")
def test_parse_detects_style_attributes(self):
@pytest.mark.skip(reason="Not Implemented yet")
def test_parse_detect_attributes_with_js(self):
def test_version():
assert __version__ == '0.1.0'
assert __version__ == "0.0.1"