boxnotes2html

Convert Box's proprietary Box Notes to HTML, Markdown, or plain text
Log | Files | Refs | README | LICENSE

commit becf9a6475783b1a73f6532799c164c74bbb2693
Author: alex wennerberg <awennerberg@cloudbakers.com>
Date:   Mon, 11 Feb 2019 21:20:47 -0600

initial commit

Diffstat:
A.gitignore | 112+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A.pre-commit-config.yaml | 16++++++++++++++++
ALICENSE | 7+++++++
AMANIFEST.in | 3+++
AMakefile | 96+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aboxnotes2html/__init__.py | 1+
Aboxnotes2html/boxnote.py | 214+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aboxnotes2html/cli.py | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
Aboxnotes2html/html.py | 106+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aboxnotes2html/markdown.py | 31+++++++++++++++++++++++++++++++
Aboxnotes2html/style.css | 3+++
Aimg/after.png | 0
Aimg/before.png | 0
Areadme.md | 75+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asetup.cfg | 10++++++++++
Asetup.py | 36++++++++++++++++++++++++++++++++++++
Atests/fixtures/Calendar.boxnote | 1+
Atests/fixtures/complex_note.boxnote | 2++
Atests/fixtures/normal note.boxnote | 2++
Atests/fixtures/notes.py | 36++++++++++++++++++++++++++++++++++++
Atests/fixtures/simple_note.boxnote | 2++
Atests/test_boxhtml.py | 22++++++++++++++++++++++
Atests/test_boxnote.py | 43+++++++++++++++++++++++++++++++++++++++++++
Atests/test_cli.py | 15+++++++++++++++
Atox.ini | 8++++++++
25 files changed, 894 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,112 @@ +*swp +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +keys.py +output.html +test.html +tests/fixtures/*.html +tests/fixtures/*.txt +tests/fixtures/*.md +tests/fixtures/*.gif diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml @@ -0,0 +1,16 @@ +repos: +- repo: https://github.com/ambv/black + rev: stable + hooks: + - id: black + language_version: python3.6 + ethis testanguage_version: python3.6 +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.0.0 # Use the ref you want to point at + hooks: + - id: check-json + - id: flake8 + - id: detect-private-key + - id: pretty-format-json + args: ['--autofix', '--no-sort-keys'] + - id: check-yaml diff --git 
a/LICENSE b/LICENSE @@ -0,0 +1,7 @@ +Copyright 2019 Alex Wennerberg + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/MANIFEST.in b/MANIFEST.in @@ -0,0 +1,3 @@ +include boxnotes2html/style.css +include LICENSE +include readme.md diff --git a/Makefile b/Makefile @@ -0,0 +1,96 @@ +.PHONY: clean clean-test clean-pyc clean-build docs help +.DEFAULT_GOAL := help + +define BROWSER_PYSCRIPT +import os, webbrowser, sys + +try: + from urllib import pathname2url +except: + from urllib.request import pathname2url + +webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) +endef +export BROWSER_PYSCRIPT + +define PRINT_HELP_PYSCRIPT +import re, sys + +for line in sys.stdin: + match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) + if match: + target, help = match.groups() + print("%-20s %s" % (target, help)) +endef +export PRINT_HELP_PYSCRIPT + +BROWSER := python -c "$$BROWSER_PYSCRIPT" + +help: + @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) + +clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts + +clean-build: ## remove build artifacts + rm -fr build/ + rm -fr dist/ + rm -fr .eggs/ + find . -name '*.egg-info' -exec rm -fr {} + + find . -name '*.egg' -exec rm -f {} + + +clean-pyc: ## remove Python file artifacts + find . -name '*.pyc' -exec rm -f {} + + find . -name '*.pyo' -exec rm -f {} + + find . -name '*~' -exec rm -f {} + + find . 
-name '__pycache__' -exec rm -fr {} + + +clean-test: ## remove test and coverage artifacts + rm -fr .tox/ + rm -f .coverage + rm -fr htmlcov/ + rm -fr .pytest_cache + rm -rf tests/fixtures/*txt + rm -rf tests/fixtures/*md + rm -rf tests/fixtures/*html + +lint: ## check style with flake8 + flake8 boxnotes2html tests + +black: + black boxnotes2html + +test: ## run tests quickly with the default Python + python3 setup.py test + +integration_test: ## run integration tests, which may be slower + +test-all: ## run tests on every Python version with tox + tox + +coverage: ## check code coverage quickly with the default Python + coverage run --source boxnotes2html -m pytest + coverage report -m + coverage html + $(BROWSER) htmlcov/index.html + +docs: ## generate Sphinx HTML documentation, including API docs + rm -f docs/boxnotes2html.rst + rm -f docs/modules.rst + sphinx-apidoc -o docs/ boxnotes2html + $(MAKE) -C docs clean + $(MAKE) -C docs html + $(BROWSER) docs/_build/html/index.html + +servedocs: docs ## compile the docs watching for changes + watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . + +release: dist ## package and upload a release + twine upload dist/* + +dist: clean ## builds source and wheel package + python setup.py sdist + python setup.py bdist_wheel + ls -l dist + +install: clean ## install the package to the active Python's site-packages + python setup.py install diff --git a/boxnotes2html/__init__.py b/boxnotes2html/__init__.py @@ -0,0 +1 @@ +from .boxnote import BoxNote diff --git a/boxnotes2html/boxnote.py b/boxnotes2html/boxnote.py @@ -0,0 +1,214 @@ +from functools import reduce +import json +import re +from . 
import html, markdown +from xml.etree import ElementTree as ET +import os + +dir_path = os.path.dirname(os.path.realpath(__file__)) + + +class AttributeChunk: # not really a class + """ + An attribute chunk is formatted like this: + *n[*n...]+n[|n+n] + eg + *4*1+1|+1 + where *n refers to an attribute to apply from the attribute pool + and +n is a number of characters to apply that attribute to + and |n is indicative of a line break (unclear the purpose of this) + """ + + def __init__(self, attribute_string_chunk, position=None): + self.attribute_string_chunk = attribute_string_chunk + self.attributes = set(self._all_items_after_indicator("*")) + self.num_characters = sum(self._all_items_after_indicator("+")) + self.num_linebreaks = sum(self._all_items_after_indicator("|")) + + def _all_items_after_indicator(self, indicator): + """ + Regex to get all the numbers after the given indicator + Then convert this base36 number into an integer + * -> attribute + + -> number of characters to apply attribute to + | -> number of linebreaks (I think) + """ + items = re.findall( + "\\{}([^\\+\\|\\*]*)".format(indicator), self.attribute_string_chunk + ) + return map(lambda x: int(x, 36), items) + + +class FormattedText: + """ + A block of text with parsed information about it + """ + + def __init__(self, attributes, text, num_linebreaks, tagnums=None): + self.attributes = attributes + self.tagnums = list(tagnums) # for debugging + self.styles = self.get_base_styles() + self.num_linebreaks = num_linebreaks + self.text = text + self.element_tree = self.styles_to_elements() + self.table_id, self.row_id, self.column_id = self.get_table_info() + self.list_type, self.list_level = self.get_list_info() + return + + def get_base_styles(self): + tags = [] + for attribute in self.attributes: + tags.append(html.convert_simple_element_to_html_tag(attribute)) + if not tags: + tags = html.HTMLTag("span", {}) # hmm + return tags + + def get_table_info(self): + for box_attribute in 
self.attributes: + if html.get_table_info(box_attribute)[0]: + return html.get_table_info(box_attribute) + return None, None, None + + def get_list_info(self): # refactor + for box_attribute in self.attributes: + if html.get_list_info(box_attribute): + return html.get_list_info(box_attribute) + return None, None + + def styles_to_elements(self): + if self.text.replace("\n", "") == "*": # maybe change + self.text = "" + + # LISTS HACK -- After much anguish, I have resorted myself to the dark arts + # Please forgive me + # We are using <li/> to represent list items + if "li" in [a.tag for a in self.styles]: + span = ET.Element("span") + indent_level = self.get_list_info()[1] - 1 or 0 + span.text = indent_level * "&nbsp;&nbsp;" + "* " + return span + + def _append(x, y): + y.append(x) + return y + + individual_elements = list( + map(lambda x: ET.Element(x.tag, x.attributes), self.styles) + ) + reduce(_append, individual_elements) + lowest_element = individual_elements[0] # indexerror + toplevel_element = individual_elements[-1] + for _ in range(self.num_linebreaks): + toplevel_element.append(ET.Element("br")) # Hm + lowest_element.text = self.text + return toplevel_element + + def styles_to_markdown_string(self): + # escape markdown characters + # kind of awkward + characters_to_escape = "\\*[]`" + tmp = self.text or "" + out_text = "" + if tmp is not None: + for character in characters_to_escape: + tmp = tmp.replace(character, "\\{}".format(character)) + for line in tmp.split("\n"): + for box_attribute in self.attributes: + if line or box_attribute[0] in ["list", "image", "link"]: + start, end = markdown.convert_simple_element_to_markdown( + box_attribute + ) + line = start + line + end + out_text += line + out_text += "\n" * (self.num_linebreaks) + + return out_text + + def __repr__(self): + return json.dumps( + {k: v for k, v in self.__dict__.items() if k != "element_tree"}, indent=2 + ) + + +class BoxNote: + NOTE_MAPPING = [] # MAPPING FROM ATTRIB TO HTML TAG + + 
def __init__(self, note_string): # TODO: rename notefile to notefilepath + """ + note_string: the note data as a string + text is the raw text of the notes document. + attributes is the attribute formatting string + attribute pool is all the attributes that are used and a conversion from + numattribute number to some html-like formatting + """ + self.note_data = json.loads(note_string) + self.text = self.note_data["atext"]["text"] + self.attribute_chunks = self._attribute_chunks_from_string( + self.note_data["atext"]["attribs"] + ) + self.attribute_pool = self.note_data["pool"]["numToAttrib"] + # config? + + @classmethod + def from_file(cls, filepath): + with open(filepath) as f: + return cls(note_string=f.read()) + + def get_metadata(self): + """ + returns potentially useful metadata about the file. ignores more obscure + metadata that is mostly for internal user. WIP, currently unused + """ + metadata = {"last_edit_timestamp": self.note_data.get("lastEditTimestamp")} + return metadata + + @staticmethod + def _attribute_chunks_from_string(attributes_string): + return map(AttributeChunk, re.findall("\\*.*?\\+[^\\*]*", attributes_string)) + + def _get_formatted_text_list(self): + text = self.text + output = [] + pointer = 0 + for chunk in self.attribute_chunks: + attributes = [ + self.attribute_pool[str(attribute_number)] + for attribute_number in chunk.attributes + ] + element_text = text[pointer : pointer + chunk.num_characters] + blob = FormattedText( + attributes, element_text, chunk.num_linebreaks, tagnums=chunk.attributes + ) + output.append(blob) + pointer += chunk.num_characters + return output + + def as_element_tree(self): + html_result = ET.Element("html") + body = ET.SubElement(html_result, "body") + table = ET.Element("table") # only one for now WIP + blobs = self._get_formatted_text_list() + for blob in blobs: + body.append(blob.element_tree) + return html_result + + def as_html(self): + with open(os.path.join(dir_path, "style.css")) as f: + css = 
"<style>" + f.read() + "</style>" + body = ET.tostring(self.as_element_tree(), encoding="unicode").replace( + "&amp;nbsp;", "&nbsp;" + ) + return css + body + + def as_markdown(self): + out = "" + blobs = self._get_formatted_text_list() + for blob in blobs: + out += blob.styles_to_markdown_string() + return out + + def as_text(self): + return self.text + + def __str__(self): + return json.dumps(self.note_data, indent=2) diff --git a/boxnotes2html/cli.py b/boxnotes2html/cli.py @@ -0,0 +1,53 @@ +import sys +import os +import argparse +from boxnotes2html.boxnote import BoxNote + + +def run(): + run_with_args(sys.argv[1:]) + + +def run_with_args(args): + parser = argparse.ArgumentParser() + parser.add_argument( + "files", + help="file or files to process. If passed a directory, will process everything in that directory with the .boxnote extension.", + nargs="*", + ) + # TODO: implement + parser.add_argument( + "-r", "--recurse", help="recursively look through a folder", action="store_true" + ) + parser.add_argument( + "-f", + "--filetype", + help="output filetype: markdown or html or plaintext. 
Default html", + choices=["md", "html", "txt"], + default="html", + ) + args = parser.parse_args(args) + + for filepath in args.files: + if os.path.isdir(filepath): + for root, dirs, files in os.walk(filepath): + for subfile in files: + full_path = os.path.join(root, subfile) + if full_path.endswith(".boxnote"): + write_file(full_path, args.filetype) + else: + write_file(filepath, args.filetype) + + +def write_file(filepath, filetype): + note = BoxNote.from_file(filepath) + if filetype == "html": + out_string = note.as_html() + elif filetype == "md": + out_string = note.as_markdown() + elif filetype == "txt": + out_string = note.as_text() + output_path = os.path.splitext(filepath)[0] + ".{}".format(filetype) + print("writing file {}".format(output_path)) + with open(output_path, "w") as f: + f.write(out_string) diff --git a/boxnotes2html/html.py b/boxnotes2html/html.py @@ -0,0 +1,106 @@ +import json +import urllib.parse +import base64 + +from collections import namedtuple + + +HTMLTag = namedtuple("HTMLTag", ["tag", "attributes"]) + + +def get_table_info(box_attribute): + attribute_type = box_attribute[0] + if attribute_type.startswith("struct-table"): + table_id = attribute_type.partition("_")[0].replace("struct-table", "") + if "_row" in attribute_type: # col and row backwards + row_id = attribute_type.partition("_row")[2] + return table_id, row_id, None + elif "_col" in attribute_type: + col_id = attribute_type.partition("_col")[2] + return table_id, None, col_id + return None, None, None + + +def get_list_attribute(box_attribute): # UNUSED + # ordered vs unordered + attribute_type = box_attribute[1][:-1] + if attribute_type.startswith("number"): + return HTMLTag("ol", {}) + else: + return HTMLTag("ul", {}) # set defautl value + + +def get_list_info(box_attribute): + if "list" in box_attribute[0]: + if box_attribute[1].startswith("number"): + type = "ordered" + elif box_attribute[1].startswith("check"): + type = "checkbox" + else: + type = "unordered" + # TODO: 
regex. cant do more than 9 list levels + return type, int(box_attribute[1][-1]) + + +def convert_simple_element_to_html_tag(box_attribute): + tag = None + html_attrib = {} + attribute_type = box_attribute[0] + attribute_value = box_attribute[1] + assert attribute_value # WIP + if attribute_type == "bold": + tag = "b" + elif attribute_type == "underline": + tag = "u" + elif attribute_type == "italic": + tag = "i" + elif attribute_type == "strikethrough": + tag = "s" + elif "font-color" in attribute_type: + tag = "font" + html_attrib = {"color": attribute_type.split("-")[-1]} + elif "font-size" in attribute_type: + tag = "font" + size = attribute_type.split("-")[-1] + sizemap = { + "medium": "3", + "large": "+2", + "verylarge": "+3", + "small": "-1", # TODO: consider + } + html_attrib["size"] = sizemap[size] + elif attribute_type == "align": + tag = "div" + html_attrib["style"] = "text-align: {}".format(attribute_value) + elif attribute_type.startswith("link-"): + urlstring = _decode_link(attribute_type) + tag = "a" + html_attrib["href"] = urlstring + elif attribute_type.startswith("image"): + tag = "img" + html_attrib["src"] = _decode_image(attribute_type).get("boxSharedLink") + # elif attribute_type.startswith("struct-table"): + # tag = "td" + elif attribute_type == "list": + if "checked" in attribute_value: # "checked" or "unckecked" + tag = "input" + html_attrib["type"] = "checkbox" + if attribute_value.startswith("checked"): + html_attrib["checked"] = "checked" + else: + tag = "li" + return HTMLTag(tag, html_attrib) # NamedTuple + + +def _decode_link(urlstring): # move to separate module + return base64.b64decode(urlstring.split("-")[-1]).decode("utf-8").partition("-")[2] + + +def _decode_image(imagestring): + # distinguish image flags..." 
+ # this one is fun + return json.loads( + urllib.parse.unquote( + base64.b64decode(imagestring.split("-")[-1]).decode("utf-8") + ) + ) diff --git a/boxnotes2html/markdown.py b/boxnotes2html/markdown.py @@ -0,0 +1,31 @@ +# Tools for markdown conversion +from . import html + + +def convert_simple_element_to_markdown(box_attribute): + attribute_type = box_attribute[0] + attribute_value = box_attribute[1] + start = "" + end = "" + if not attribute_type: + start = end = "" + elif attribute_type == "bold": + start = end = "**" + elif attribute_type == "italic" or attribute_type == "underline": + start = end = "*" + elif attribute_type == "strikethrough": + start = end = "~~" + elif "font-size" in attribute_type: + sizemap = {"small": "", "medium": "", "large": "## ", "verylarge": "# "} + size = attribute_type.split("-")[-1] + start = sizemap[size] + elif attribute_type.startswith("link-"): + start = "[" + end = "]({})".format(html._decode_link(attribute_type)) + elif attribute_type.startswith("image"): + start = "![" + end = "]({})".format(html._decode_image(attribute_type).get("boxSharedLink")) + elif attribute_type == "list": + _, level = html.get_list_info(box_attribute) + start = " " * (level - 1) + "* " + return start, end diff --git a/boxnotes2html/style.css b/boxnotes2html/style.css @@ -0,0 +1,3 @@ +body { + font-family: Sans-Serif; +} diff --git a/img/after.png b/img/after.png Binary files differ. diff --git a/img/before.png b/img/before.png Binary files differ. diff --git a/readme.md b/readme.md @@ -0,0 +1,75 @@ +# boxnotes2html + +boxnotes2html is a tool to convert Box Notes to html, markdown or plain text + +![before](img/before.png) + +*to* + +![after](img/after.png) + +# Rationale + +Box notes are a proprietary format. 
According to this [support ticket](https://community.box.com/t5/Desktop-and-Mobile-Forum/Exporting-Box-Notes/td-p/14834) the official way in which they meet GDPR compliance is through allowing users to print the notes page as a PDF when it's open in a web browser. Another option is to copy/paste the document into LibreOffice, Google Docs, Microsoft Word or some other text-editing program. Neither of these is a very attractive option when exporting a large number of Box notes, say, for archival/backup purposes or in the process of migrating to another platform. + + +# Installation + +No external dependencies! Just the good old Python standard library. (plus pytest for tests) + +Use `pip install boxnotes2html` to install. + +Requires Python 3. Tested on >=3.5 + +# Usage + +`boxnotes2html -h` for help and options. + +To convert a file: + +`boxnotes2html mynote.boxnote` + +This will create a file called mynote.html. You can optionally specify the filetype with `-f`; the current options, in order from most to least reliable, are html, md (markdown), and txt (plaintext). + +You can specify one or more files. If any of them is a directory, boxnotes2html will convert every `.boxnote` file under it and put each new file next to its source note. + +Box doesn't allow for you to download individual Box notes from the UI, but you can put them into a folder and download that folder, or use the API. + +You can also use boxnotes2html as an imported library: + +```python +from boxnotes2html import BoxNote + +# from string +note = BoxNote("boxnote_file_content_string") +# Or from file +note = BoxNote.from_file("filename.boxnote") + +note.as_html() # returns an html string +note.as_markdown() # returns a markdown string +note.as_text() # returns raw text +``` + +The HTML formatted by this tool is pretty ugly -- that may not be important for a lot of cases, but you may want to use [tidy](http://www.html-tidy.org/) or another tool to clean it up. 
You can also use [pandoc](https://pandoc.org/) to convert from html to another format, such as PDFs, markdown, microsoft word, and so on. I haven't tested all these tools, so YMMV. + + +Functioning: +* Text formatting (bold, underline, colors, size, etc) +* Hyperlinks + +Caveats: +* Lists are a little wonky -- they are just stars with spaces instead of HTML tags. This is TBD if there is interest. +* Tables are broken and will just be converted to plaintext. If you can figure out a clean way to do this, please submit a pull request. +* Images are just a link to the image in Box; converting them would require API access. +* Comments and annotations are not saved. +* This tool is in ALPHA, bugs may exist. Please report any issues you encounter! + +If this tool is unsatisfactory to your needs, please contact Box and tell them to build this much-needed feature! + +# Technical summary + +Box notes are formatted as JSON. They include a bunch of text and metadata. Some of this metadata is in a long "attributes" string. This string encodes various styles to different properties, using base36 numbers. This formatting is HTML-like but not exactly HTML. Check out the code for more details. Feel free to contact me on GitHub or email me with any questions or feedback: alex@alexwennerberg.com + +Thanks, and enjoy! 
+ +Alex diff --git a/setup.cfg b/setup.cfg @@ -0,0 +1,10 @@ +[bumpversion] +current_version = 0.1.2 +commit = True +tag = True + +[bumpversion:file:setup.py] + +[aliases] +test = pytest + diff --git a/setup.py b/setup.py @@ -0,0 +1,36 @@ +from setuptools import find_packages, setup + +with open("readme.md", "r") as fh: + long_description = fh.read() + +setup( + name="boxnotes2html", + version="0.1.2", + author="Alex Wennerberg", + author_email="alex@alexwennerberg.com", + description="Converting from Box Notes to HTML", + long_description=long_description, + url="https://github.com/alexwennerberg/boxnotes2html", + packages=find_packages(), + install_requires=[], + setup_requires=["pytest-runner"], + tests_require=["pytest"], + include_package_data=True, + entry_points={ + "console_scripts": [ + "boxnotes2html=boxnotes2html.cli:run", + "boxnote2html=boxnotes2html.cli:run", + ] # alias because if you're like me you'll type it wrong + }, + classifiers=[ + # As from http://pypi.python.org/pypi?%3Aaction=list_classifiers + # 'Development Status :: 1 - Planning', + # 'Development Status :: 2 - Pre-Alpha', + "Development Status :: 3 - Alpha", + # 'Development Status :: 4 - Beta', + # 'Development Status :: 5 - Production/Stable', + # 'Development Status :: 6 - Mature', + # 'Development Status :: 7 - Inactive', + "Programming Language :: Python :: 3", + ], +) diff --git a/tests/fixtures/Calendar.boxnote b/tests/fixtures/Calendar.boxnote @@ -0,0 +1 @@ +{"head":4,"savepointDataFileId":"","savepointListObject":{},"diffChangeset":"","invalidDiffChangeset":false,"authorList":{},"diffAuthorList":{},"shouldCreateSavepointBeforeApplyingNextRevision":false,"firstKeyRevision":1,"atext":{"text":"\n**Update months/dates based on relevant months for your usage.*\n*\n*Red = Key Milestone\n*Yellow = Event\n*Green = Project Management / Core Team\n*Grey = XXXX\n*Purple = XXXX\n*Light Grey = XXXX\n*\n*\n*Month\n*\n*\n*4\n*Milestone 1\n*Milestone 2\n*5\n*Milestone 1\n*Milestone 
2\n*6\n*Milestone 1\n*Milestone 2\n*7\n*Milestone 1\n*Milestone 2\n*8\n*Milestone 1\n*Milestone 2\n*11\n*EXAMPLE: \n*PM Sync (2pm ET)\n*\n*12\n*EXAMPLE: \n*PM Sync (2pm ET)\n*13\n*EXAMPLE: \n*PM Sync (2pm ET)\n*14\n*EXAMPLE: \n*PM Sync (2pm ET)\n*15\n*EXAMPLE:\n*PM Sync (2pm ET)\n*\n*18\n*\n*19\n*\n*20\n*\n*\n*21\n*\n*22\n*\n*25\n*\n*26\n*\n*27\n*\n*28\n*\n*29\n*\n*\n*\n*\n*Month\n*\n*\n*4\n*\n*\n*5\n*6\n*7\n*8\n*11\n*EXAMPLE: \n*PM Sync (2pm ET)\n*\n*12\n*EXAMPLE: \n*PM Sync (2pm ET)\n*13\n*EXAMPLE: \n*PM Sync (2pm ET)\n*14\n*EXAMPLE: \n*PM Sync (2pm ET)\n*15\n*EXAMPLE:\n*PM Sync (2pm ET)\n*\n*18\n*\n*19\n*\n*20\n*\n*\n*21\n*\n*22\n*\n*25\n*\n*26\n*\n*27\n*\n*28\n*\n*29\n*\n*\n*\n*\n*Month\n*\n*\n*4\n*\n*\n*5\n*6\n*7\n*8\n*11\n*EXAMPLE: \n*PM Sync (2pm ET)\n*\n*12\n*EXAMPLE: \n*PM Sync (2pm ET)\n*13\n*EXAMPLE: \n*PM Sync (2pm ET)\n*14\n*EXAMPLE: \n*PM Sync (2pm ET)\n*15\n*EXAMPLE:\n*PM Sync (2pm ET)\n*\n*18\n*\n*19\n*\n*20\n*\n*\n*21\n*\n*22\n*\n*25\n*\n*26\n*\n*27\n*\n*28\n*\n*29\n*\n*\n*\n\n","attribs":"|1+1*d*2*3+1*6*e*8*37+1q|1+1*1*2*3+1|1+1*1*2*3+1*6*e*8+3*7*8+g|1+1*1*2*3+1*6*3r*8+6*7*8+8|1+1*1*2*3+1*6*3s*8+5*7*8+x|1+1*1*2*3+1*6*3t*8+4*3t*8+1*7*8+6|1+1*1*2*3+1*6*3u*8+6*7*8+7|1+1*1*2*3+1*6*3v*8+a*7*8+7|1+1*1*2*3+1|1+1*1*2*3+1|1+1*d*2*3+1*6*e*1i*8+5|1+1*d*2*3+1|1+1*d*2*3+1|1+1*d*2*3+1*7*8+1*3w*3x|1+1*1*2*1q*3+1*6*7*8+b*3w*3x|1+1*1*2*1q*3+1*6*7*8+b*3w*3x|1+1*d*2*3+1*7*8+1*3y*3x|1+1*1*2*1q*3+1*6*7*8+b*3y*3x|1+1*1*2*1q*3+1*6*7*8+b*3y*3x|1+1*d*2*3+1*7*8+1*3z*3x|1+1*1*2*1q*3+1*6*7*8+b*3z*3x|1+1*1*2*1q*3+1*6*7*8+b*3z*3x|1+1*d*2*3+1*7*8+1*40*3x|1+1*1*2*1q*3+1*6*7*8+b*40*3x|1+1*1*2*1q*3+1*6*7*8+b*40*3x|1+1*d*2*3+1*7*8+1*41*3x|1+1*1*2*1q*3+1*6*7*8+b*41*3x|1+1*1*2*1q*3+1*6*7*8+b*41*3x|1+1*d*2*3+1*7*8+2*3w*42|1+1*d*2*3+1*6*3s*8*q+9*3w*42|1+1*d*2*3+1*6*3s*8*q+g*3w*42|1+1*d*2*3+1*3w*42|1+1*d*2*3+1*7*8+2*3y*42|1+1*d*2*3+1*6*3s*8*q+9*3y*42|1+1*d*2*3+1*6*3s*8*q+g*3y*42|1+1*d*2*3+1*7*8+2*3z*42|1+1*d*2*3+1*6*3s*8*q+9*3z*42|1+1*d*2*3+1*6*3s*8*q+g*3z*42|1+1*d*2*3+1*7*8+2*40*42|1+1*
d*2*3+1*6*3s*8*q+9*40*42|1+1*d*2*3+1*6*3s*8*q+g*40*42|1+1*d*2*3+1*7*8+2*41*42|1+1*d*2*3+1*6*3s*8*q+8*41*42|1+1*d*2*3+1*6*3s*8*q+g*41*42|1+1*d*2*3+1*41*42|1+1*d*2*3+1*7*8+2*3w*43|1+1*d*2*3+1*3w*43|1+1*d*2*3+1*7*8+2*3y*43|1+1*d*2*3+1*3y*43|1+1*d*2*3+1*7*8+2*3z*43|1+1*d*2*3+1*3z*43|1+1*d*2*3+1*3z*43|1+1*d*2*3+1*7*8+2*40*43|1+1*d*2*3+1*40*43|1+1*d*2*3+1*7*8+2*41*43|1+1*d*2*3+1*41*43|1+1*d*2*3+1*7*8+2*3w*44|1+1*d*2*3+1*3w*44|1+1*d*2*3+1*7*8+2*3y*44|1+1*d*2*3+1*3y*44|1+1*d*2*3+1*7*8+2*3z*44|1+1*d*2*3+1*3z*44|1+1*d*2*3+1*7*8+2*40*44|1+1*d*2*3+1*40*44|1+1*d*2*3+1*7*8+2*41*44|1+1*d*2*3+1*41*44|1+1*d*2*3+1*41*44|1+1*d*2*3+1|1+1*d*2*3+1|1+1*d*2*3+1*6*e*1i*8+5|1+1*d*2*3+1|1+1*d*2*3+1|1+1*d*2*3+1*7*8+1*45*46|1+1*d*2*3+1*45*46|1+1*d*2*3+1*45*46|1+1*d*2*3+1*7*8+1*47*46|1+1*d*2*3+1*7*8+1*48*46|1+1*d*2*3+1*7*8+1*49*46|1+1*d*2*3+1*7*8+1*4a*46|1+1*d*2*3+1*7*8+2*45*4b|1+1*d*2*3+1*6*3s*8*q+9*45*4b|1+1*d*2*3+1*6*3s*8*q+g*45*4b|1+1*d*2*3+1*45*4b|1+1*d*2*3+1*7*8+2*47*4b|1+1*d*2*3+1*6*3s*8*q+9*47*4b|1+1*d*2*3+1*6*3s*8*q+g*47*4b|1+1*d*2*3+1*7*8+2*48*4b|1+1*d*2*3+1*6*3s*8*q+9*48*4b|1+1*d*2*3+1*6*3s*8*q+g*48*4b|1+1*d*2*3+1*7*8+2*49*4b|1+1*d*2*3+1*6*3s*8*q+9*49*4b|1+1*d*2*3+1*6*3s*8*q+g*49*4b|1+1*d*2*3+1*7*8+2*4a*4b|1+1*d*2*3+1*6*3s*8*q+8*4a*4b|1+1*d*2*3+1*6*3s*8*q+g*4a*4b|1+1*d*2*3+1*4a*4b|1+1*d*2*3+1*7*8+2*45*4c|1+1*d*2*3+1*45*4c|1+1*d*2*3+1*7*8+2*47*4c|1+1*d*2*3+1*47*4c|1+1*d*2*3+1*7*8+2*48*4c|1+1*d*2*3+1*48*4c|1+1*d*2*3+1*48*4c|1+1*d*2*3+1*7*8+2*49*4c|1+1*d*2*3+1*49*4c|1+1*d*2*3+1*7*8+2*4a*4c|1+1*d*2*3+1*4a*4c|1+1*d*2*3+1*7*8+2*45*4d|1+1*d*2*3+1*45*4d|1+1*d*2*3+1*7*8+2*47*4d|1+1*d*2*3+1*47*4d|1+1*d*2*3+1*7*8+2*48*4d|1+1*d*2*3+1*48*4d|1+1*d*2*3+1*7*8+2*49*4d|1+1*d*2*3+1*49*4d|1+1*d*2*3+1*7*8+2*4a*4d|1+1*d*2*3+1*4a*4d|1+1*d*2*3+1*4a*4d|1+1*1*2*3+1|1+1*d*2*3+1|1+1*d*2*3+1*6*e*1i*8+5|1+1*d*2*3+1|1+1*d*2*3+1|1+1*d*2*3+1*7*8+1*4e*4f|1+1*d*2*3+1*4e*4f|1+1*d*2*3+1*4e*4f|1+1*d*2*3+1*7*8+1*4g*4f|1+1*d*2*3+1*7*8+1*4h*4f|1+1*d*2*3+1*7*8+1*4i*4f|1+1*d*2*3+1*7*8+1*4j*4f|1+1*d*2*3+1*7*8+2*4e*4k|1+1*d*2*3+
1*6*3s*8*q+9*4e*4k|1+1*d*2*3+1*6*3s*8*q+g*4e*4k|1+1*d*2*3+1*4e*4k|1+1*d*2*3+1*7*8+2*4g*4k|1+1*d*2*3+1*6*3s*8*q+9*4g*4k|1+1*d*2*3+1*6*3s*8*q+g*4g*4k|1+1*d*2*3+1*7*8+2*4h*4k|1+1*d*2*3+1*6*3s*8*q+9*4h*4k|1+1*d*2*3+1*6*3s*8*q+g*4h*4k|1+1*d*2*3+1*7*8+2*4i*4k|1+1*d*2*3+1*6*3s*8*q+9*4i*4k|1+1*d*2*3+1*6*3s*8*q+g*4i*4k|1+1*d*2*3+1*7*8+2*4j*4k|1+1*d*2*3+1*6*3s*8*q+8*4j*4k|1+1*d*2*3+1*6*3s*8*q+g*4j*4k|1+1*d*2*3+1*4j*4k|1+1*d*2*3+1*7*8+2*4e*4l|1+1*d*2*3+1*4e*4l|1+1*d*2*3+1*7*8+2*4g*4l|1+1*d*2*3+1*4g*4l|1+1*d*2*3+1*7*8+2*4h*4l|1+1*d*2*3+1*4h*4l|1+1*d*2*3+1*4h*4l|1+1*d*2*3+1*7*8+2*4i*4l|1+1*d*2*3+1*4i*4l|1+1*d*2*3+1*7*8+2*4j*4l|1+1*d*2*3+1*4j*4l|1+1*d*2*3+1*7*8+2*4e*4m|1+1*d*2*3+1*4e*4m|1+1*d*2*3+1*7*8+2*4g*4m|1+1*d*2*3+1*4g*4m|1+1*d*2*3+1*7*8+2*4h*4m|1+1*d*2*3+1*4h*4m|1+1*d*2*3+1*7*8+2*4i*4m|1+1*d*2*3+1*4i*4m|1+1*d*2*3+1*7*8+2*4j*4m|1+1*d*2*3+1*4j*4m|1+1*d*2*3+1*4j*4m|1+1*1*2*3+1|2+2","opCount":458,"appliedAttribsCount":1587,"maxAttribsOnSingleOp":5},"pool":{"numToAttrib":{"1":["align","left"],"2":["insertorder","first"],"3":["lmkr","1"],"6":["bold","true"],"7":["font-color-000000","true"],"8":["font-size-medium","true"],"13":["align","center"],"14":["font-color-ea1f43","true"],"26":["font-size-small","true"],"54":["font-size-large","true"],"62":["list","unchecked1"],"115":["italic","true"],"135":["font-color-f79600","true"],"136":["font-color-26c281","true"],"137":["font-color-666666","true"],"138":["font-color-9f3fed","true"],"139":["font-color-aaaaaa","true"],"140":["struct-table02e30be1c6264ee3a608c2f19a6f13f0_cole54c825628be480195743ae6173fd974","true"],"141":["struct-table02e30be1c6264ee3a608c2f19a6f13f0_row4ea3329d8f264d11a0646d5a3f206fa1","true"],"142":["struct-table02e30be1c6264ee3a608c2f19a6f13f0_colceab2d5e592c4e80bbb0555728d64ec6","true"],"143":["struct-table02e30be1c6264ee3a608c2f19a6f13f0_col271ad38423c84e24aa017d452039be42","true"],"144":["struct-table02e30be1c6264ee3a608c2f19a6f13f0_col9ed6a32b1e16423fba0d2158b3a1bc81","true"],"145":["struct-table02e30be1c6264ee3
a608c2f19a6f13f0_col2932a2d1aca343299f44016887bc37ec","true"],"146":["struct-table02e30be1c6264ee3a608c2f19a6f13f0_rowa24678b9c8ea49d6939aceb43c29bde8","true"],"147":["struct-table02e30be1c6264ee3a608c2f19a6f13f0_rowbba57d4483b9402b8158f52cc7e284a5","true"],"148":["struct-table02e30be1c6264ee3a608c2f19a6f13f0_row2515d8e1c61842269828e4c9d9d73abe","true"],"149":["struct-tablebac8b7be25de4e2ba9cac08f97d35597_col55f031debd5546efa34cc3b2e661b3e7","true"],"150":["struct-tablebac8b7be25de4e2ba9cac08f97d35597_row82c59cdd476b4242a7c913f820b6c4da","true"],"151":["struct-tablebac8b7be25de4e2ba9cac08f97d35597_cold1243f7ffc4c4b34839a3d85591c8cb6","true"],"152":["struct-tablebac8b7be25de4e2ba9cac08f97d35597_col32081423fa054fb2a2e091b2c83a0338","true"],"153":["struct-tablebac8b7be25de4e2ba9cac08f97d35597_col20e4e6aca14d4c2ba3e32648abdbc309","true"],"154":["struct-tablebac8b7be25de4e2ba9cac08f97d35597_col39f4a864e34b4a6ebeaa9923d19536ba","true"],"155":["struct-tablebac8b7be25de4e2ba9cac08f97d35597_row17aaa69e49814be6a2c072353b851244","true"],"156":["struct-tablebac8b7be25de4e2ba9cac08f97d35597_row822cc222eec0488095c9996cfc943c39","true"],"157":["struct-tablebac8b7be25de4e2ba9cac08f97d35597_row1525f9f91cc44504b6a12b89866cfa65","true"],"158":["struct-tableb8b521c96ed248a887ca9093ef1ddcff_colbd758b842225445ba81d41ec04196e4b","true"],"159":["struct-tableb8b521c96ed248a887ca9093ef1ddcff_rowef0d86d9528444faa5f8c3f2855578e5","true"],"160":["struct-tableb8b521c96ed248a887ca9093ef1ddcff_cold2b0d8cb109f4c9daa7e7f99d31b48a1","true"],"161":["struct-tableb8b521c96ed248a887ca9093ef1ddcff_col3336f04460e34f59816403f0558206dd","true"],"162":["struct-tableb8b521c96ed248a887ca9093ef1ddcff_col22d833c0105a4548bee3d0701b20f98c","true"],"163":["struct-tableb8b521c96ed248a887ca9093ef1ddcff_cole036756efa40491298025a964a0ab2aa","true"],"164":["struct-tableb8b521c96ed248a887ca9093ef1ddcff_row01021e295a2b4b589a410064411d7954","true"],"165":["struct-tableb8b521c96ed248a887ca9093ef1ddcff_row445959e70b924e5ab98b
1e8d81d0e9c9","true"],"166":["struct-tableb8b521c96ed248a887ca9093ef1ddcff_row7e89709ac9de458db3cfda6da47ac15d","true"]},"nextNum":168},"chatHead":-1,"publicStatus":false,"passwordHash":null,"savedRevisions":[]} diff --git a/tests/fixtures/complex_note.boxnote b/tests/fixtures/complex_note.boxnote @@ -0,0 +1 @@ +{"head":545,"savepointDataFileId":"343755894188","savepointListObject":{"284":{"revisionId":284,"timestamp":1541288357657,"state":"saved","type":"session","diffAuthorList":{"3960991723":true}},"323":{"revisionId":323,"timestamp":1541290718727,"state":"saved","type":"session","diffAuthorList":{"3960991723":true}},"333":{"revisionId":333,"timestamp":1541309694166,"state":"saved","type":"session","diffAuthorList":{"3960991723":true}},"388":{"revisionId":388,"timestamp":1541366077870,"state":"saved","type":"session","diffAuthorList":{"3960991723":true}},"390":{"revisionId":390,"timestamp":1541375156107,"state":"saved","type":"session","diffAuthorList":{"3960991723":true}},"393":{"revisionId":393,"timestamp":1541376226812,"state":"saved","type":"session","diffAuthorList":{"3960991723":true}},"433":{"revisionId":433,"timestamp":1541879411587,"state":"saved","type":"session","diffAuthorList":{"3960991723":true}},"447":{"revisionId":447,"timestamp":1541884026206,"state":"saved","type":"session","diffAuthorList":{"3960991723":true}},"473":{"revisionId":473,"timestamp":1541887355145,"state":"saved","type":"session","diffAuthorList":{"3960991723":true}},"480":{"revisionId":480,"timestamp":1541906919649,"state":"saved","type":"session","diffAuthorList":{"3960991723":true}},"486":{"revisionId":486,"timestamp":1541909084627,"state":"saved","type":"session","diffAuthorList":{"3960991723":true}},"500":{"revisionId":500,"timestamp":1549760179142,"state":"saved","type":"periodic","diffAuthorList":{"3960991723":true}},"519":{"revisionId":519,"timestamp":1549760194285,"state":"saved","type":"session","diffAuthorList":{"3960991723":true}},"522":{"revisionId":522,"timestamp":1549
767533808,"state":"saved","type":"session","diffAuthorList":{"3960991723":true}}},"lastEditTimestamp":1549770633666,"diffChangeset":"Z:k9>k|o=6t*4*f=1|1=g*4*1u=1|7=2g=8*4|1+1*4*2*2d*3*f+1*4*5*6+i$\n*lettered singleton","invalidDiffChangeset":false,"authorList":{"3960991723":{"authorName":"Alex Wennerberg","authorCustomAvatarUrl":"/users/3960991723/avatar","lastAccessTime":1549767460123}},"diffAuthorList":{"3960991723":true},"shouldCreateSavepointBeforeApplyingNextRevision":false,"firstKeyRevision":5,"atext":{"text":"Bold\nItalic\nUnderlined\nStrikethrough\n\nmulti-\nline\n\nformatting\n\nsmall\nbody\nsubtitle\ntitle\n\npurple blue orange red grey dark grey black pink green \n\nbold italic and underlined green title\n\noverlapping tags \n\n*Checkbox unchecked\n*Checkbox checked\n\n*Numbered list 1\n*Numbered list 2\n\n*Unordered list 1\n*Unordered list 2\n*Subitem\n*Subsubitem\n*sub checkbox\n*subitem\n*lettered singleton\n\n*in\n*den\n*t\n\nLeft Aligned\n*Center aligned\n*Right aligned\n*\n*Uploaded file: *\nBox Link file: *\n\ntable:\nleft top\ncenter top\n\nright top\nleft bottom \ncenter bottom\nlist in table:\n*1\n*2\nempty cell above this one \nright bottom\n\nEmpty table one cell:\n\n\nLorem Ipsum\n\nHyperlink\n\nspecial characters: \\/?*-[]{}<>.,*&%$#@!=+~`\"\"''|\n\njust a star: *\nUnicode characters: 
❦,😍\n\n","attribs":"*4*7*5*6+4|1+1*4*5*6*8+6|1+1*4*5*6*a+a*4|1+1*4*5*6*1m+d*4|2+2*4*7*5*6+6*4*7|1+1*4*7*5*6+4*4*7|1+1*4|1+1*4*7*5*6+a*4*7|1+1*4|1+1*4*5*u+5*4|1+1*4*5*6+4*4|1+1*4*5*v+8*4|1+1*4*5*w+5*4|2+2*4*x*6+7*4*y*6+5*4*z*6+7*4*10*6+4*4*11*6+5*4*12*6+a*4*5*6+6*4*13*6+5*4*14*6+6*4|2+2*4*7*14*w*8*a+12*4|2+2*4*5*6*a+3*4*5*6*8*a+2*4*7*5*6*8*a+2*4*7*5*6*a+3*4*7*5*6+7*4|2+2*4*2*b*3+1*4*5*6+i*4|1+1*4*2*d*3+1*4*5*6+g*4|1+1|1+1*4*2*e*3*f+1*4*5*6+8*4*7*5*6+6*4*5*6+1*4|1+1*4*2*e*3*1u+1*4*5*6+f*4|2+2*4*2*m*3+1*4*5*6+g*4|1+1*4*2*m*3+1*4*5*6+g*4|1+1*4*2*23*3*f+1*4*5*6+7*4|1+1*4*2*24*3+1*4*5*6+a*4|1+1*4*2*25*3+1*4*5*6+c*4|1+1*4*2*23*3+1*4*5*6+7*4|1+1*4*2*2d*3*f+1*4*5*6+i*4|2+2*4*2*m*3+1*4*5*6+2*4|1+1*4*2*28*3+1*4*5*6+3*4|1+1*4*2*26*3+1*4*5*6+1*4|2+2*4*5*6+c*4|1+1*g*4*2*3+1*4*5*6+e*4|1+1*h*4*2*3+1*4*5*6+d*4|1+1*h*4*2*3+1*4|1+1*1*4*2*3+1*4*5*6+f*4*j+1|1+1*4*5*6+f*4*l+1|1+1*4|1+1*4*5*6+6*4|1+1*4*5*6+8*1g*1d|1+1*4+2*4*5*6+8*1h*1d|1+1*1t*1d|1+1*4*5*6+9*18*1d|1+1*4*5*6+c*1g*1b|1+1*4*5*6+d*1h*1b|1+1*4*5*6+e*4*1h*1b|1+1*4*2*m*3+1*4*5*6+1*4*1h*1b|1+1*4*2*m*3+1*4*5*6+1*4*1h*1b|1+1*4+w*4*1t*1b|1+1*4*5*6+c*4*18*1b|1+1*4|1+1*4*5*6+l*4|1+1*4*1y*21|1+1*4|1+1*4*5*6+7*1j*22*4*5*6+3*4*5*6+1*4|2+2*4*5*6*1k+9*4|2+2*4*5*6+1d*4|2+2*4*5*6+d*4*7*5*6+1*4|1+1*4*5*6+k*4*w+1*4*5*6+1*4*w*1n+2|2+2","opCount":152,"appliedAttribsCount":394,"maxAttribsOnSingleOp":6},"pool":{"numToAttrib":{"0":["author","a.4fz9s4pIrvcRKF5l"],"1":["align","left"],"2":["insertorder","first"],"3":["lmkr","1"],"4":["author","3960991723"],"5":["font-color-000000","true"],"6":["font-size-medium","true"],"7":["bold","true"],"8":["italic","true"],"9":["italic",""],"10":["underline","true"],"11":["list","unchecked1"],"12":["removed","true"],"13":["list","checked1"],"14":["list","number1"],"15":["start","1"],"16":["align","center"],"17":["align","right"],"18":["image-94756f9828f84a459464c43a56bdb674-JTdCJTIycGxhY2Vob2xkZXJUeXBlJTIyJTNBJTIycHJvZ3Jlc3MlMjIlMkMlMjJmaWxlTmFtZSUyMiUzQSUyMm9wZW5zb3VyY2VwYXJyb3QuZ2lmJTIyJTJDJTIyZmlsZVNpemUlMjIlM
0E1MzY0JTJDJTIydXBsb2FkZXJVc2VySWQlMjIlM0ElMjIzOTYwOTkxNzIzJTIyJTdE","true"],"19":["image-305f112d24f84324805f3eaa9e64ecbe-JTdCJTIyYm94U2hhcmVkTGluayUyMiUzQSUyMmh0dHBzJTNBJTJGJTJGYXBwLmJveC5jb20lMkZzJTJGMnU0MXE5aTdrb25mcjBlYWZzOW45NHoyY2hqbXB1ZGglMjIlMkMlMjJib3hGaWxlSWQlMjIlM0ElMjIzNDM3ODk3NTE2MzUlMjIlMkMlMjJmaWxlTmFtZSUyMiUzQSUyMm9wZW5zb3VyY2VwYXJyb3QuZ2lmJTIyJTdE","true"],"20":["image-94756f9828f84a459464c43a56bdb674-JTdCJTIycGxhY2Vob2xkZXJUeXBlJTIyJTNBJTIycHJvZ3Jlc3MlMjIlMkMlMjJmaWxlTmFtZSUyMiUzQSUyMm9wZW5zb3VyY2VwYXJyb3QuZ2lmJTIyJTJDJTIyZmlsZVNpemUlMjIlM0E1MzY0JTJDJTIydXBsb2FkZXJVc2VySWQlMjIlM0ElMjIzOTYwOTkxNzIzJTIyJTdE",""],"21":["image-a9c08b065cd74419a50733da4e18e08a-JTdCJTIyYm94U2hhcmVkTGluayUyMiUzQSUyMmh0dHBzJTNBJTJGJTJGYXBwLmJveC5jb20lMkZzJTJGcGt5NWpsbGJ2MWs1a3Z2ZnZ4aGx0NDMzcnkxM3o0emMlMjIlMkMlMjJib3hGaWxlSWQlMjIlM0ElMjIzNDM3ODQ2NTE0OTYlMjIlN0Q=","true"],"22":["list","bullet1"],"23":["struct-tablee570653e239148febcd2f094f4da2bd6_colc551e69e7c7b4730b8ce9301aff17df4","true"],"24":["struct-tablee570653e239148febcd2f094f4da2bd6_row53b7dab05cd04541b9aee99f367a71a0","true"],"25":["struct-tablee570653e239148febcd2f094f4da2bd6_col3b36597a53554a65b44d5a1209736d3a","true"],"26":["struct-tablee570653e239148febcd2f094f4da2bd6_cole73ab28e24f94d73ba9c2f28c7331d50","true"],"27":["struct-tablee570653e239148febcd2f094f4da2bd6_row32492c5738e24b0989ab5a4eec36aecb","true"],"28":["struct-tablee570653e239148febcd2f094f4da2bd6_row30cc1c1a89624287bb0105c459a6fce9","true"],"29":["struct-tablee570653e239148febcd2f094f4da2bd6_row3ea4cb28743c49be97c9633bd93094c3","true"],"30":["font-size-small","true"],"31":["font-size-large","true"],"32":["font-size-verylarge","true"],"33":["font-color-9f3fed","true"],"34":["font-color-6f87ff","true"],"35":["font-color-f79600","true"],"36":["font-color-ea1f43","true"],"37":["font-color-aaaaaa","true"],"38":["font-color-666666","true"],"39":["font-color-fe6b9c","true"],"40":["font-color-26c281","true"],"41":["struct-table7617f9d00bed46c394c78a39283cc
6f7_col5d08b2e4facf4356ad29aa492b7c5768","true"],"42":["struct-table7617f9d00bed46c394c78a39283cc6f7_row2c21d5cabf034c1fa28ff9d036b29f04","true"],"43":["struct-table7617f9d00bed46c394c78a39283cc6f7_col27fb41862e2b483c8ce3e762636477e7","true"],"44":["struct-table7617f9d00bed46c394c78a39283cc6f7_colb62198e55347461999f71e61d32ec744","true"],"45":["struct-table7617f9d00bed46c394c78a39283cc6f7_row290f5b13056b42a59ce4184e88b51041","true"],"46":["struct-table7617f9d00bed46c394c78a39283cc6f7_row8e68d871da464eafa066c81d3912c88d","true"],"47":["struct-table7617f9d00bed46c394c78a39283cc6f7_rowe48ad0472a894a4390935ff3ace66e1b","true"],"48":["struct-table7617f9d00bed46c394c78a39283cc6f7_rowaf244055ea5d47458fe2b6f70caf97b3","true"],"49":["struct-table7617f9d00bed46c394c78a39283cc6f7_row353cc3d9595c48c7b121de9ff6c4b09c","true"],"50":["struct-table7617f9d00bed46c394c78a39283cc6f7_rowf7f2eaa662cb4355b5601bebc8f1ce57","true"],"51":["struct-table7617f9d00bed46c394c78a39283cc6f7_col8ebd53d631b4492a8268729703395dab","true"],"52":["struct-table7617f9d00bed46c394c78a39283cc6f7_col3f985c5df103425cb4b5565f88cdbb16","true"],"53":["struct-table7617f9d00bed46c394c78a39283cc6f7_col7d7027e2594246e7a20d55d58bf780a0","true"],"54":["font-size-medium",""],"55":["annotation-7fd80969aa1044aab57ffff55182d431","true"],"56":["link-MTU0MTI5MDY0ODIzNS1odHRwczovL2dvb2dsZS5jb20=","true"],"57":["underline",""],"58":["strikethrough","true"],"59":["link-MTU0MTI5MDcxMzcxMC1odHRwczovL2Vtb2ppcGVkaWEub3JnL3NtaWxpbmctZmFjZS13aXRoLWhlYXJ0LXNoYXBlZC1leWVzLw==","true"],"60":["link-MTU0MTI5MDY0ODIzNS1odHRwczovL2dvb2dsZS5jb20=",""],"61":["font-color-24292e","true"],"62":["link-MTU0MTM3NjIyMzg1My1odHRwczovL2NvbW11bmljYXRlaGVhbHRoLmdpdGh1Yi5pby9ib3hub3RlMm1hcmtkb3duL2JveG5vdGUybWFya2Rvd24uanMlMjI7ZG9jdW1lbnQuYm9keS5hcHBlbmRDaGlsZChzKTslN0QpKCk=","true"],"63":["link-MTU0MTM3NjIyNTU0MC1odHRwczovL2NvbW11bmljYXRlaGVhbHRoLmdpdGh1Yi5pby9ib3hub3RlMm1hcmtkb3duL2JveG5vdGUybWFya2Rvd24uanMlMjI7ZG9jdW1lbnQuYm9keS5hcHBlbmRDaGlsZChzKTsl
N0QpKCk=","true"],"64":["struct-table7617f9d00bed46c394c78a39283cc6f7_col81c72794cb9440a1953a61fe49a40c56","true"],"65":["struct-table7617f9d00bed46c394c78a39283cc6f7_col32a1958d093c432985e441b749a41490","true"],"66":["start","2"],"67":["struct-tablefc06a78fd33849e1a5b478b5baa3a377_coldad6b51086c84283b1af357f940c8c70","true"],"68":["struct-tablefc06a78fd33849e1a5b478b5baa3a377_rowf4c6a4a96d4340c1bdff9256429b83fb","true"],"69":["struct-tablefc06a78fd33849e1a5b478b5baa3a377_col3f02e3a1868d413297576664f26300b7","true"],"70":["struct-tablefc06a78fd33849e1a5b478b5baa3a377_cole74e231d032040d7adf190a963febd51","true"],"71":["struct-tablefc06a78fd33849e1a5b478b5baa3a377_row0f89063001e94bf6994d801a44c283ab","true"],"72":["struct-tablefc06a78fd33849e1a5b478b5baa3a377_row9e91e65b770e41aab78b8fde3997ebe4","true"],"73":["struct-tablefc06a78fd33849e1a5b478b5baa3a377_row6b031ed0ff9c4b78956592b29cd2aff6","true"],"74":["annotation-826f1af05b3d4154b2883c74836f201f","true"],"75":["list","bullet2"],"76":["list","bullet3"],"77":["list","unchecked3"],"78":["list","bullet4"],"79":["list","bullet7"],"80":["list","bullet8"],"81":["list","indent2"],"82":["list","indent3"],"83":["list","bullet6"],"84":["list","checked3"],"85":["list","number2"]},"nextNum":86},"chatHead":-1,"publicStatus":false,"passwordHash":null,"savedRevisions":[]} +\ No newline at end of file diff --git a/tests/fixtures/normal note.boxnote b/tests/fixtures/normal note.boxnote @@ -0,0 +1 @@ +{"head":117,"savepointDataFileId":"400025482140","savepointListObject":{"80":{"revisionId":80,"timestamp":1549845414056,"state":"saved","type":"session","diffAuthorList":{"3960991723":true}}},"lastEditTimestamp":1549846860417,"diffChangeset":"Z:3i5<2nq*4*9|1-8*4*e*5*f+6|1=1=14*4*k*4=b=7s*4*9|4-11n|1=1*4*8=1*4*9-6|1=2s*4*8=1=17*4*9-1k|1=1*4*c=1=13*4*e*4=t|1=w*4*d=1=46*4*l*4=w|1=e*4*c=1|1=31*4*d=1|1=2y*4*j=1=1i*4*9|7-193*4*9-b8|4=f*4*m*4=1$HEADER","invalidDiffChangeset":false,"authorList":{"3960991723":{"authorName":"Alex 
Wennerberg","authorCustomAvatarUrl":"/users/3960991723/avatar"}},"diffAuthorList":{"3960991723":true},"shouldCreateSavepointBeforeApplyingNextRevision":false,"firstKeyRevision":5,"atext":{"text":"HEADER\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Duis in lorem est. Nunc ac lectus eget nibh iaculis hendrerit vitae in lectus. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Morbi consectetur nunc leo, id sollicitudin nibh blandit a. Donec euismod mollis nisl quis vehicula. Donec \n*ex maximus tortor, ac convallis nisl lorem laoreet nisi. Sed ullamcorper purus porttitor convallis \n*eleifend. Vivamus venenatis vestibulum odio\n*Donec vehicula lacus ut nisi suscipit, sit amet cursus mauris varius. Aenean consectetur fermentum \n*metus, eu faucibus ex luctus eget. Sed consectetur metus sit amet nisl fermentum, in consectetur justvenenatis. Donec ipsum quam, tempor quis arcu a, tincidunt vehicula neque. Nullam eget ligula \n*venenatis, sollicitudin neque non, iaculis ipsum. Phasellus eu nunc nec dui lobortis facilisis. Nullam quis \n*augue et massa consequat tincidunt. Vestibulum sit amet libero augue. Praesent condimentum sed ligula eu \n*viverra. 
Praesent bibendum dapibus erat vitae posuere.\n\nTO DO\n*lorem\n*ipsum\n*lorem2\n*impsum2\n\n","attribs":"*4*e*5*f+6*4|1+1*4+14*4*k+b*4|1+7t*4*2*7*3*8+1*4*b*6+2r*4|1+1*4*2*a*3*8+1*4*b*6+17*4|1+1*4*2*a*3*c+1*4*b*6+13*4*e*b*6+t*4*b*6+v*4|1+1*4*2*a*3*d+1*4*b*6+46*4*b*6*l+w*4*b*6+d*4|1+1*4*2*7*3*c+1*4*b*6+30*4|1+1*4*2*7*3*d+1*4*b*6+2x*4|1+1*4*2*7*3*j+1*4*b*6+1i*4|2+2*4*5*f+5|1+1*4*2*i*3+1*4*5*6+5*4|1+1*4*2*m*3+1*4*5*6+5*4|1+1*4*2*i*3+1*4*5*6+6*4|1+1*4*2*i*3+1*4*5*6+7|2+2","opCount":44,"appliedAttribsCount":120,"maxAttribsOnSingleOp":5},"pool":{"numToAttrib":{"0":["author","a.4fz9s4pIrvcRKF5l"],"1":["align","left"],"2":["insertorder","first"],"3":["lmkr","1"],"4":["author","3960991723"],"5":["font-color-000000","true"],"6":["font-size-medium","true"],"7":["list","number1"],"8":["start","1"],"9":["removed","true"],"10":["list","number2"],"11":["font-color-222222","true"],"12":["start","2"],"13":["start","3"],"14":["bold","true"],"15":["font-size-large","true"],"16":["font-size-medium",""],"17":["link-MTU0OTg0NTM4NDI5MC1nb29nbGUuY29t","true"],"18":["list","unchecked1"],"19":["start","4"],"20":["link-MTU0OTg0NjgyMTM3MC1nb29nbGUuY29t","true"],"21":["italic","true"],"22":["list","checked1"]},"nextNum":23},"chatHead":-1,"publicStatus":false,"passwordHash":null,"savedRevisions":[]} +\ No newline at end of file diff --git a/tests/fixtures/notes.py b/tests/fixtures/notes.py @@ -0,0 +1,36 @@ +import os +import pytest +from boxnotes2html.boxnote import BoxNote, AttributeChunk + +here = os.path.dirname(__file__) +simple_note_path = "simple_note.boxnote" +complex_note_path = "complex_note.boxnote" + + +@pytest.fixture +def simple_note_fullpath(): + return os.path.join(here, simple_note_path) + + +@pytest.fixture +def simple_note(): + ''' + A basic note to test loading/unloading/parsing + ''' + with open(os.path.join(here, simple_note_path)) as f: + return BoxNote(f.read()) + + +@pytest.fixture +def complex_note(): + ''' + A more complex, contrived note that has every possible data 
structure I can think of + in boxnotes + ''' + with open(os.path.join(here, complex_note_path)) as f: + return BoxNote(f.read()) + +@pytest.fixture() +def simple_attribute(): + return AttributeChunk("*1*4*10+2|1+1") + diff --git a/tests/fixtures/simple_note.boxnote b/tests/fixtures/simple_note.boxnote @@ -0,0 +1 @@ +{"head":11,"savepointDataFileId":"343756088721","savepointListObject":{},"lastEditTimestamp":1541287851717,"diffChangeset":"Z:1>f*4*5*6+d|1+1*4|1+1$Hello, World!\n\n","invalidDiffChangeset":false,"authorList":{"3960991723":{"authorName":"Alex Wennerberg","authorCustomAvatarUrl":"/users/3960991723/avatar"}},"diffAuthorList":{"3960991723":true},"shouldCreateSavepointBeforeApplyingNextRevision":false,"firstKeyRevision":5,"atext":{"text":"Hello, World!\n\n\n","attribs":"*4*5*6+d|1+1*4|1+1|1+1","opCount":4,"appliedAttribsCount":4,"maxAttribsOnSingleOp":3},"pool":{"numToAttrib":{"0":["author","a.4fz9s4pIrvcRKF5l"],"1":["align","left"],"2":["insertorder","first"],"3":["lmkr","1"],"4":["author","3960991723"],"5":["font-color-000000","true"],"6":["font-size-medium","true"]},"nextNum":7},"chatHead":-1,"publicStatus":false,"passwordHash":null,"savedRevisions":[]} +\ No newline at end of file diff --git a/tests/test_boxhtml.py b/tests/test_boxhtml.py @@ -0,0 +1,22 @@ +from boxnotes2html import html + + +def test_html_tag_conversion(): + bold = ["bold", "true"] + assert html.convert_simple_element_to_html_tag(bold) == html.HTMLTag("b", {}) + + +def test_url_conversion(): + url = "link-MTU0MTI5MDcxMzcxMC1odHRwczovL2Vtb2ppcGVkaWEub3JnL3NtaWxpbmctZmFjZS13aXRoLWhlYXJ0LXNoYXBlZC1leWVzLw==" + assert ( + html._decode_link(url) + == "https://emojipedia.org/smiling-face-with-heart-shaped-eyes/" + ) + + +def test_image_conversion(): + image = 
"image-305f112d24f84324805f3eaa9e64ecbe-JTdCJTIyYm94U2hhcmVkTGluayUyMiUzQSUyMmh0dHBzJTNBJTJGJTJGYXBwLmJveC5jb20lMkZzJTJGMnU0MXE5aTdrb25mcjBlYWZzOW45NHoyY2hqbXB1ZGglMjIlMkMlMjJib3hGaWxlSWQlMjIlM0ElMjIzNDM3ODk3NTE2MzUlMjIlMkMlMjJmaWxlTmFtZSUyMiUzQSUyMm9wZW5zb3VyY2VwYXJyb3QuZ2lmJTIyJTdE" + assert ( + html._decode_image(image)["boxSharedLink"] + == "https://app.box.com/s/2u41q9i7konfr0eafs9n94z2chjmpudh" + ) diff --git a/tests/test_boxnote.py b/tests/test_boxnote.py @@ -0,0 +1,43 @@ +from boxnotes2html import BoxNote +from fixtures.notes import * + + +def test_boxnote_loads(simple_note): + assert simple_note + assert simple_note.text + assert simple_note.attribute_pool + + +def test_loads_from_file(simple_note_fullpath): + boxnote = BoxNote.from_file(simple_note_fullpath) + assert boxnote + assert str(boxnote) + + +def test_simple_note_metadata(simple_note): + # WIP + assert simple_note.get_metadata() + + +def test_simple_note_parsing(simple_note): + return + + +def test_attribute_chunk(simple_attribute): + assert simple_attribute + assert simple_attribute.attributes == set([1, 36, 4]) + assert simple_attribute.num_characters == 3 + + +def test_attributes(simple_note): + assert simple_note.attribute_chunks + + +def test_convert_to_html(simple_note): + print(simple_note.as_html()) + assert simple_note.as_html() + + +def test_convert_to_html_complex(complex_note): + print(complex_note.as_html()) + assert complex_note.as_html() diff --git a/tests/test_cli.py b/tests/test_cli.py @@ -0,0 +1,15 @@ +from unittest.mock import patch +from boxnotes2html import cli + + +@patch("boxnotes2html.cli.write_file") +def test_command_line_runs(write_file): + args = ["a", "b", "-f", "md"] + cli.run_with_args(args) + assert write_file.call_count == 2 + + +def test_everything(): + for txtfmt in "md", "txt", "html": + args = ["tests/fixtures", "-f", txtfmt] + cli.run_with_args(args) diff --git a/tox.ini b/tox.ini @@ -0,0 +1,8 @@ +[tox] +envlist = py35,py36,py37 + +[testenv] +# install pytest 
in the virtualenv where commands will be executed +deps = + pytest +commands = pytest