# Source code for bookmarks

# -*- coding: utf-8 -*-
#
#       Copyright 2011 Liftoff Software Corporation
#

__doc__ = """\
bookmarks.py - A plugin for Gate One that adds fancy bookmarking capabilities.

Hooks
-----
This Python plugin file implements the following hooks::

    hooks = {
        'Web': [
            (r"/bookmarks/fetchicon", FaviconHandler),
            (r"/bookmarks/export", ExportHandler),
            (r"/bookmarks/import", ImportHandler),
        ],
        'WebSocket': {
            'bookmarks_sync': save_bookmarks,
            'bookmarks_get': get_bookmarks,
            'bookmarks_deleted': delete_bookmarks,
            'bookmarks_rename_tags': rename_tags,
        }
    }

Docstrings
----------
"""

# Meta
# Module metadata: version (as a string and as a tuple), license and author.
__version__ = '1.0'
__license__ = "GNU AGPLv3 or Proprietary (see LICENSE.txt)"
__version_info__ = (1, 0)
__author__ = 'Dan McDougall <daniel.mcdougall@liftoffsoftware.com>'

# Python stdlib
import os, sys, logging, time
from functools import partial

# Our stuff
from gateone import BaseHandler
from utils import noop, json_encode

# Tornado stuff
import tornado.web
from tornado.escape import json_decode

# 3rd party stuff
# The following two lines let us import modules in the "dependencies" dir
# plugin_path is the directory containing this file; it is also used by
# ExportHandler to locate the "templates" directory.
plugin_path = os.path.split(__file__)[0]
# NOTE: This appends to sys.path at import time (a module-level side effect).
sys.path.append(os.path.join(plugin_path, "dependencies"))

# Globals
# Lookup table that maps real booleans and their 'True'/'true'/'False'/'false'
# string spellings to actual booleans.
# NOTE(review): Not referenced anywhere in the visible part of this file.
boolean_fix = {
    True: True,
    False: False,
    'True': True,
    'False': False,
    'true': True,
    'false': False
}

# Helper functions
def unescape(s):
    """
    Unescape HTML code refs; c.f. http://wiki.python.org/moin/EscapingHtml

    Replaces every named HTML entity (e.g. ``&amp;``) found in *s*, plus the
    numeric apostrophe reference ``&#39;``, with the character it represents
    and returns the resulting string.  Other numeric references are left
    untouched.
    """
    import re
    try:
        from html.entities import name2codepoint # Python 3
    except ImportError:
        from htmlentitydefs import name2codepoint # Python 2
    try:
        to_char = unichr # Python 2
    except NameError:
        to_char = chr # Python 3
    # Work on a copy so we don't permanently modify the stdlib's table
    codepoints = dict(name2codepoint)
    # Fix the missing one:
    codepoints['#39'] = 39
    pattern = '&(%s);' % '|'.join(re.escape(ref) for ref in codepoints)
    return re.sub(pattern, lambda m: to_char(codepoints[m.group(1)]), s)
def _token_attrs(data):
    """
    Yields ``(name, value)`` pairs from the attribute *data* of an html5lib
    tree walker token.  html5lib changed from using lists to using dicts at
    some point after 0.90 so both shapes are handled.
    """
    if isinstance(data, dict):
        for key, value in data.items():
            # Keys may be (namespace, name) tuples or plain attribute names
            attr_name = key[-1] if isinstance(key, tuple) else key
            yield attr_name, value
    elif isinstance(data, list):
        for pair in data:
            yield pair[0], pair[1]


def parse_bookmarks_html(html):
    """
    Reads the Netscape-style bookmarks.html in string, *html* and returns a
    list of bookmark dicts with the keys: url, name, tags, notes, visits,
    updated, created, updateSequenceNum, and images.

    Folder names (``<h3>``) become tags, ``<dd>`` text becomes the notes of
    the most recently parsed bookmark and the *created*/*updated* values are
    normalized to 13-digit (millisecond) epoch timestamps.
    """
    # If this looks impossibly complicated it's because parsing HTML streams is
    # dark voodoo.
    import html5lib
    out_list = []
    p = html5lib.HTMLParser(tree=html5lib.treebuilders.getTreeBuilder("dom"))
    dom_tree = p.parse(html)
    walker = html5lib.treewalkers.getTreeWalker("dom")
    stream = walker(dom_tree)
    tags = [] # Stack of folder names we're currently inside of
    h3on = False # Inside an <h3> (folder name)
    aon = False # Inside an <a> (bookmark)
    ddon = False # Inside a <dd> (notes)
    add_date = None
    url = None
    icon = None
    name = ""
    for token in stream:
        tag_name = token.get('name')
        token_type = token['type']
        # Not every token carries 'data' (e.g. end tags in newer html5lib)
        data = token.get('data')
        if tag_name == 'dl':
            if token_type == 'EndTag' and tags:
                tags.pop() # Leaving a folder
        if tag_name == 'dd':
            if token_type == 'StartTag':
                ddon = True
            elif token_type == 'EndTag':
                ddon = False
        if tag_name == 'h3':
            if token_type == 'StartTag':
                h3on = True
            elif token_type == 'EndTag':
                h3on = False
        if tag_name == 'a':
            if token_type == 'StartTag':
                aon = True
            elif token_type == 'EndTag':
                aon = False
                now = int(round(time.time() * 1000))
                if not add_date: # JavaScript-style 13-digit epoch:
                    add_date = now
                try:
                    add_date = int(add_date)
                except (TypeError, ValueError):
                    add_date = now # Unparseable ADD_DATE attribute
                if add_date > 9999999999999: # Delicious goes out to 16
                    add_date = add_date // 1000
                if add_date < 10000000000: # Chrome only goes to 10 digits
                    add_date = add_date * 1000
                bm = {
                    'url': url,
                    'name': name.strip(),
                    'tags': [a for a in tags if a], # Remove empty tags
                    'notes': "", # Filled in later if a <dd> follows
                    'visits': 0,
                    'updated': add_date,
                    'created': add_date,
                    'updateSequenceNum': 0,
                    'images': {'favicon': icon}
                }
                out_list.append(bm)
                # Reset everything (just in case)
                add_date = None
                url = None
                icon = None
                name = ""
        if h3on:
            # Only text counts as a folder name.  The <h3> start tag itself
            # passes through here too and its 'data' holds attributes.
            if data and token_type == 'Characters':
                tags.append(data)
        if ddon: # Indicates that there's notes here
            if data and token_type == 'Characters' and out_list:
                # Notes get attached to the bookmark we just created
                out_list[-1]['notes'] = unescape(data.strip())
        if aon:
            if token_type == 'StartTag' and tag_name == 'a':
                for attr_name, value in _token_attrs(data):
                    if attr_name == 'add_date':
                        add_date = value
                    elif attr_name == 'href':
                        url = value
                    elif attr_name == 'icon':
                        icon = value
                    elif attr_name == 'tags':
                        tags = value.split(',') # Delicious-style
            elif token_type == 'Characters':
                name += unescape(data)
    return out_list
def get_json_tags(json_dict, url):
    """
    Looks inside *json_dict* trying to find tags associated with the given
    *url*.  Returns the tags found as a list (empty if there are none).

    *json_dict* must be the top level of the bookmarks JSON (it has to have
    both 'root' and 'children' keys); anything else results in an empty list.
    A tag is the title of a 'text/x-moz-place-container' found under the
    top-level item titled 'Tags' that has a 'text/x-moz-place' child whose
    'uri' matches *url*.
    """
    tags = []
    if 'root' not in json_dict or 'children' not in json_dict:
        return tags
    for item in json_dict['children']:
        if item.get('title') != 'Tags':
            continue
        # Containers without entries may not have a 'children' key at all
        for child in item.get('children', []):
            if child.get('type') != 'text/x-moz-place-container':
                continue
            for subchild in child.get('children', []):
                if subchild.get('type') != 'text/x-moz-place':
                    continue
                if subchild.get('uri') == url:
                    tags.append(child['title'])
    return tags
def _to_epoch_ms(value):
    """
    Returns *value* (an epoch timestamp) as a 13-digit, millisecond-resolution
    integer.  Falls back to the current time if *value* is missing.
    """
    if not value:
        return int(round(time.time() * 1000))
    value = int(value)
    if value > 9999999999999:
        # Chop off the microseconds to make it 13 digits
        return value // 1000
    if value < 10000000000: # Seconds (Chrome only goes to 10 digits)
        return value * 1000
    return value


def get_ns_json_bookmarks(json_dict, bookmarks):
    """
    Given a *json_dict*, recursively walks its 'children' and adds a bookmark
    dict to ``bookmarks[0]`` (keyed by URL) for each 'text/x-moz-place' found.
    Nothing is returned; *bookmarks* is updated in place.

    *bookmarks* must be a two-item list: ``bookmarks[0]`` is the dict of
    bookmarks collected so far and ``bookmarks[1]`` is the top-level JSON
    object (needed to look up tags).  URLs already present in
    ``bookmarks[0]`` and 'place:'/'file:/' URIs are skipped.  *json_dict*
    itself is not modified.

    .. note:: Only works with Netscape-style bookmarks.json files.
    """
    for child in json_dict.get('children', []):
        child_type = child.get('type')
        if child_type == 'text/x-moz-place':
            uri = child['uri']
            if uri in bookmarks[0]:
                continue # Already have this one
            # Browser won't let you load file: URIs from HTTP pages
            if uri[0:6] in ['place:', 'file:/']:
                continue
            # Note the use of the top-level JSON (bookmarks[1]) here:
            tags = get_json_tags(bookmarks[1], uri)
            if not tags:
                tags = ['Untagged']
            bookmarks[0][uri] = {
                'url': uri,
                'name': (child.get('title') or "").strip(),
                'tags': tags,
                'notes': child["annos"] if "annos" in child else "",
                'visits': 0,
                'updated': _to_epoch_ms(child.get('lastModified')),
                'created': _to_epoch_ms(child.get('dateAdded')),
                'updateSequenceNum': 0,
                'images': {} # No icons in JSON :(
            }
        elif child_type == 'text/x-moz-place-container':
            get_ns_json_bookmarks(child, bookmarks)
def parse_bookmarks_json(json_str):
    """
    Given *json_str*, returns a list of bookmark objects representing the data
    contained therein.
    """
    # The json module isn't imported at the top of this file so get it here
    import json
    # TODO: Get this recognizing and parsing our own JSON format.
    json_obj = json.loads(json_str)
    bookmarks = [{}, json_obj] # Inside a list for persistence
    get_ns_json_bookmarks(json_obj, bookmarks) # Updates bookmarks[0] in-place
    return list(bookmarks[0].values())

# Data Structures
class BookmarksDB(object):
    """
    Used to read and write bookmarks to a file on disk.  Can also synchronize
    a given list of bookmarks with what's on disk.  Uses a given bookmark's
    ``updateSequenceNum`` to track what wins the "who is newer?" comparison.
    """
    # URL of the special 'bookmark' whose notes hold the deleted bookmarks
    DELETED_URL = "web+deleted:bookmarks/"

    def __init__(self, user_dir, user):
        """
        Sets up our bookmarks database object and reads everything in.  The
        bookmarks are stored in ``<user_dir>/<user>/bookmarks.json``.
        """
        self.bookmarks = [] # For temp storage of all bookmarks
        self.user_dir = user_dir
        self.user = user
        users_dir = os.path.join(user_dir, user) # "User's dir"
        self.bookmarks_path = os.path.join(users_dir, "bookmarks.json")
        # Read existing bookmarks into self.bookmarks
        self.open_bookmarks()

    def open_bookmarks(self):
        """
        Opens the bookmarks stored in self.user_dir.  If not present, an
        empty file will be created.
        """
        if not os.path.exists(self.bookmarks_path):
            with open(self.bookmarks_path, 'w') as f:
                f.write('[]') # That's an empty JSON list
            return # Default of empty list will do
        with open(self.bookmarks_path) as f:
            self.bookmarks = json_decode(f.read())

    def save_bookmarks(self):
        """
        Saves self.bookmarks to self.bookmarks_path as a JSON-encoded list.
        """
        with open(self.bookmarks_path, 'w') as f:
            f.write(json_encode(self.bookmarks))

    def sync_bookmarks(self, bookmarks):
        """
        Given *bookmarks*, synchronize with self.bookmarks doing conflict
        resolution and whatnot.

        Incoming bookmarks with a higher ``updateSequenceNum`` (USN) than the
        stored copy replace it, unknown bookmarks are added and both get
        assigned the next highest USN.  Changes are written to disk.  Returns
        the list of stored bookmarks that are newer than what was given.
        """
        highest_USN = self.get_highest_USN()
        changed = False # For if there's changes that need to be written
        updated_bookmarks = [] # For bookmarks that are newer on the server
        # URLs that are being sent as regular (live) bookmarks in this batch
        live_urls = set(
            bm['url'] for bm in bookmarks if bm['url'] != self.DELETED_URL)
        for bm in bookmarks:
            if bm['url'] == self.DELETED_URL:
                # NOTE(review): This used to compare against an undefined name
                # ('bookmark') which raised a NameError whenever the deleted
                # list wasn't empty.  It is assumed the intent was to drop
                # the "deleted" entry of any URL that is (again) present as a
                # regular bookmark in the same batch -- confirm.
                bm['notes'][:] = [
                    deleted_bm for deleted_bm in bm['notes']
                    if deleted_bm['url'] not in live_urls
                ]
            found_existing = False
            for i, db_bookmark in enumerate(self.bookmarks):
                if bm['url'] != db_bookmark['url']:
                    continue
                # Bookmark already exists, check which is newer
                found_existing = True
                if bm['updateSequenceNum'] > db_bookmark['updateSequenceNum']:
                    # The given bookmark is newer than what's in the DB
                    self.bookmarks[i] = bm # Replace it
                    highest_USN += 1 # Increment the USN
                    self.bookmarks[i]['updateSequenceNum'] = highest_USN
                    changed = True
                elif bm['updateSequenceNum'] < db_bookmark['updateSequenceNum']:
                    # DB has a newer bookmark.  Add it to the list to send
                    # to the client.
                    updated_bookmarks.append(db_bookmark)
                # Otherwise the USNs are equal and there's nothing to do
            if not found_existing:
                # This is a new bookmark.  Add it
                highest_USN += 1 # Increment the USN
                bm['updateSequenceNum'] = highest_USN
                self.bookmarks.append(bm)
                changed = True # So it will be saved
        if changed:
            # Write the changes to disk
            self.save_bookmarks()
        # Let the client know what's newer on the server
        return updated_bookmarks

    def delete_bookmark(self, bookmark):
        """
        Deletes the given *bookmark* (matched by URL), records it in the
        special "Deleted Bookmarks" entry and saves the result to disk.
        """
        highest_USN = self.get_highest_USN()
        for i, db_bookmark in enumerate(self.bookmarks):
            if bookmark['url'] != db_bookmark['url']:
                continue
            # Remove it (safe while iterating since we break out below)
            self.bookmarks.pop(i)
            # Add it to the list of deleted bookmarks
            special_deleted_bm = None
            for bm in self.bookmarks:
                if bm['url'] == self.DELETED_URL:
                    special_deleted_bm = bm
            # The deleted bookmarks 'bookmark' is just a list of URLs that
            # have been deleted along with the time it happened.  This lets
            # us keep multiple browsers in sync with what's been deleted
            # so we don't inadvertently end up re-adding bookmarks that were
            # deleted by another client.
            if not special_deleted_bm:
                # Make our first entry
                now = int(round(time.time() * 1000))
                special_deleted_bm = {
                    'url': self.DELETED_URL,
                    'name': "Deleted Bookmarks",
                    'tags': [],
                    'notes': [bookmark],
                    # NOTE(review): A USN-based 'visits' value looks odd but
                    # is kept as-is -- confirm whether clients rely on it.
                    'visits': highest_USN + 1,
                    'updated': now,
                    'created': now,
                    'updateSequenceNum': 0, # Set for real below
                    'images': {}
                }
                self.bookmarks.append(special_deleted_bm)
            else:
                # Check for pre-existing
                updated = False
                for j, deleted_bm in enumerate(special_deleted_bm['notes']):
                    if deleted_bm['url'] == bookmark['url']:
                        # Update it in place
                        special_deleted_bm['notes'][j] = bookmark
                        updated = True
                if not updated:
                    special_deleted_bm['notes'].append(bookmark)
            # Bump the USN so other clients pick up the deletion
            highest_USN += 1
            special_deleted_bm['updateSequenceNum'] = highest_USN
            break
        # Save the change to disk
        self.save_bookmarks()

    def get_bookmarks(self, updateSequenceNum=0):
        """
        Returns a list of bookmarks newer than *updateSequenceNum*.
        If *updateSequenceNum* is 0 or undefined, all bookmarks will be
        returned.
        """
        return [
            bm for bm in self.bookmarks
            if bm['updateSequenceNum'] > updateSequenceNum
        ]

    def get_highest_USN(self):
        """
        Returns the highest updateSequenceNum in self.bookmarks (0 if there
        are no bookmarks).
        """
        return max([0] + [bm['updateSequenceNum'] for bm in self.bookmarks])

    def rename_tag(self, old_tag, new_tag):
        """
        Goes through all bookmarks and renames all tags named *old_tag* to be
        *new_tag*, then saves the bookmarks to disk.
        """
        highest_USN = self.get_highest_USN()
        for bm in self.bookmarks:
            if old_tag in bm['tags']:
                highest_USN += 1
                i = bm['tags'].index(old_tag)
                bm['tags'][i] = new_tag
                # Made a change so we need to increment the USN to ensure sync
                bm['updateSequenceNum'] = highest_USN
                bm['updated'] = int(round(time.time() * 1000))
        # Save the change to disk
        self.save_bookmarks()

# Handlers
class FaviconHandler(BaseHandler):
    """
    Retrieves the favicon of the page at the given URL and writes it to the
    client as a ``data:`` URI.  The page is fetched and searched for an icon
    ``<link>``; if none is found ``/favicon.ico`` on the same host is used.

    .. note:: Works with GET and POST requests but POST is preferred since it keeps the URL from winding up in the server logs.
    """
    # NOTE(review): The "url" argument comes straight from the client and is
    # fetched by the server as-is -- consider restricting what may be fetched.
    # Valid favicon mime types
    favicon_mimetypes = [
        'image/vnd.microsoft.icon',
        'image/x-icon',
        'image/png',
        'image/svg+xml',
        'image/gif',
        'image/jpeg'
    ]

    @tornado.web.asynchronous
    def get(self):
        self.process()

    @tornado.web.asynchronous
    def post(self):
        self.process()

    def process(self):
        """
        Starts an asynchronous fetch of the page given via the "url" argument.
        `on_response` gets called with the result.
        """
        # tornado.httpclient isn't imported at the top of this file
        from tornado.httpclient import AsyncHTTPClient
        url = self.get_argument("url")
        http = AsyncHTTPClient()
        callback = partial(self.on_response, url)
        http.fetch(url, callback, connect_timeout=5.0, request_timeout=5.0)

    @staticmethod
    def _link_attrs(data):
        """
        Yields ``(name, value)`` pairs from the attribute *data* of an
        html5lib token which is a list of pairs or a dict depending on the
        version of html5lib.
        """
        if isinstance(data, dict):
            for key, value in data.items():
                # Keys may be (namespace, name) tuples or plain names
                yield (key[-1] if isinstance(key, tuple) else key), value
        elif isinstance(data, list):
            for pair in data:
                yield pair[0], pair[1]

    @staticmethod
    def _data_uri(mimetype, body):
        """
        Returns *body* (a byte string) as a base64-encoded ``data:`` URI of
        the given *mimetype*.
        """
        import base64
        encoded = base64.b64encode(body).decode('ascii')
        return "data:%s;base64,%s" % (mimetype, encoded)

    def get_favicon_url(self, html):
        """
        Parses *html* looking for a favicon URL.  Returns a tuple of::

            (<url>, <mimetype>)

        If no favicon can be found, returns::

            (None, None)

        Only ``<link>`` tags with an 'icon' relation, an href and a mimetype
        listed in `favicon_mimetypes` qualify ("image/x-icon" is assumed if
        the tag has no type).
        """
        import html5lib
        p = html5lib.HTMLParser(
            tree=html5lib.treebuilders.getTreeBuilder("dom"))
        dom_tree = p.parse(html)
        walker = html5lib.treewalkers.getTreeWalker("dom")
        for token in walker(dom_tree):
            if token.get('name') != 'link':
                continue
            # Start fresh for every <link> so attributes of unrelated tags
            # (e.g. a stylesheet's href/type) can't leak into the result
            fetch_url = None
            mimetype = None
            icon = False
            for attr_name, value in self._link_attrs(token.get('data')):
                if attr_name == 'rel':
                    # Matches both rel="icon" and rel="shortcut icon"
                    icon = 'icon' in value.lower().split()
                elif attr_name == 'href':
                    fetch_url = value
                elif attr_name == 'type':
                    mimetype = value
            if fetch_url and icon:
                if not mimetype:
                    mimetype = "image/x-icon"
                if mimetype in self.favicon_mimetypes:
                    return (fetch_url, mimetype)
        return (None, None)

    def on_response(self, url, response):
        """
        Called with the fetched page at *url*.  Figures out where the icon is
        and starts fetching it; `icon_fetch` gets called with the result.
        """
        try:
            from urlparse import urlparse, urljoin
        except ImportError: # Python 3.X
            from urllib.parse import urlparse, urljoin
        from socket import gaierror
        from tornado.httpclient import AsyncHTTPClient
        if response.error:
            self.write('Unable to fetch icon.')
            self.finish()
            return
        try:
            content = response.body.decode('utf-8')
        except UnicodeDecodeError:
            content = response.body
        (fetch_url, mimetype) = self.get_favicon_url(content)
        if fetch_url:
            # Resolves relative and protocol-relative icon URLs properly
            fetch_url = urljoin(url, fetch_url)
        else:
            parsed_url = urlparse(url)
            fetch_url = '%s://%s/favicon.ico' % (
                parsed_url.scheme, parsed_url.netloc)
        if not mimetype:
            mimetype = "image/x-icon" # Default
        if not fetch_url.startswith(('http://', 'https://')):
            raise tornado.web.HTTPError(404)
        http = AsyncHTTPClient()
        callback = partial(self.icon_fetch, url, mimetype)
        try:
            http.fetch(
                fetch_url,
                callback,
                connect_timeout=5.0,
                request_timeout=5.0
            )
        except gaierror: # No address associated with hostname
            self.write('Unable to fetch icon.')
            self.finish()
            return

    def icon_multifetch(self, urls, response):
        """
        Fetches the icon at the given URLs, stopping when it finds the biggest.
        If an icon is not found, calls itself again with the next icon URL.
        If the icon is found, writes it to the client and finishes the request.
        """
        from socket import gaierror
        from tornado.httpclient import AsyncHTTPClient
        if response.error:
            if not urls:
                raise tornado.web.HTTPError(404)
            url = urls.pop()
            http = AsyncHTTPClient()
            callback = partial(self.icon_multifetch, urls)
            try:
                http.fetch(url, callback)
            except gaierror:
                raise tornado.web.HTTPError(404)
        else:
            if 'Content-Type' in response.headers:
                mimetype = response.headers['Content-Type']
            else:
                mimetype = "image/vnd.microsoft.icon"
            self.set_header("Content-Type", mimetype)
            self.write(self._data_uri(mimetype, response.body))
            self.finish()

    def icon_fetch(self, url, mimetype, response):
        """Returns the fetched icon to the client."""
        if response.error:
            self.write('Unable to fetch icon.')
            self.finish()
            return
        data_uri = self._data_uri(mimetype, response.body)
        self.set_header("Content-Type", mimetype)
        self.write(data_uri)
        self.finish()
class ImportHandler(tornado.web.RequestHandler):
    """
    Takes a bookmarks.html (or a Netscape-style bookmarks.json) in a POST and
    returns a list of bookmarks in JSON format.
    """
    @tornado.web.asynchronous
    def post(self):
        html = self.request.body
        # The body is a byte string so compare against bytes and ignore any
        # leading whitespace when sniffing for JSON
        if html.lstrip().startswith(b'{'): # This is a JSON file
            bookmarks = parse_bookmarks_json(html)
        else:
            bookmarks = parse_bookmarks_html(html)
        self.write(tornado.escape.json_encode(bookmarks))
        self.finish()
        # NOTE: The client will take care of storing these at the next sync
class ExportHandler(tornado.web.RequestHandler):
    """
    Renders the JSON-encoded list of bookmarks given in the "bookmarks"
    argument of a POST as a downloadable Netscape-style bookmarks.html.
    """
    @tornado.web.asynchronous
    def post(self):
        decoded = tornado.escape.json_decode(self.get_argument("bookmarks"))
        self.set_header("Content-Type", "text/html")
        # Have the browser save the result instead of displaying it
        self.set_header(
            "Content-Disposition", 'attachment; filename="bookmarks.html"')
        template = os.path.join(plugin_path, "templates", "bookmarks.html")
        self.render(template, bookmarks=decoded)

# WebSocket commands (not the same as handlers)
def save_bookmarks(bookmarks, tws):
    """
    Synchronizes the given *bookmarks* with the current user's bookmarks
    database and sends the outcome to the client (via *tws*) as a
    'bookmarks_save_result' message.
    """
    out_dict = {'updates': [], 'count': 0, 'errors': []}
    try:
        upn = tws.get_current_user()['upn']
        db = BookmarksDB(tws.settings['user_dir'], upn)
        newer_on_server = db.sync_bookmarks(bookmarks)
        total = len(bookmarks)
        out_dict['updates'] = newer_on_server
        out_dict['count'] = total
        out_dict['updateSequenceNum'] = db.get_highest_USN()
    except Exception as e:
        import traceback
        logging.error("Got exception synchronizing bookmarks: %s" % e)
        traceback.print_exc(file=sys.stdout)
        out_dict['errors'].append(str(e))
    # Errors get reported to the client rather than raised
    out_dict['result'] = (
        "Upload completed but errors were encountered."
        if out_dict['errors'] else "Upload successful")
    tws.write_message(json_encode({'bookmarks_save_result': out_dict}))
def get_bookmarks(updateSequenceNum, tws):
    """
    Sends the client (via *tws*) a 'bookmarks_updated' message holding the
    current user's bookmarks that were updated after *updateSequenceNum*.
    A false-y *updateSequenceNum* results in all bookmarks being sent.
    """
    upn = tws.get_current_user()['upn']
    db = BookmarksDB(tws.settings['user_dir'], upn)
    # Falling back to 0 forces a full download
    since = int(updateSequenceNum) if updateSequenceNum else 0
    tws.write_message(
        json_encode({'bookmarks_updated': db.get_bookmarks(since)}))
def delete_bookmarks(deleted_bookmarks, tws):
    """
    Deletes every bookmark in the *deleted_bookmarks* list from the current
    user's bookmarks database and reports the outcome to the client (via
    *tws*) as a 'bookmarks_delete_result' message.
    """
    upn = tws.get_current_user()['upn']
    db = BookmarksDB(tws.settings['user_dir'], upn)
    out_dict = {'result': "", 'count': 0, 'errors': []}
    try:
        for doomed in deleted_bookmarks:
            # Counted before the attempt so failures are included too
            out_dict['count'] += 1
            db.delete_bookmark(doomed)
        out_dict['result'] = "Success"
    except Exception as e: # TODO: Make this more specific
        logging.error("delete_bookmarks error: %s" % e)
        import traceback
        traceback.print_exc(file=sys.stdout)
        out_dict['result'] = "Errors"
        out_dict['errors'].append(str(e))
    tws.write_message(json_encode({'bookmarks_delete_result': out_dict}))
def rename_tags(renamed_tags, tws):
    """
    Renames tags in the current user's bookmarks database.  Each item in
    *renamed_tags* must be a string in the form of "<old name>,<new name>".
    Sends the number of renames performed to the client (via *tws*) as a
    'bookmarks_renamed_tags' message.
    """
    upn = tws.get_current_user()['upn']
    db = BookmarksDB(tws.settings['user_dir'], upn)
    out_dict = {'result': "", 'count': 0, 'errors': [], 'updates': []}
    for pair in renamed_tags:
        # Exactly two comma-separated names are expected
        old_name, new_name = pair.split(',')
        db.rename_tag(old_name, new_name)
        out_dict['count'] = out_dict['count'] + 1
    tws.write_message(json_encode({'bookmarks_renamed_tags': out_dict}))
# The hooks this plugin implements:  'Web' pairs each URL pattern with its
# request handler class and 'WebSocket' maps each command name to the
# function that handles it.
hooks = {
    'Web': [
        (r"/bookmarks/fetchicon", FaviconHandler),
        (r"/bookmarks/export", ExportHandler),
        (r"/bookmarks/import", ImportHandler),
    ],
    'WebSocket': {
        'bookmarks_sync': save_bookmarks,
        'bookmarks_get': get_bookmarks,
        'bookmarks_deleted': delete_bookmarks,
        'bookmarks_rename_tags': rename_tags,
    }
}