# Source code for html5_appcache.middleware.appcache_middleware

# -*- coding: utf-8 -*-
from lxml import etree
from lxml.html.html5parser import document_fromstring


# ``basestring`` only exists on Python 2; fall back to ``str`` so the tag
# check in ``walk_tree`` also works on Python 3.
try:
    _STRING_TYPES = basestring
except NameError:
    _STRING_TYPES = str


class AppCacheAssetsFromResponse(object):
    """
    Extracts appcache assets from the rendered template.

    Currently supports the following tags:
     * img: extracts the data in the ``src`` attribute
     * script: extracts the data in the ``src`` attribute
     * link: extracts the data in the ``href`` attribute if ``rel==stylesheet``
     * a: extracts the data in the ``href`` attribute, only if the tag opts
       in with ``data-appcache='appcache'``

    It supports custom data-attribute to control how assets are cached:
     * `data-appcache='noappcache'`: the referenced url is added to the NETWORK
       section
     * `data-appcache='appcache'`: the referenced url is added to the CACHE
       section (this is how ``a`` tags opt in)
     * `data-appcache-fallback=URL`: the referenced url is added in the
       FALLBACK section, with *URL* as a target

    The collected urls are kept on the instance in ``_cached`` (set),
    ``_network`` (set) and ``_fallback`` (dict mapping url to fallback
    target). They are recreated for every analyzed response, so a result
    never contains assets collected from another response.
    """
    # Maps the local tag name to the name of the method handling it. The
    # method is looked up on ``self`` so that subclass overrides are honoured.
    _TAG_HANDLERS = {
        'img': 'handle_img',
        'script': 'handle_script',
        'link': 'handle_link',
        'a': 'handle_a',
    }

    def __init__(self):
        self._reset()

    def _reset(self):
        """
        Start a new, empty collection of assets
        """
        # Rebind to fresh containers instead of clearing the old ones: a
        # previously returned ``response.appcache`` still references the old
        # objects and must not be emptied or polluted by a later analysis.
        self._cached = set()
        self._network = set()
        self._fallback = {}

    def _collect(self, attrib, url_attr):
        """
        File the url stored in ``attrib[url_attr]`` under the right section

        ``data-appcache='noappcache'`` takes precedence over
        ``data-appcache-fallback``; without either the url is cached.
        """
        url = attrib[url_attr]
        if attrib.get('data-appcache') == 'noappcache':
            self._network.add(url)
        elif 'data-appcache-fallback' in attrib:
            self._fallback[url] = attrib['data-appcache-fallback']
        else:
            self._cached.add(url)

    def handle_img(self, tag, attrib):
        """
        Extract assets from the img tag

        :param tag: local name of the tag (unused)
        :param attrib: mapping of the tag attributes
        """
        if 'src' in attrib:
            self._collect(attrib, 'src')

    def handle_script(self, tag, attrib):
        """
        Extract assets from the script tag

        :param tag: local name of the tag (unused)
        :param attrib: mapping of the tag attributes
        """
        if 'src' in attrib:
            self._collect(attrib, 'src')

    def handle_link(self, tag, attrib):
        """
        Extract assets from the link tag (only for stylesheets)

        :param tag: local name of the tag (unused)
        :param attrib: mapping of the tag attributes
        """
        if 'href' in attrib and attrib.get('rel') == 'stylesheet':
            self._collect(attrib, 'href')

    def handle_a(self, tag, attrib):
        """
        Extract assets from the a tag (only for opt-in link)

        :param tag: local name of the tag (unused)
        :param attrib: mapping of the tag attributes
        """
        if 'href' in attrib and attrib.get('data-appcache') == 'appcache':
            self._cached.add(attrib['href'])

    def walk_tree(self, tree):
        """
        Walk the DOM tree

        Dispatches ``tree`` to the matching ``handle_*`` method, then
        recurses into its children.

        :param tree: node of the parsed document
        """
        # Nodes whose tag is not a string (lxml uses non-string tags for
        # nodes such as comments) are skipped together with their children.
        if not isinstance(tree.tag, _STRING_TYPES):
            return
        # QName strips the namespace part, leaving the bare tag name.
        localname = etree.QName(tree.tag).localname
        handler_name = self._TAG_HANDLERS.get(localname)
        if handler_name is not None:
            getattr(self, handler_name)(localname, tree.attrib)
        for node in tree:
            self.walk_tree(node)

    def process_response(self, request, response):
        """
        This method is called only if ``appcache_analyze`` parameter is attached
        to the querystring, to avoid overhead during normal navigation

        When the response is HTML and the parameter is set, the extracted
        assets are attached to the response as ``response.appcache``, a dict
        with the ``cached``, ``fallback`` and ``network`` keys.

        :param request: current request; only ``request.GET`` is read
        :param response: response to analyze
        :return: the same ``response`` object
        """
        # A response without a Content-Type header is simply not analyzed
        # instead of raising KeyError.
        content_type = response.get('Content-Type', '') or ''
        if ("text/html" in content_type and
                request.GET.get("appcache_analyze", False)):
            # NOTE(review): the state lives on the instance; if one instance
            # serves concurrent requests, two analyses could still interleave
            # - confirm whether that can happen in the target deployment.
            self._reset()
            lxdoc = document_fromstring(response.content)
            self.walk_tree(lxdoc)
            response.appcache = {'cached': self._cached,
                                 'fallback': self._fallback,
                                 'network': self._network}
        return response