# -*- coding: utf-8 -*-
import string
import time
from datetime import datetime
from lxml import etree
from lxml.html import document_fromstring
from django.template import RequestContext
from django.template.loader import render_to_string
from html5_appcache.settings import DJANGOCMS_2_3, get_setting, DJANGO_1_4
from html5_appcache.cache import (get_cached_value, set_cached_value,
is_manifest_clean, reset_cache_manifest,
set_cached_manifest, get_cached_manifest)
from html5_appcache.utils import is_external_url
[docs]class BaseAppCache(object):
"""
Base class for Appcache classes
"""
manager = None
def __init__(self):
pass
[docs] def _add_language(self, request, urls):
""" For django CMS 2.3 we need to manually add language code to the
urls returned by the appcache classes
:return: list of urls
"""
if DJANGOCMS_2_3:
new_urls = []
for url in urls:
if not is_external_url(url, request):
new_urls.append("/%s%s" % (request.LANGUAGE_CODE, url))
else:
new_urls.append(url)
return new_urls
else:
return urls
[docs] def _get_assets(self, request):
"""
override this method to customize asset (images, files, javascripts,
stylesheets) urls.
:return: list of urls
"""
return []
[docs] def _get_urls(self, request):
"""
override this method to define cached urls.
If you use a sitemap-enabled application, it's not normally necessary.
:return: list of urls
"""
return []
[docs] def _get_network(self, request):
"""
override this method to define network (non-cached) urls.
:return: list of urls
"""
return []
[docs] def _get_fallback(self, request):
"""
override this method to define fallback urls.
:return: dictionary mapping original urls to fallback
"""
return {}
[docs] def get_assets(self, request):
"""
Public method that return assets urls. Do not override, use :py:meth:`_get_assets`
:return: list of urls
"""
return self._add_language(request, self._get_assets(request))
[docs] def get_urls(self, request):
"""
Public method that return cached urls. Do not override, use :py:meth:`_get_urls`
:return: list of urls
"""
return self._add_language(request, self._get_urls(request))
[docs] def get_network(self, request):
"""
Public method that return network (non-cached) urls. Do not override, use :py:meth:`_get_network`
:return: list of urls
"""
return self._add_language(request, self._get_network(request))
[docs] def get_fallback(self, request):
"""
Public method that return fallback urls. Do not override, use :py:meth:`_get_fallback`
:return: dictionary mapping original urls to fallback
"""
return self._get_fallback(request)
[docs] def signal_connector(self, instance, **kwargs):
"""
You **must** override this method in you ``AppCache`` class.
"""
return NotImplementedError("signal_connector must be implemented for appcache to work")
[docs]class AppCacheManager(object):
"""
Main class.
"""
_cached = set()
_network = set()
_fallback = {}
_external_appcaches = []
request = None
_template = None
context = {}
def __init__(self):
self.registry = []
def register(self, instance):
self.registry.append(instance)
def clear(self):
self.registry = []
[docs] def setup_registry(self):
"""
Setup the manager bootstrapping the appcache instances
"""
self._setup_signals()
[docs] def _setup_signals(self):
"""
Loads the signals for all the models declared in the appcache instances
"""
from django.db.models.signals import post_save, post_delete
for appcache in self.registry:
appcache.manager = self
for model in appcache.models:
post_save.connect(appcache.signal_connector, sender=model)
post_delete.connect(appcache.signal_connector, sender=model)
[docs] def setup(self, request, template):
"""
Setup is required wen updating the manifest file
"""
self.request = request
self.language = getattr(self.request, 'LANGUAGE_CODE', "").split("-")[0]
self._network = set()
self._cached = set()
self._fallback = {}
self._template = template
self._external_appcaches = {'cached': [], 'network': [], 'fallback': {}}
self.context = {}
[docs] def get_version_timestamp(self):
"""
Create the timestamp according to the current time.
It tries to make it unique even for very short timeframes
"""
timestamp = get_cached_value("timestamp")
if not timestamp:
timestamp = int(time.time()*100)*10000 + datetime.utcnow().microsecond
set_cached_value("timestamp", timestamp)
return timestamp
[docs] def reset_manifest(self):
"""
Clear the cache (if clean)
"""
if is_manifest_clean():
reset_cache_manifest()
[docs] def get_urls(self):
"""
Retrieves the urls from the sitemap and :meth:`BaseAppCache.get_urls` of
the appcache instances
"""
urls = set()
if get_setting('USE_SITEMAP'):
urls.update(self._get_sitemap())
for appcache in self.registry:
urls.update(appcache.get_urls(self.request))
return urls
[docs] def get_cached_urls(self):
"""
Create the cached urls set.
Merges the assets, the urls, removes the network urls and the external urls
See :meth:`BaseAppCache.get_urls`, :meth:`get_network_urls`
"""
if not self._cached:
self._cached = self.get_urls()
for appcache in self.registry:
self._cached.update(appcache.get_assets(self.request))
self._cached.update(self._external_appcaches['cached'])
self._cached.difference_update(self.get_network_urls())
if get_setting('DISCARD_EXTERNAL'):
self._cached = filter(lambda url: not is_external_url(
url, self.request), self._cached)
if get_setting('EXCLUDE_URL'):
new = set()
for url in self._cached:
if not any(url.startswith(excluded) for excluded in get_setting('EXCLUDE_URL')):
new.add(url)
self._cached = new
return self._cached
[docs] def get_network_urls(self):
"""
Create the network urls set.
``*`` (wildcard entry) is added when :setting:`ADD_WILDCARD` is True (default)
"""
if not self._network:
for appcache in self.registry:
self._network.update(appcache.get_network(self.request))
self._network.update(self._external_appcaches['network'])
self._network.update(get_setting('NETWORK_URL'))
if get_setting("ADD_WILDCARD"):
self._network.update(("*",))
return self._network
[docs] def get_fallback_urls(self):
"""
Creates the fallback urls set.
"""
if not self._fallback:
for appcache in self.registry:
self._fallback.update(appcache.get_fallback(self.request))
self._fallback.update(self._external_appcaches['fallback'])
self._fallback.update(get_setting('FALLBACK_URL'))
return self._fallback
[docs] def add_appcache(self, appcache):
"""
Adds the externally retrieved urls to the internal set.
`appcache` is a dictionary with `cached`, `network`, `fallback` keys
"""
self._external_appcaches.update(appcache)
[docs] def _get_sitemap(self):
"""
Pretty ugly method that fetches the current sitemap and parses it to
retrieve the list of available urls
"""
from django.contrib.sites.models import get_current_site
from django.test import Client
from django.contrib.sitemaps.views import sitemap
def walk_sitemap(urlset, doc):
"""
Nested function for convenience. Recursively scans the sitemap to
retrieve the urls
"""
if isinstance(doc.tag, basestring):
tag = etree.QName(doc.tag)
if tag.localname == "loc":
urlset.append(doc.text)
for node in doc:
urlset = walk_sitemap(urlset, node)
return urlset
req_protocol = 'https' if self.request.is_secure() else 'http'
req_site = get_current_site(self.request)
client = Client()
path = get_setting('SITEMAP_URL')
if DJANGO_1_4 and not DJANGOCMS_2_3:
path = "/%s%s" % (self.language, get_setting('SITEMAP_URL'))
sitemap = client.get(path, follow=True, LANGUAGE_CODE=self.language)
local_urls = []
lxdoc = document_fromstring(sitemap.content)
walk_sitemap(local_urls, lxdoc)
if get_setting('DJANGOCMS_2_3'):
lang_fix = "/" + self.language
else:
lang_fix = ""
return map(lambda x: string.replace(
x,"%s://%s" % (req_protocol, req_site), lang_fix),
local_urls)
[docs] def _fetch_url(self, client, url):
"""
Scrape a single URL and fetches assets
"""
if not is_external_url(url):
response = client.get(url, data={"appcache_analyze":1}, LANGUAGE_CODE=self.language)
if response.status_code == 200:
self.add_appcache(response.appcache)
elif response.status_code == 302:
self._fetch_url(client, response['Location'])
else:
print("Unrecognized code %s for %s\n" % (
response.status_code, url))
[docs] def get_manifest(self, update=False):
"""
Either get the manifest file out of the cache or render it and save in
the cache.
"""
if not is_manifest_clean() and update:
self.context = {
'version': self.get_version_timestamp(),
'date': datetime.fromtimestamp(self.get_version_timestamp()/1000000).isoformat(),
'cached_urls': sorted(self.get_cached_urls()),
'network_urls': sorted(self.get_network_urls()),
'fallback_urls': self.get_fallback_urls(),
}
context = RequestContext(self.request, self.context)
manifest = render_to_string(self._template, context_instance=context)
set_cached_manifest(manifest)
return manifest
else:
return get_cached_manifest()