# Source code for sphinx_gallery.docs_resolv

# -*- coding: utf-8 -*-
# Author: Óscar Nájera
# License: 3-clause BSD
"""
Link resolver objects
=====================
"""

import codecs
import gzip
from io import BytesIO
import os
import pickle
import posixpath
import re
import shelve
import sys
import urllib.request as urllib_request
import urllib.parse as urllib_parse
from urllib.error import HTTPError, URLError

from sphinx.search import js_index

from . import sphinx_compatibility


logger = sphinx_compatibility.getLogger('sphinx-gallery')


def _get_data(url):
    """Get data over http(s) or from a local file."""
    if urllib_parse.urlparse(url).scheme in ('http', 'https'):
        resp = urllib_request.urlopen(url)
        encoding = resp.headers.get('content-encoding', 'plain')
        data = resp.read()
        if encoding == 'gzip':
            data = gzip.GzipFile(fileobj=BytesIO(data)).read()
        elif encoding != 'plain':
            raise RuntimeError('unknown encoding %r' % (encoding,))
        data = data.decode('utf-8')
    else:
        with codecs.open(url, mode='r', encoding='utf-8') as fid:
            data = fid.read()

    return data


def get_data(url, gallery_dir):
    """Persistent dictionary usage to retrieve the search indexes.

    Results are cached in a ``shelve`` store named ``searchindex`` inside
    *gallery_dir*, keyed by *url*, so repeated builds do not re-download.

    Parameters
    ----------
    url : str
        URL (or local path) of the resource to fetch.
    gallery_dir : str
        Directory holding the persistent ``searchindex`` cache file.

    Returns
    -------
    data : str
        The resource contents (possibly from the cache).
    """
    # NOTE: the Python 2 str-key shim that used to live here was dead code on
    # Python 3 (this module already imports urllib.request) and was removed.
    cached_file = os.path.join(gallery_dir, 'searchindex')
    search_index = shelve.open(cached_file)
    try:
        if url in search_index:
            data = search_index[url]
        else:
            data = _get_data(url)
            search_index[url] = data
    finally:
        # Always release the shelve file, even when the download fails.
        search_index.close()

    return data
def parse_sphinx_docopts(index):
    """
    Parse the Sphinx index for documentation options.

    Parameters
    ----------
    index : str
        The Sphinx index page

    Returns
    -------
    docopts : dict
        The documentation options from the page.

    Raises
    ------
    ValueError
        If the ``DOCUMENTATION_OPTIONS`` block cannot be located in *index*.
    """
    pos = index.find('var DOCUMENTATION_OPTIONS')
    if pos < 0:
        raise ValueError('Documentation options could not be found in index.')
    pos = index.find('{', pos)
    if pos < 0:
        raise ValueError('Documentation options could not be found in index.')
    endpos = index.find('};', pos)
    if endpos < 0:
        raise ValueError('Documentation options could not be found in index.')

    block = index[pos + 1:endpos].strip()
    docopts = {}
    for line in block.splitlines():
        # Skip blank lines and lines without a key/value separator instead
        # of crashing on the tuple unpack below.
        if ':' not in line:
            continue
        key, value = line.split(':', 1)
        key = key.strip().strip('"')

        value = value.strip()
        # Drop a trailing comma; guard against an empty value so value[-1]
        # / value[0] below cannot raise IndexError.
        if value and value[-1] == ',':
            value = value[:-1].rstrip()
        if not value:
            continue
        if value[0] in '"\'':
            value = value[1:-1]
        elif value == 'false':
            value = False
        elif value == 'true':
            value = True
        else:
            try:
                value = int(value)
            except ValueError:
                # In Sphinx 1.7.5, URL_ROOT is a JavaScript fragment.
                # Ignoring this entry since URL_ROOT is not used
                # elsewhere.
                # https://github.com/sphinx-gallery/sphinx-gallery/issues/382
                continue

        docopts[key] = value

    return docopts
class SphinxDocLinkResolver(object):
    """
    Resolve documentation links using searchindex.js generated by Sphinx

    Parameters
    ----------
    doc_url : str
        The base URL of the project website.
    gallery_dir : str
        Directory used to cache downloaded index files (see ``get_data``).
    relative : bool
        Return relative links (only useful for links to documentation of
        this package).
    """

    def __init__(self, doc_url, gallery_dir, relative=False):
        self.doc_url = doc_url
        self.gallery_dir = gallery_dir
        self.relative = relative
        # Maps 'module_short.name' -> resolved URL, or False when a lookup
        # already failed (so we don't retry it).
        self._link_cache = {}

        if doc_url.startswith(('http://', 'https://')):
            if relative:
                raise ValueError('Relative links are only supported for local '
                                 'URLs (doc_url cannot be absolute)')
            index_url = doc_url + '/'
            searchindex_url = doc_url + '/searchindex.js'
            docopts_url = doc_url + '_static/documentation_options.js'
        else:
            # Local documentation tree: use filesystem paths.
            index_url = os.path.join(doc_url, 'index.html')
            searchindex_url = os.path.join(doc_url, 'searchindex.js')
            docopts_url = os.path.join(
                doc_url, '_static', 'documentation_options.js')

        # detect if we are using relative links on a Windows system
        if (os.name.lower() == 'nt' and
                not doc_url.startswith(('http://', 'https://'))):
            if not relative:
                raise ValueError('You have to use relative=True for the local'
                                 ' package on a Windows system.')
            self._is_windows = True
        else:
            self._is_windows = False

        # Download and find documentation options. As of Sphinx 1.7, these
        # options are now kept in a standalone file called
        # 'documentation_options.js'. Since SphinxDocLinkResolver can be called
        # not only for the documentation which is being built but also ones
        # that are being referenced, we need to try and get the index page
        # first and if that doesn't work, check for the
        # documentation_options.js file.
        index = get_data(index_url, gallery_dir)
        if 'var DOCUMENTATION_OPTIONS' in index:
            self._docopts = parse_sphinx_docopts(index)
        else:
            docopts = get_data(docopts_url, gallery_dir)
            self._docopts = parse_sphinx_docopts(docopts)

        # download and initialize the search index
        sindex = get_data(searchindex_url, gallery_dir)
        self._searchindex = js_index.loads(sindex)

    def _get_link(self, cobj):
        """Get a valid link, False if not found."""
        fullname = cobj['module_short'] + '.' + cobj['name']
        try:
            value = self._searchindex['objects'][cobj['module_short']]
            match = value[cobj['name']]
        except KeyError:
            # Object not present in the search index.
            link = False
        else:
            # NOTE(review): match layout assumed to be
            # (filename index, object-name index, _, anchor) per the
            # searchindex.js format — confirm against the Sphinx version used.
            fname_idx = match[0]
            objname_idx = str(match[1])
            anchor = match[3]

            fname = self._searchindex['filenames'][fname_idx]
            # In 1.5+ Sphinx seems to have changed from .rst.html to only
            # .html extension in converted files. Find this from the options.
            ext = self._docopts.get('FILE_SUFFIX', '.rst.html')
            fname = os.path.splitext(fname)[0] + ext
            if self._is_windows:
                fname = fname.replace('/', '\\')
                link = os.path.join(self.doc_url, fname)
            else:
                link = posixpath.join(self.doc_url, fname)

            # Reconstruct the fragment: '' means the object's full name is
            # the anchor; '-' means a typed anchor (e.g. 'method-...').
            if anchor == '':
                anchor = fullname
            elif anchor == '-':
                anchor = (self._searchindex['objnames'][objname_idx][1] +
                          '-' + fullname)

            link = link + '#' + anchor

        return link

    def resolve(self, cobj, this_url):
        """Resolve the link to the documentation, returns None if not found

        Parameters
        ----------
        cobj : dict
            Dict with information about the "code object" for which we are
            resolving a link.
            cobj['name'] : function or class name (str)
            cobj['module_short'] : shortened module name (str)
            cobj['module'] : module name (str)
        this_url: str
            URL of the current page. Needed to construct relative URLs
            (only used if relative=True in constructor).

        Returns
        -------
        link : str or None
            The link (URL) to the documentation.
        """
        full_name = cobj['module_short'] + '.' + cobj['name']
        link = self._link_cache.get(full_name, None)
        if link is None:
            # we don't have it cached
            link = self._get_link(cobj)
            # cache it for the future
            self._link_cache[full_name] = link

        if link is False or link is None:
            # failed to resolve
            return None

        if self.relative:
            link = os.path.relpath(link, start=this_url)
            if self._is_windows:
                # replace '\' with '/' so it on the web
                link = link.replace('\\', '/')

            # for some reason, the relative link goes one directory too high up
            link = link[3:]

        return link
def _handle_http_url_error(e, msg='fetching'):
    """Log a warning for a failed HTTP request without re-raising.

    Parameters
    ----------
    e : HTTPError or URLError
        The exception caught while fetching/resolving documentation.
    msg : str
        Short description of the action that failed.
    """
    extra = e.code if isinstance(e, HTTPError) else e.reason
    # URLError (unlike HTTPError) has no .url / .msg attributes; use
    # getattr fallbacks so the warning itself cannot raise AttributeError.
    url = getattr(e, 'url', '<unknown URL>')
    detail = getattr(e, 'msg', str(extra))
    logger.warning('The following HTTP Error has occurred %s %s: %s (%s)',
                   msg, url, extra, detail)


def _embed_code_links(app, gallery_conf, gallery_dir):
    """Rewrite built gallery HTML, hyperlinking code objects to their docs.

    For every HTML page with a companion ``*_codeobj.pickle`` file, replace
    each documented name's ``<span>`` markup with an ``<a>`` pointing at the
    resolved documentation URL (via configured resolvers or intersphinx).
    """
    # Add resolvers for the packages for which we want to show links
    doc_resolvers = {}

    src_gallery_dir = os.path.join(app.builder.srcdir, gallery_dir)
    for this_module, url in gallery_conf['reference_url'].items():
        try:
            if url is None:
                # None means "this project": resolve against the local build
                # output with relative links.
                doc_resolvers[this_module] = SphinxDocLinkResolver(
                    app.builder.outdir, src_gallery_dir, relative=True)
            else:
                doc_resolvers[this_module] = SphinxDocLinkResolver(
                    url, src_gallery_dir)
        except (URLError, HTTPError) as e:
            # Best-effort: a missing/unreachable index just disables links
            # for that package.
            _handle_http_url_error(e)

    html_gallery_dir = os.path.abspath(os.path.join(app.builder.outdir,
                                                    gallery_dir))

    # patterns for replacement
    link_pattern = ('<a href="%s" title="View documentation for %s">%s</a>')
    orig_pattern = '<span class="n">%s</span>'
    period = '<span class="o">.</span>'

    # This could be turned into a generator if necessary, but should be okay
    flat = [[dirpath, filename]
            for dirpath, _, filenames in os.walk(html_gallery_dir)
            for filename in filenames]
    iterator = sphinx_compatibility.status_iterator(
        flat, 'embedding documentation hyperlinks for %s... ' % gallery_dir,
        color='fuchsia', length=len(flat),
        stringify_func=lambda x: os.path.basename(x[1]))
    intersphinx_inv = getattr(app.env, 'intersphinx_named_inventory', dict())
    builtin_modules = set(intersphinx_inv.get(
        'python', dict()).get('py:module', dict()).keys())
    for dirpath, fname in iterator:
        full_fname = os.path.join(html_gallery_dir, dirpath, fname)
        subpath = dirpath[len(html_gallery_dir) + 1:]
        pickle_fname = os.path.join(src_gallery_dir, subpath,
                                    fname[:-5] + '_codeobj.pickle')

        if os.path.exists(pickle_fname):
            # we have a pickle file with the objects to embed links for
            # (the 'with' block closes the file; the old explicit close()
            # call was redundant and has been removed)
            with open(pickle_fname, 'rb') as fid:
                example_code_obj = pickle.load(fid)
            str_repl = {}
            # generate replacement strings with the links
            for name, cobj in example_code_obj.items():
                for modname in (cobj['module_short'], cobj['module']):
                    this_module = modname.split('.')[0]
                    cname = cobj['name']

                    # Try doc resolvers first
                    link = None
                    if this_module in doc_resolvers:
                        try:
                            link = doc_resolvers[this_module].resolve(
                                cobj, full_fname)
                        except (HTTPError, URLError) as e:
                            _handle_http_url_error(
                                e, msg='resolving %s.%s' % (modname, cname))

                    # next try intersphinx
                    if this_module == modname == 'builtins':
                        this_module = 'python'
                    elif modname in builtin_modules:
                        this_module = 'python'
                    if link is None and this_module in intersphinx_inv:
                        inv = intersphinx_inv[this_module]
                        if modname == 'builtins':
                            want = cname
                        else:
                            want = '%s.%s' % (modname, cname)
                        for key, value in inv.items():
                            # only python domain
                            if key.startswith('py') and want in value:
                                link = value[want][2]
                                break

                    if link is not None:
                        parts = name.split('.')
                        name_html = period.join(orig_pattern % part
                                                for part in parts)
                        full_function_name = '%s.%s' % (modname, cname)
                        str_repl[name_html] = link_pattern % (
                            link, full_function_name, name_html)
                        break  # loop over possible module names

            # do the replacement in the html file

            # ensure greediness
            names = sorted(str_repl, key=len, reverse=True)
            regex_str = '|'.join(re.escape(name) for name in names)
            regex = re.compile(regex_str)

            def substitute_link(match):
                return str_repl[match.group()]

            if len(str_repl) > 0:
                with codecs.open(full_fname, 'r', 'utf-8') as fid:
                    lines_in = fid.readlines()
                with codecs.open(full_fname, 'w', 'utf-8') as fid:
                    for line in lines_in:
                        fid.write(regex.sub(substitute_link, line))