# Source code for sphinxcontrib.bibtex.cache

# -*- coding: utf-8 -*-
"""
    Cached Information
    ~~~~~~~~~~~~~~~~~~

    Classes and methods to maintain any information that is stored
    outside the doctree.

    .. autoclass:: Cache
        :members:

    .. autoclass:: BibfileCache
        :members:

    .. autoclass:: BibliographyCache
        :members:
"""

import sys
import six
if sys.version_info < (2, 7):  # pragma: no cover
    from ordereddict import OrderedDict
else:                          # pragma: no cover
    from collections import OrderedDict

import ast
import collections
import copy
from oset import oset
import re


def _raise_invalid_node(node):
    """Signal that *node* is not permitted in a filter expression.

    :param node: The offending node (or operator); it is interpolated
        into the error message with ``%s``.
    :raises ValueError: Always.
    """
    message = "invalid node %s in filter expression" % node
    raise ValueError(message)


class _FilterVisitor(ast.NodeVisitor):

    """Visit the abstract syntax tree of a parsed filter expression.

    Calling :meth:`visit` on the parsed expression evaluates it for one
    bibliographic entry and returns the outcome. Any construct that is
    not explicitly supported raises :exc:`ValueError`.
    """

    entry = None
    """The bibliographic entry to which the filter must be applied."""

    docname = None
    """The name of the document for which the filter is evaluated."""

    cited_docnames = False
    """The documents where the entry is cited (empty if not cited)."""

    def __init__(self, entry, docname, cited_docnames):
        self.entry = entry
        self.docname = docname
        self.cited_docnames = cited_docnames

    def visit_Module(self, node):
        """Evaluate the single expression statement of the module."""
        if len(node.body) != 1:
            raise ValueError(
                "filter expression cannot contain multiple expressions")
        return self.visit(node.body[0])

    def visit_Expr(self, node):
        """Evaluate the value wrapped by an expression statement."""
        return self.visit(node.value)

    def visit_BoolOp(self, node):
        """Evaluate ``and`` / ``or``; the result is always a bool."""
        # lazy generator, so all() and any() short-circuit
        outcomes = (self.visit(value) for value in node.values)
        if isinstance(node.op, ast.And):
            return all(outcomes)
        elif isinstance(node.op, ast.Or):
            return any(outcomes)
        else:  # pragma: no cover
            # there are no other boolean operators
            # so this code should never execute
            assert False, "unexpected boolean operator %s" % node.op

    def visit_UnaryOp(self, node):
        """Evaluate ``not``; every other unary operator is rejected."""
        if isinstance(node.op, ast.Not):
            return not self.visit(node.operand)
        else:
            _raise_invalid_node(node)

    def visit_BinOp(self, node):
        """Evaluate ``%`` (regular expression search), ``|`` and ``&``.

        :raises ValueError: If an operand of ``%`` is not a string, if
            the pattern is not a valid regular expression, or if the
            operator is not supported.
        """
        left = self.visit(node.left)
        op = node.op
        right = self.visit(node.right)
        if isinstance(op, ast.Mod):
            # modulo operator is used for regular expression matching
            if not isinstance(left, six.string_types):
                raise ValueError(
                    "expected a string on left side of %s" % node.op)
            if not isinstance(right, six.string_types):
                raise ValueError(
                    "expected a string on right side of %s" % node.op)
            try:
                return re.search(right, left, re.IGNORECASE)
            except re.error as err:
                # report a bad pattern the same way as any other
                # invalid filter, so callers catching ValueError
                # can recover from it
                raise ValueError(
                    "invalid regular expression %r: %s" % (right, err))
        elif isinstance(op, ast.BitOr):
            return left | right
        elif isinstance(op, ast.BitAnd):
            return left & right
        else:
            _raise_invalid_node(node)

    def visit_Compare(self, node):
        """Evaluate a comparison with exactly one operator.

        :raises ValueError: For chained comparisons (``a < b < c``) and
            for unsupported operators (``is``, ``is not``).
        """
        # keep it simple: binary comparators only
        if len(node.ops) != 1:
            raise ValueError("syntax for multiple comparators not supported")
        left = self.visit(node.left)
        op = node.ops[0]
        right = self.visit(node.comparators[0])
        if isinstance(op, ast.Eq):
            return left == right
        elif isinstance(op, ast.NotEq):
            return left != right
        elif isinstance(op, ast.Lt):
            return left < right
        elif isinstance(op, ast.LtE):
            return left <= right
        elif isinstance(op, ast.Gt):
            return left > right
        elif isinstance(op, ast.GtE):
            return left >= right
        elif isinstance(op, ast.In):
            return left in right
        elif isinstance(op, ast.NotIn):
            return left not in right
        else:
            # not used currently: ast.Is | ast.IsNot
            _raise_invalid_node(op)

    def visit_Name(self, node):
        """Calculate the value of the given identifier.

        ``type`` and ``key`` give the lowercased entry type and key,
        ``cited`` tells whether the entry is cited anywhere,
        ``docname`` and ``docnames`` give the current document and the
        citing documents, and ``author`` / ``editor`` give the persons
        joined by ``" and "``. Any other identifier is looked up in the
        entry fields, with the empty string as default.
        """
        id_ = node.id
        if id_ == 'type':
            return self.entry.type.lower()
        elif id_ == 'key':
            return self.entry.key.lower()
        elif id_ == 'cited':
            return bool(self.cited_docnames)
        elif id_ == 'docname':
            return self.docname
        elif id_ == 'docnames':
            return self.cited_docnames
        elif id_ == 'True':
            return True
        elif id_ == 'False':
            return False
        elif id_ == 'author' or id_ == 'editor':
            if id_ in self.entry.persons:
                return u' and '.join(
                    six.text_type(person)  # XXX needs fix in pybtex?
                    for person in self.entry.persons[id_])
            else:
                return u''
        else:
            return self.entry.fields.get(id_, "")

    def visit_Set(self, node):
        """Evaluate a set display to a :class:`frozenset`."""
        return frozenset(self.visit(elt) for elt in node.elts)

    # Python 3.8+ parses every literal as ast.Constant; handle it
    # explicitly instead of relying on the deprecated fallback that
    # dispatches to visit_Str / visit_NameConstant
    def visit_Constant(self, node):
        """Evaluate a literal: strings, booleans and ``None`` only."""
        value = node.value
        # ast.Constant only exists on Python 3, where str is the text type
        if value is None or isinstance(value, (bool, str)):
            return value
        # numbers, bytes and Ellipsis were never supported
        _raise_invalid_node(node)

    # only reached on interpreters that still produce ast.Str nodes
    def visit_Str(self, node):
        return node.s

    # NameConstant is Python 3.4 only so do not insist on coverage
    def visit_NameConstant(self, node):  # pragma: no cover
        return node.value

    def generic_visit(self, node):
        """Reject every node type without a dedicated visitor."""
        _raise_invalid_node(node)


class Cache:

    """Global bibtex extension information cache. Stored in
    ``app.env.bibtex_cache``, so must be picklable.
    """

    bibfiles = None
    """A :class:`dict` mapping .bib file names (relative to the top
    source folder) to :class:`BibfileCache` instances.
    """

    _bibliographies = None
    """Each bibliography directive is assigned an id of the form
    bibtex-bibliography-xxx. This :class:`dict` maps each docname to
    another :class:`dict` which maps each id to information about the
    bibliography directive, :class:`BibliographyCache`. We need to
    store this extra information separately because it cannot be
    stored in the :class:`~sphinxcontrib.bibtex.nodes.bibliography`
    nodes themselves.
    """

    _cited = None
    """A :class:`dict` mapping each docname to a :class:`set` of
    citation keys.
    """

    _enum_count = None
    """A :class:`dict` mapping each docname to an :class:`int`
    representing the current bibliography enumeration counter.
    """

    def __init__(self):
        self.bibfiles = {}
        self._bibliographies = collections.defaultdict(dict)
        # ordered set, so that citation order within a document is kept
        self._cited = collections.defaultdict(oset)
        self._enum_count = {}

    def purge(self, docname):
        """Remove all information related to *docname*.

        :param docname: The document name.
        :type docname: :class:`str`
        """
        self._bibliographies.pop(docname, None)
        self._cited.pop(docname, None)
        self._enum_count.pop(docname, None)

    def inc_enum_count(self, docname):
        """Increment enumeration list counter for document *docname*.

        :raises KeyError: If no counter has been set for *docname*.
        """
        self._enum_count[docname] += 1

    def set_enum_count(self, docname, value):
        """Set enumeration list counter for document *docname* to *value*."""
        self._enum_count[docname] = value

    def get_enum_count(self, docname):
        """Get enumeration list counter for document *docname*.

        :raises KeyError: If no counter has been set for *docname*.
        """
        return self._enum_count[docname]

    def add_cited(self, key, docname):
        """Add the given *key* to the set of cited keys for *docname*.

        :param key: The citation key.
        :type key: :class:`str`
        :param docname: The document name.
        :type docname: :class:`str`
        """
        self._cited[docname].add(key)

    def get_cited_docnames(self, key):
        """Return the *docnames* from which the given *key* is cited.

        :param key: The citation key.
        :type key: :class:`str`
        :return: A :class:`frozenset` of document names.
        """
        return frozenset(
            docname for docname, keys in six.iteritems(self._cited)
            if key in keys)

    def get_label_from_key(self, key):
        """Return label for the given key.

        :raises KeyError: If no bibliography has a label for *key*.
        """
        for bibcache in self.get_all_bibliography_caches():
            if key in bibcache.labels:
                return bibcache.labels[key]
        raise KeyError("%s not found" % key)

    def get_all_cited_keys(self):
        """Yield all citation keys, sorted first by document
        (alphabetical), then by citation order in the document.
        """
        for docname in sorted(self._cited):
            for key in self._cited[docname]:
                yield key

    def set_bibliography_cache(self, docname, id_, bibcache):
        """Register *bibcache* (:class:`BibliographyCache`)
        with id *id_* for document *docname*.
        """
        assert id_ not in self._bibliographies[docname]
        self._bibliographies[docname][id_] = bibcache

    def get_bibliography_cache(self, docname, id_):
        """Return :class:`BibliographyCache` with id *id_* in
        document *docname*.

        :raises KeyError: If *id_* is not registered for *docname*.
        """
        return self._bibliographies[docname][id_]

    def get_all_bibliography_caches(self):
        """Yield all bibliography caches, over all documents."""
        for bibcaches in six.itervalues(self._bibliographies):
            for bibcache in six.itervalues(bibcaches):
                yield bibcache

    def _get_bibliography_entries(self, docname, id_, warn):
        """Yield filtered bibliography entries, sorted by occurrence
        in the bib file.

        :param docname: The document containing the bibliography.
        :param id_: The id of the bibliography directive.
        :param warn: Called with a message string when the filter
            expression cannot be evaluated.
        """
        # get the information of this bibliography node
        bibcache = self.get_bibliography_cache(docname=docname, id_=id_)
        # generate entries
        for bibfile in bibcache.bibfiles:
            data = self.bibfiles[bibfile].data
            for entry in six.itervalues(data.entries):
                cited_docnames = self.get_cited_docnames(entry.key)
                visitor = _FilterVisitor(
                    entry=entry,
                    docname=docname,
                    cited_docnames=cited_docnames)
                try:
                    success = visitor.visit(bibcache.filter_)
                except ValueError as err:
                    warn("syntax error in :filter: expression; %s" % err)
                    # recover by falling back to the default
                    success = bool(cited_docnames)
                if success:
                    # entries are modified in an unpicklable way
                    # when formatting, so fetch a deep copy
                    yield copy.deepcopy(entry)

    def get_bibliography_entries(self, docname, id_, warn):
        """Return filtered bibliography entries, sorted by citation order.

        :param docname: The document containing the bibliography.
        :param id_: The id of the bibliography directive.
        :param warn: Called with a message string when the filter
            expression cannot be evaluated.
        :return: A :class:`list` of entries: cited entries first, in
            citation order, then the remaining entries in bib file
            order.
        """
        # get entries, ordered by bib file occurrence
        entries = OrderedDict(
            (entry.key, entry) for entry in
            self._get_bibliography_entries(
                docname=docname, id_=id_, warn=warn))
        # order entries according to which were cited first
        # first, we add all keys that were cited
        # then, we add all remaining keys
        sorted_entries = []
        for key in self.get_all_cited_keys():
            try:
                entry = entries.pop(key)
            except KeyError:
                pass
            else:
                sorted_entries.append(entry)
        sorted_entries.extend(six.itervalues(entries))
        return sorted_entries
class BibfileCache(collections.namedtuple('BibfileCache', 'mtime data')):

    """Contains information about a parsed .bib file.

    .. attribute:: mtime

        A :class:`float` representing the modification time of the .bib
        file when it was last parsed.

    .. attribute:: data

        A :class:`pybtex.database.BibliographyData` containing the
        parsed .bib file.
    """
class BibliographyCache(collections.namedtuple(
    'BibliographyCache',
    """bibfiles style encoding
list_ enumtype start labels labelprefix
filter_ curly_bracket_strip
""")):

    """Contains information about a bibliography directive.

    .. attribute:: bibfiles

        A :class:`list` of :class:`str`\\ s containing the .bib file
        names (relative to the top source folder) that contain the
        references.

    .. attribute:: style

        The bibtex style.

    .. attribute:: encoding

        Presumably the encoding of the .bib files; it is not used in
        this module, so confirm against the bibliography directive.

    .. attribute:: list_

        The list type.

    .. attribute:: enumtype

        The sequence type (only used for enumerated lists).

    .. attribute:: start

        The first ordinal of the sequence (only used for enumerated lists).

    .. attribute:: labels

        Maps citation keys to their final labels.

    .. attribute:: labelprefix

        This bibliography's string prefix for pybtex generated labels.

    .. attribute:: filter_

        An :class:`ast.AST` node, containing the parsed filter expression.

    .. attribute:: curly_bracket_strip

        Presumably whether curly brackets are stripped from the
        entries; it is not used in this module, so confirm against the
        bibliography directive.
    """