Source code for sagenb.notebook.docHTMLProcessor

# -*- coding: utf-8 -*
r"""
Live Documentation in the Notebook

Conversion of HTML (output by Sphinx or docutils) to Sage worksheet txt
file.

This takes an HTML document, i.e., Sage documentation, and returns it in
the editable format (notebook worksheet format with evaluable examples). It
also returns a string representing the CSS link for the document.  The HTML
parser is setup to return only the body of the HTML documentation page and
to re-format Sage examples and type-setting.

This module contains three classes:

- :class:`sagenb.notebook.docHTMLProcessor.genericHTMLProcessor`:
  gathers all the common methods of the other two classes.

- :class:`sagenb.notebook.docHTMLProcessor.SphinxHTMLProcessor`:
  translates HTML file generated by Sphinx into a worksheet text file

- :class:`sagenb.notebook.docHTMLProcessor.docutilsHTMLProcessor`:
  translates HTML file generated by docutils ``rst2html`` command into a
  worksheet text file

.. NOTE::

    This extension of htmllib.HTMLParser was partly inspired by Mark
    Pilgrim's 'Dive Into Python' examples.

AUTHORS:

- Dorian Raymer (2006): first version

- William Stein (2007-06-10): rewrite to work with twisted Sage notebook

- Mike Hansen (2008-09-27): Rewrite to work with Sphinx HTML documentation

- Sebastien Labbe (2011-01-15): Added a new class named
  docutilsHTMLProcessor used for translating the html output of the
  rst2html docutils command run on a rst file into worksheet text file.
  Also added a new class named genericHTMLProcessor which gathers the
  common methods of both docutilsHTMLProcessor and SphinxHTMLProcessor
  classes. Added lots of doctests to make its coverage 100% doctested.

EXAMPLES:

Process the output of docutils ``rst2html`` command::

    sage: rst = ""
    sage: rst += "Additions in Sage\n"
    sage: rst += "-----------------\n"
    sage: rst += "\n"
    sage: rst += "Let's do easy computations with Sage::\n"
    sage: rst += "\n"
    sage: rst += "    s" + "age: 4 + 3\n"
    sage: rst += "    7\n"
    sage: rst += "    s" + "age: 1 - 2\n"
    sage: rst += "    -1\n"
    sage: rst += "\n"
    sage: rst += "Let's do `x^2`::\n"
    sage: rst += "\n"
    sage: rst += "    s" + "age: x^2\n"
    sage: rst += "    x^2\n"
    sage: from docutils.core import publish_string
    sage: html = publish_string(rst, writer_name='html')
    sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
    sage: p = docutilsHTMLProcessor()
    sage: txt = p.process_doc_html(html)
    sage: len(txt)
    191
    sage: print(txt)
    <h1 class="title">Additions in Sage</h1>
    <BLANKLINE>
    <BLANKLINE>
    <BLANKLINE>
    <p>Let's do easy computations with Sage:</p>
    <BLANKLINE>
    {{{id=0|
    4 + 3
    ///
    7
    }}}
    <BLANKLINE>
    {{{id=1|
    1 - 2
    ///
    -1
    }}}
    <BLANKLINE>
    <p>Let's do $x^2$:</p>
    <BLANKLINE>
    {{{id=2|
    x^2
    ///
    x^2
    }}}
    <BLANKLINE>
    <BLANKLINE>

WARNING:

    Input strings must be unicode.
"""
#############################################################################
#       Copyright (C) 2007 William Stein <wstein@gmail.com> and Dorian Raimer
#       Copyright (C) 2011 Sebastien Labbe <slabqc at gmail.com>
#  Distributed under the terms of the GNU General Public License (GPL)
#  The full text of the GPL is available at:
#                  http://www.gnu.org/licenses/
#############################################################################
from __future__ import unicode_literals

from html.parser import HTMLParser
from html.entities import entitydefs

from flask import Markup
from sagenb.misc.misc import unicode_str


[docs]class genericHTMLProcessor(HTMLParser): r""" This class gathers the methods that are common to both classes :class:`sagenb.notebook.SphinxHTMLProcessor` and :class:`sagenb.notebook.docutilsHTMLProcessor` . """
[docs] def process_doc_html(self, doc_in): r""" Returns processed HTML input as HTML output. This is the only method that needs to be called externally. INPUT: - ``doc_in`` - a string containing properly formed HTML OUTPUT: - a string; the processed HTML EXAMPLES:: sage: rst = "" sage: rst += "Title\n" sage: rst += "-----\n" sage: rst += "n" sage: rst += "Some text\n" sage: from docutils.core import publish_string sage: html = publish_string(rst, writer_name='html') sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: txt = p.process_doc_html(html) sage: len(txt) 51 sage: txt u'<h1 class="title">Title</h1>\n\n<p>nSome text</p>\n\n\n\n' """ # self.feed() is a HTMLParser method and starts everything # off; Most of the functions here are extensions to # HTMLParser, and may never actually be visibly called here. # This module works with unicode literals. In case that input data is # ascii, exceptions may occur. So, input data must be converted to # unicode if it were not. doc_in = unicode_str(doc_in) self.feed(doc_in) # HTMLParser call self.close() # HTMLParser call self.hand_off_temp_pieces('to_doc_pieces') return self.all_pieces.replace('\\(', '').replace('\\)', '').replace('\\[', '').replace('\\]', '')
[docs] def hand_off_temp_pieces(self, piece_type): r""" To separate the documentation's content from the Sage examples, everything is split into one of two cell types. This method puts the current ``self.temp_pieces`` into ``self.all_pieces``. INPUT: - ``piece_type`` - a string; indicates the type of and how to process the current ``self.temp_pieces``. It can be one of the following: - ``"to_doc_pieces"`` - put temp_pieces in all_pieces - ``"ignore"`` - delete temp_pieces - ``"to_cell_pieces"`` - translate temp_pieces into cells and put it in all_pieces EXAMPLES: Move temporary pieces to all pieces:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.hand_off_temp_pieces('to_doc_pieces') sage: p.all_pieces u'a lot of stuff done bunch of tmp strings' sage: p.temp_pieces [] Ignore temporary pieces:: sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.hand_off_temp_pieces('ignore') sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces [] Translate temporary pieces (starting with sage prompt) into cells:: sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['sage'+': 4+4\n', '8\n', 'sage'+': 9-4\n', '5\n'] sage: p.hand_off_temp_pieces('to_cell_pieces') sage: print(p.all_pieces) a lot of stuff done {{{id=0| 4+4 /// 8 }}} <BLANKLINE> {{{id=1| 9-4 /// 5 }}} sage: p.temp_pieces [] Translate temporary pieces (not starting with sage prompt) into cells:: sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.hand_off_temp_pieces('to_cell_pieces') sage: print(p.all_pieces) a lot of stuff done <pre class="literal-block"> bunch of tmp strings </pre> sage: p.temp_pieces [] """ pieces = "".join(self.temp_pieces) pieces = pieces.lstrip() if piece_type == 'to_doc_pieces': self.all_pieces += pieces self.temp_pieces = [] elif piece_type == 'ignore': self.temp_pieces = [] elif piece_type == 'to_cell_pieces': pieces = self.process_cell_input_output(pieces) self.all_pieces += pieces self.temp_pieces = [] else: raise ValueError('unknown piece_type(=%s)' % piece_type)
[docs] def get_cellcount(self): r""" Return the current cell count and increment it by one. OUTPUT: - an int EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: d = docutilsHTMLProcessor() sage: d.get_cellcount() 0 sage: d.get_cellcount() 1 :: sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor sage: d = SphinxHTMLProcessor() sage: d.get_cellcount() 0 sage: d.get_cellcount() 1 """ self.cellcount += 1 return self.cellcount - 1
[docs] def process_cell_input_output(self, cell_piece): r""" Process and return a ``cell_piece``. All divs with CSS class="highlight" (if generated with Sphinx) or class="literal-block" (if generated with docutils) contain code examples. They include - Models of how the function works. These begin with, e.g., 'INPUT:' and are re-styled as divs with class="usage_model". - Actual Sage input and output. These begin with 'sage:'. The input and output are separated according to the Notebook edit format. INPUT: - ``cell_piece`` - a string; a cell piece OUTPUT: - a string; the processed cell piece EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: s = "s" + "age: 4 + 4\n8" # avoid the doctest script to parse "sage:" sage: p.process_cell_input_output(s) u'\n{{{id=0|\n4 + 4\n///\n8\n}}}\n\n' sage: print(p.process_cell_input_output(s)) {{{id=1| 4 + 4 /// 8 }}} :: sage: s = "age: 4 + 4\n8" sage: print(p.process_cell_input_output(s)) <pre class="literal-block"> age: 4 + 4 8 </pre> :: sage: s = '&gt;'*3 + " 4 + 4\n8" sage: print(p.process_cell_input_output(s)) {{{id=2| 4 + 4 /// 8 }}} :: sage: s = "s" + "age: 4 + 4\n8\ns" + "age: 2 + 2\n4" sage: print(p.process_cell_input_output(s)) {{{id=3| 4 + 4 /// 8 }}} <BLANKLINE> {{{id=4| 2 + 2 /// 4 }}} """ if cell_piece[:5] != 'sage:' and cell_piece[:12] != '&gt;'*3: piece = self.false_positive_input_output_cell(cell_piece) else: # group and format inputs and outputs pieces = cell_piece.split('\n') output_flag = False piece = '\n{{{id=%s|\n'%self.get_cellcount() for p in pieces: if p[:6] == 'sage: ' and not output_flag: piece += p[6:] + '\n' elif p[:6] == 'sage: ' and output_flag: piece += '\n}}}\n\n{{{id=%s|\n'%self.get_cellcount() + p[6:] + '\n' output_flag = False elif p[:6] == '....: ': piece += p[6:] + '\n' elif p[:13] == '&gt;'*3+' ' and not output_flag: piece += p[13:] + '\n' elif p[:13] == '&gt;'*3+' ' and output_flag: piece += '\n}}}\n\n{{{id=%s|\n'%self.get_cellcount() + p[13:] + '\n' output_flag = False elif p[:4] == '... ': piece += p[4:] + '\n' else: # first occurrence of an output string # write /// denoting output if output_flag is False: piece += '///' if p: piece += '\n' + p output_flag = True # multiple output lines exist, don't need /// repeated else: piece += p piece += '\n}}}\n\n' return Markup(piece).unescape()
[docs] def handle_starttag(self, tag, attrs): """ introduced when replacing SGMLParser by HTMLParser """ try: method = getattr(self, 'start_' + tag) except AttributeError: self.unknown_starttag(tag, attrs) else: method(attrs)
[docs] def handle_endtag(self, tag): """ introduced when replacing SGMLParser by HTMLParser """ try: method = getattr(self, 'end_' + tag) except AttributeError: self.unknown_endtag(tag) else: method()
############################################## # General tag handlers # These just append their HTML to self.temp_pieces.
[docs] def unknown_starttag(self, tag, attrs): r""" INPUT: - ``tag`` - string - ``attrs`` - list of tuples EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: tag = 'style' sage: attrs = [('type', 'text/css')] sage: p.unknown_starttag(tag, attrs) sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings', u'<style type="text/css">'] """ if self.keep_data: strattrs = "".join([' %s="%s"' % (key, value) for key, value in attrs]) self.temp_pieces.append("<%(tag)s%(strattrs)s>" % locals())
[docs] def unknown_endtag(self, tag): r""" INPUT: - ``tag`` - string EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: p.unknown_endtag('head') sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings', u'</head>'] """ if self.keep_data: self.temp_pieces.append("</%(tag)s>" % locals())
[docs] def handle_data(self, data): r""" INPUT: - ``data`` - string EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: p.handle_data('some important data') sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings', 'some important data'] """ if self.keep_data: self.temp_pieces.append(data)
[docs] def handle_charref(self, ref): r""" INPUT: - ``ref`` - string EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: p.handle_charref('160') sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings', u'&#160;'] """ if self.keep_data: self.temp_pieces.append("&#%(ref)s;" % locals())
[docs] def handle_entityref(self, ref): r""" INPUT: - ``ref`` - string EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: p.handle_entityref('160') sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings', u'&160'] """ if self.keep_data: self.temp_pieces.append("&%(ref)s" % locals()) if ref in entitydefs: self.temp_pieces.append(';')
[docs] def handle_comment(self, data): r""" INPUT: - ``data`` - string EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: p.handle_comment('important comment') sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings', u'<!--important comment-->'] """ if self.keep_data: self.temp_pieces.append("<!--%(data)s-->" % locals())
[docs] def handle_pi(self, text): r""" Handle processing instructions INPUT: - ``text`` - string EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: p.handle_pi('instructions') sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings', u'<?instructions>'] """ if self.keep_data: self.temp_pieces.append("<?%(text)s>" % locals())
[docs] def handle_decl(self, text): r""" INPUT: - ``data`` - string EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: p.handle_decl('declaration') sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings', u'<!declaration>'] """ if self.keep_data: self.temp_pieces.append("<!%(text)s>" % locals())
############################################## # Specific tag handlers
[docs] def start_body(self, attrs): r""" Set ``self.keep_data`` to True upon finding the opening body tag. INPUT: - ``attrs`` - a string:string dictionary containing the element's attributes EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor sage: d = SphinxHTMLProcessor() sage: d.keep_data False sage: d.start_body(None) sage: d.keep_data True :: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: d = docutilsHTMLProcessor() sage: d.keep_data False sage: d.start_body(None) sage: d.keep_data True """ self.keep_data = True
[docs] def end_body(self): r""" EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.end_body() sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings'] """ pass
[docs] def end_html(self): r""" INPUT: - ``data`` - string EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.end_html() sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings'] """ pass
[docs]class SphinxHTMLProcessor(genericHTMLProcessor):
[docs] def reset(self): r""" Initialize necessary variables. Called by :meth:`HTMLParser.__init__`. EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor sage: d = SphinxHTMLProcessor() #indirect doctest sage: d.keep_data False sage: d.in_highlight_div False sage: d.temp_pieces [] sage: d.all_pieces u'' sage: d.cellcount 0 """ # flags self.keep_data = False # don't keep anything before the <body> tag self.in_highlight_div = False # lists of what the parser keeps self.temp_pieces = [] self.all_pieces = '' # counters self.cellcount = 0 HTMLParser.reset(self)
[docs] def false_positive_input_output_cell(self, cell_piece): r""" Return the untouched html string of a false positive input output cell. A false positive input-output cell come from a block of code which doesn't start with the sage prompt string 'sage:' or the Python prompt '>>>'. INPUT: - ``cell_piece`` - string, a cell piece OUPUT: string EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor sage: p = SphinxHTMLProcessor() sage: s = "sage -rst2html -h" sage: print(p.false_positive_input_output_cell(s)) <div class="highlight"><pre> sage -rst2html -h </pre></div> """ piece = '<div class="highlight"><pre>\n' piece += cell_piece piece = piece.replace('{','{&nbsp;') piece = piece.replace('}','}&nbsp;') piece += '\n</pre></div>' return piece
############################################# # Specific tag handlers #
[docs] def start_div(self, attrs): r""" Find out if we are starting a highlighted div. Once we hit the <div> tag in a highlighted block, hand of all of the pieces we've encountered so far and ignore the tag. INPUT: - ``attrs`` - list of tuple EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor sage: p = SphinxHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: attrs = [('class', 'highlight')] sage: p.start_div(attrs) sage: p.all_pieces u'a lot of stuff done bunch of tmp strings' sage: p.temp_pieces [] sage: p.in_highlight_div True :: sage: p = SphinxHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: attrs = [('class', 'something-else')] sage: p.start_div(attrs) sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings', u'<div class="something-else">'] sage: p.in_highlight_div False """ for name, value in attrs: if name.lower()=='class' and value.lower()=='highlight': self.in_highlight_div = True self.hand_off_temp_pieces('to_doc_pieces') return self.unknown_starttag('div', attrs)
[docs] def end_div(self): r""" Once we end the highlighted div, convert all of the pieces to cells. EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor sage: p = SphinxHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings\n'] sage: p.keep_data = True sage: attrs = [('class', 'highlight')] sage: p.start_div(attrs) sage: p.start_pre([]) sage: sprompt = 'sa' + 'ge' + ': ' # to avoid problems with doctest script sage: p.handle_data('%s4+4\n8\n%sx^2\nx^2\n' % (sprompt, sprompt)) sage: p.end_pre() sage: p.end_div() sage: print(p.all_pieces) a lot of stuff done bunch of tmp strings {{{id=0| 4+4 /// 8 }}} <BLANKLINE> {{{id=1| x^2 /// x^2 }}} sage: p.temp_pieces [] sage: p.in_highlight_div False :: sage: p = SphinxHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: attrs = [('class', 'something-else')] sage: p.start_div(attrs) sage: p.handle_data('some data') sage: p.end_div() sage: print(p.all_pieces) a lot of stuff done sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings', u'<div class="something-else">', 'some data', u'</div>'] sage: p.in_highlight_div False """ if self.in_highlight_div: self.in_highlight_div = False self.hand_off_temp_pieces('to_cell_pieces') return self.temp_pieces.append("</div>")
[docs] def start_pre(self, attrs): r""" Ignore tag <pre> when inside highligh div. INPUT: - ``attrs`` - list of tuple EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor sage: p = SphinxHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: p.in_highlight_div = True sage: attrs = [] sage: p.start_pre(attrs) sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings'] :: sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor sage: p = SphinxHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: p.in_highlight_div = False sage: attrs = [] sage: p.start_pre(attrs) sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings', u'<pre>'] """ if self.in_highlight_div: return self.unknown_starttag('pre',attrs)
[docs] def end_pre(self): r""" Ignore tag </pre> when inside highligh div. EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor sage: p = SphinxHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: p.in_highlight_div = True sage: p.end_pre() sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings'] :: sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: p.in_highlight_div = False sage: p.end_pre() sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings', u'</pre>'] """ if self.in_highlight_div: return self.unknown_endtag('pre')
# Ignore forms
[docs] def start_form(self, attrs): r""" Hand of everything we've accumulated so far. Forms are ignored. INPUT: - ``attrs`` - list of tuple EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor sage: p = SphinxHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: attrs = [] sage: p.start_form(attrs) sage: p.all_pieces u'a lot of stuff done bunch of tmp strings' sage: p.temp_pieces [] """ self.hand_off_temp_pieces('to_doc_pieces') return
[docs] def end_form(self): r""" Ignore all of the pieces since we started the form. EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor sage: p = SphinxHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.end_form() sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces [] """ self.hand_off_temp_pieces('ignore') return
[docs] def start_span(self, attrs): r""" Ignore all spans that occur within highlighted blocks INPUT: - ``attrs`` - list of tuple EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor sage: p = SphinxHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: p.in_highlight_div = True sage: attrs = [] sage: p.start_span(attrs) sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings'] :: sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor sage: p = SphinxHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: p.in_highlight_div = False sage: attrs = [] sage: p.start_span(attrs) sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings', u'<span>'] """ if self.in_highlight_div: return self.unknown_starttag('span', attrs)
[docs] def end_span(self): r""" Ignore all spans that occur within highlighted blocks EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor sage: p = SphinxHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: p.in_highlight_div = True sage: p.end_span() sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings'] :: sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: p.in_highlight_div = False sage: p.end_span() sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings', u'</span>'] """ if self.in_highlight_div: return self.unknown_endtag('span')
[docs]class docutilsHTMLProcessor(genericHTMLProcessor): r""" Translates output of the docutils parser rst2html into notebook text. EXAMPLES:: sage: rst = "" sage: rst += "Additions in Sage\n" sage: rst += "-----------------\n" sage: rst += "\n" sage: rst += "Let's do easy computations with Sage::\n" sage: rst += "\n" sage: rst += " s" + "age: 4 + 3\n" sage: rst += " 7\n" sage: rst += " s" + "age: 1 - 2\n" sage: rst += " -1\n" sage: rst += "\n" sage: rst += "Let's do `x^2`::\n" sage: rst += "\n" sage: rst += " s" + "age: x^2\n" sage: rst += " x^2\n" sage: from docutils.core import publish_string sage: html = publish_string(rst, writer_name='html') sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: txt = p.process_doc_html(html) sage: len(txt) 191 sage: print(txt) <h1 class="title">Additions in Sage</h1> <BLANKLINE> <BLANKLINE> <BLANKLINE> <p>Let's do easy computations with Sage:</p> <BLANKLINE> {{{id=0| 4 + 3 /// 7 }}} <BLANKLINE> {{{id=1| 1 - 2 /// -1 }}} <BLANKLINE> <p>Let's do $x^2$:</p> <BLANKLINE> {{{id=2| x^2 /// x^2 }}} <BLANKLINE> <BLANKLINE> """
[docs] def reset(self): r""" Initialize necessary variables. Called by :meth:`HTMLParser.__init__`. EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: d = docutilsHTMLProcessor() #indirect doctest sage: d.keep_data False sage: d.in_pre_litteral_block False sage: d.in_div_footer_block False sage: d.temp_pieces [] sage: d.all_pieces u'' sage: d.cellcount 0 """ # flags self.keep_data = False # don't keep anything before the <body> tag self.in_pre_litteral_block = False self.in_div_footer_block = False # lists of what the parser keeps self.temp_pieces = [] self.all_pieces = '' # counters self.cellcount = 0 HTMLParser.reset(self)
[docs] def false_positive_input_output_cell(self, cell_piece): r""" Return the untouched html string of a false positive input output cell. A false positive input-output cell come from a block of code which doesn't start with the sage prompt string 'sage:' or the Python prompt '>>>'. INPUT: - ``cell_piece`` - string, a cell piece OUPUT: string EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: s = "sage -rst2html -h" sage: print(p.false_positive_input_output_cell(s)) <pre class="literal-block"> sage -rst2html -h </pre> """ piece = '<pre class="literal-block">\n' piece += cell_piece piece = piece.replace('{','{&nbsp;') piece = piece.replace('}','}&nbsp;') piece += '\n</pre>' return piece
############################################# # Specific tag handlers # # sage blocks
[docs] def start_pre(self, attrs): r""" INPUT: - ``attrs`` - list of tuple EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: attrs = [('class', 'literal-block')] sage: p.start_pre(attrs) sage: p.all_pieces u'a lot of stuff done bunch of tmp strings' sage: p.temp_pieces [] sage: p.in_pre_litteral_block True :: sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: attrs = [('class', 'something-else')] sage: p.start_pre(attrs) sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings', u'<pre class="something-else">'] sage: p.in_pre_litteral_block False """ # Find out if we are starting a pre litteral-block for name, value in attrs: if name.lower() == 'class' and value.lower() == 'literal-block': self.in_pre_litteral_block = True self.hand_off_temp_pieces('to_doc_pieces') return self.unknown_starttag('pre',attrs)
[docs] def end_pre(self): r""" EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: attrs = [('class', 'literal-block')] sage: p.start_pre(attrs) sage: sprompt = 'sa' + 'ge' + ': ' # to avoid problems with doctest script sage: p.handle_data('%s4+4\n8\n%sx^2\nx^2\n' % (sprompt, sprompt)) sage: p.end_pre() sage: print(p.all_pieces) a lot of stuff done bunch of tmp strings {{{id=0| 4+4 /// 8 }}} <BLANKLINE> {{{id=1| x^2 /// x^2 }}} sage: p.temp_pieces [] sage: p.in_pre_litteral_block False :: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: attrs = [('class', 'something-else')] sage: p.start_pre(attrs) sage: p.handle_data('some data') sage: p.end_pre() sage: print(p.all_pieces) a lot of stuff done sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings', u'<pre class="something-else">', 'some data', u'</pre>'] sage: p.in_pre_litteral_block False """ if self.in_pre_litteral_block: self.in_pre_litteral_block = False self.hand_off_temp_pieces('to_cell_pieces') return self.unknown_endtag('pre')
# Ignore div
[docs] def start_div(self, attrs): r""" INPUT: - ``attrs`` - list of tuple EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: attrs = [('class', 'document'), ('id', 'title')] sage: p.start_div(attrs) sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings'] """ # Find out if we are starting a div footer block for name, value in attrs: if name.lower()=='class' and value.lower()=='footer': self.hand_off_temp_pieces('to_doc_pieces') self.keep_data = False self.in_div_footer_block = True return return
[docs] def end_div(self): r""" EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.end_div() sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings'] """ if self.in_div_footer_block: self.in_div_footer_block = False self.keep_data = True return
# latex role
[docs] def start_cite(self, attrs): r""" INPUT: - ``attrs`` - list of tuple EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: attrs = [] sage: p.start_cite(attrs) sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings', u'$'] """ self.temp_pieces.append("$") return
[docs] def end_cite(self): r""" EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data = True sage: p.start_cite([]) sage: p.handle_data('x^2') sage: p.end_cite() sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings', u'$', 'x^2', u'$'] """ self.temp_pieces.append("$") return
# script (for example for mathjax)
[docs] def start_script(self, attrs): r""" INPUT: - ``attrs`` - list of tuple EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: attrs = [('type', 'text/x-mathjax-config')] sage: p.start_script(attrs) sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data False """ self.keep_data = False return
[docs] def end_script(self): r""" EXAMPLES:: sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor sage: p = docutilsHTMLProcessor() sage: p.all_pieces = 'a lot of stuff done ' sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.end_script() sage: p.all_pieces 'a lot of stuff done ' sage: p.temp_pieces ['bunch ', 'of ', 'tmp ', 'strings'] sage: p.keep_data True """ self.keep_data = True return