# -*- coding: utf-8 -*-
r"""
Live Documentation in the Notebook
Conversion of HTML (output by Sphinx or docutils) to Sage worksheet txt
file.
This takes an HTML document, i.e., Sage documentation, and returns it in
the editable format (notebook worksheet format with evaluable examples). It
also returns a string representing the CSS link for the document. The HTML
parser is setup to return only the body of the HTML documentation page and
to re-format Sage examples and type-setting.
This module contains three classes:
- :class:`sagenb.notebook.docHTMLProcessor.genericHTMLProcessor`:
gathers all the common methods of the other two classes.
- :class:`sagenb.notebook.docHTMLProcessor.SphinxHTMLProcessor`:
translates HTML file generated by Sphinx into a worksheet text file
- :class:`sagenb.notebook.docHTMLProcessor.docutilsHTMLProcessor`:
translates HTML file generated by docutils ``rst2html`` command into a
worksheet text file
.. NOTE::
This extension of html.parser.HTMLParser was partly inspired by Mark
Pilgrim's 'Dive Into Python' examples.
AUTHORS:
- Dorian Raymer (2006): first version
- William Stein (2007-06-10): rewrite to work with twisted Sage notebook
- Mike Hansen (2008-09-27): Rewrite to work with Sphinx HTML documentation
- Sebastien Labbe (2011-01-15): Added a new class named
docutilsHTMLProcessor used for translating the html output of the
rst2html docutils command run on a rst file into worksheet text file.
Also added a new class named genericHTMLProcessor which gathers the
common methods of both docutilsHTMLProcessor and SphinxHTMLProcessor
classes. Added lots of doctests to make its coverage 100% doctested.
EXAMPLES:
Process the output of docutils ``rst2html`` command::
sage: rst = ""
sage: rst += "Additions in Sage\n"
sage: rst += "-----------------\n"
sage: rst += "\n"
sage: rst += "Let's do easy computations with Sage::\n"
sage: rst += "\n"
sage: rst += " s" + "age: 4 + 3\n"
sage: rst += " 7\n"
sage: rst += " s" + "age: 1 - 2\n"
sage: rst += " -1\n"
sage: rst += "\n"
sage: rst += "Let's do `x^2`::\n"
sage: rst += "\n"
sage: rst += " s" + "age: x^2\n"
sage: rst += " x^2\n"
sage: from docutils.core import publish_string
sage: html = publish_string(rst, writer_name='html')
sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
sage: p = docutilsHTMLProcessor()
sage: txt = p.process_doc_html(html)
sage: len(txt)
191
sage: print(txt)
<h1 class="title">Additions in Sage</h1>
<BLANKLINE>
<BLANKLINE>
<BLANKLINE>
<p>Let's do easy computations with Sage:</p>
<BLANKLINE>
{{{id=0|
4 + 3
///
7
}}}
<BLANKLINE>
{{{id=1|
1 - 2
///
-1
}}}
<BLANKLINE>
<p>Let's do $x^2$:</p>
<BLANKLINE>
{{{id=2|
x^2
///
x^2
}}}
<BLANKLINE>
<BLANKLINE>
WARNING:
Input strings must be unicode.
"""
#############################################################################
# Copyright (C) 2007 William Stein <wstein@gmail.com> and Dorian Raymer
# Copyright (C) 2011 Sebastien Labbe <slabqc at gmail.com>
# Distributed under the terms of the GNU General Public License (GPL)
# The full text of the GPL is available at:
# http://www.gnu.org/licenses/
#############################################################################
from __future__ import unicode_literals

from html import escape
from html.entities import entitydefs
from html.parser import HTMLParser

from flask import Markup

from sagenb.misc.misc import unicode_str
class genericHTMLProcessor(HTMLParser):
    r"""
    Common base of
    :class:`sagenb.notebook.docHTMLProcessor.SphinxHTMLProcessor` and
    :class:`sagenb.notebook.docHTMLProcessor.docutilsHTMLProcessor`.

    The methods below read and write four instance attributes:

    - ``keep_data`` - bool; when false, tags and text are dropped
    - ``temp_pieces`` - list of strings collected since the last hand-off
    - ``all_pieces`` - string; the worksheet text produced so far
    - ``cellcount`` - int; id that the next cell will receive

    This class does not initialise them; the subclasses do so in their
    ``reset`` method.
    """

    def process_doc_html(self, doc_in):
        r"""
        Return the worksheet text obtained from the HTML ``doc_in``.
        This is the only method that needs to be called externally.

        INPUT:

        - ``doc_in`` - a string containing properly formed HTML

        OUTPUT:

        - a string; the processed HTML, with every occurrence of the
          two-character sequences ``\(``, ``\)``, ``\[`` and ``\]``
          removed

        EXAMPLES::

            sage: rst = ""
            sage: rst += "Title\n"
            sage: rst += "-----\n"
            sage: rst += "n"
            sage: rst += "Some text\n"
            sage: from docutils.core import publish_string
            sage: html = publish_string(rst, writer_name='html')
            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: txt = p.process_doc_html(html)
            sage: len(txt)
            51
            sage: txt
            u'<h1 class="title">Title</h1>\n\n<p>nSome text</p>\n\n\n\n'
        """
        # This module works with unicode literals, so the input is
        # converted to unicode first if it is not already.
        doc_in = unicode_str(doc_in)
        # feed() drives the whole conversion: HTMLParser calls back into
        # the handle_* / start_* / end_* methods of this object.
        self.feed(doc_in)
        self.close()
        # Flush whatever is still pending after the last tag.
        self.hand_off_temp_pieces('to_doc_pieces')
        result = self.all_pieces
        # NOTE(review): these look like the TeX math delimiters emitted
        # for MathJax -- confirm that is why they are stripped.
        for delimiter in ('\\(', '\\)', '\\[', '\\]'):
            result = result.replace(delimiter, '')
        return result

    def hand_off_temp_pieces(self, piece_type):
        r"""
        Empty ``self.temp_pieces``, optionally moving its content into
        ``self.all_pieces``.

        The pieces are joined and stripped of leading whitespace before
        being appended.

        INPUT:

        - ``piece_type`` - a string; indicates how to process the current
          ``self.temp_pieces``. It can be one of the following:

          - ``"to_doc_pieces"`` - put temp_pieces in all_pieces
          - ``"ignore"`` - delete temp_pieces
          - ``"to_cell_pieces"`` - translate temp_pieces into cells and
            put it in all_pieces

        Any other value raises a ``ValueError`` and leaves
        ``self.temp_pieces`` untouched.

        EXAMPLES:

        Move temporary pieces to all pieces::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.hand_off_temp_pieces('to_doc_pieces')
            sage: p.all_pieces
            u'a lot of stuff done bunch of tmp strings'
            sage: p.temp_pieces
            []

        Ignore temporary pieces::

            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.hand_off_temp_pieces('ignore')
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            []

        Translate temporary pieces (starting with sage prompt) into cells::

            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['sage'+': 4+4\n', '8\n', 'sage'+': 9-4\n', '5\n']
            sage: p.hand_off_temp_pieces('to_cell_pieces')
            sage: print(p.all_pieces)
            a lot of stuff done
            {{{id=0|
            4+4
            ///
            8
            }}}
            <BLANKLINE>
            {{{id=1|
            9-4
            ///
            5
            }}}
            sage: p.temp_pieces
            []

        Translate temporary pieces (not starting with sage prompt) into
        cells::

            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.hand_off_temp_pieces('to_cell_pieces')
            sage: print(p.all_pieces)
            a lot of stuff done <pre class="literal-block">
            bunch of tmp strings
            </pre>
            sage: p.temp_pieces
            []
        """
        text = "".join(self.temp_pieces).lstrip()
        if piece_type == 'to_doc_pieces':
            self.all_pieces += text
        elif piece_type == 'to_cell_pieces':
            # NOTE(review): process_cell_input_output is not defined in
            # the part of the file reviewed here -- confirm where it lives.
            self.all_pieces += self.process_cell_input_output(text)
        elif piece_type != 'ignore':
            raise ValueError('unknown piece_type(=%s)' % piece_type)
        self.temp_pieces = []

    def get_cellcount(self):
        r"""
        Return the current cell count and increment it by one.

        OUTPUT:

        - an int

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: d = docutilsHTMLProcessor()
            sage: d.get_cellcount()
            0
            sage: d.get_cellcount()
            1

        ::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: d = SphinxHTMLProcessor()
            sage: d.get_cellcount()
            0
            sage: d.get_cellcount()
            1
        """
        current = self.cellcount
        self.cellcount = current + 1
        return current

    def handle_starttag(self, tag, attrs):
        """
        Dispatch an opening tag to ``start_<tag>`` when such a method
        exists, and to :meth:`unknown_starttag` otherwise.

        Introduced when replacing SGMLParser by HTMLParser.

        INPUT:

        - ``tag`` - string
        - ``attrs`` - list of tuples
        """
        handler = getattr(self, 'start_' + tag, None)
        if handler is None:
            self.unknown_starttag(tag, attrs)
        else:
            handler(attrs)

    def handle_endtag(self, tag):
        """
        Dispatch a closing tag to ``end_<tag>`` when such a method
        exists, and to :meth:`unknown_endtag` otherwise.

        Introduced when replacing SGMLParser by HTMLParser.

        INPUT:

        - ``tag`` - string
        """
        handler = getattr(self, 'end_' + tag, None)
        if handler is None:
            self.unknown_endtag(tag)
        else:
            handler()

    ##############################################
    # General tag handlers
    # These just append their HTML to self.temp_pieces.

    def unknown_starttag(self, tag, attrs):
        r"""
        Append the opening tag, rebuilt from ``tag`` and ``attrs``, to
        ``self.temp_pieces`` when ``self.keep_data`` is true.

        INPUT:

        - ``tag`` - string
        - ``attrs`` - list of ``(name, value)`` tuples; ``value`` is
          ``None`` for an attribute written without a value

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: tag = 'style'
            sage: attrs = [('type', 'text/css')]
            sage: p.unknown_starttag(tag, attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', u'<style type="text/css">']

        Attributes without a value are written back bare, and special
        characters in values are escaped::

            sage: p.temp_pieces = []
            sage: p.unknown_starttag('input', [('disabled', None), ('title', 'a & b')])
            sage: p.temp_pieces
            [u'<input disabled title="a &amp; b">']
        """
        if not self.keep_data:
            return
        rendered = []
        for key, value in attrs:
            if value is None:
                # HTMLParser reports ``<input disabled>`` as
                # ('disabled', None); do not turn that into "None".
                rendered.append(' %s' % key)
            else:
                # HTMLParser hands over attribute values with entities
                # already decoded, so they are re-escaped here to keep
                # the emitted tag well formed.
                rendered.append(' %s="%s"' % (key, escape(value, quote=True)))
        self.temp_pieces.append("<%s%s>" % (tag, "".join(rendered)))

    def unknown_endtag(self, tag):
        r"""
        Append the closing tag ``</tag>`` to ``self.temp_pieces`` when
        ``self.keep_data`` is true.

        INPUT:

        - ``tag`` - string

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.unknown_endtag('head')
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', u'</head>']
        """
        if self.keep_data:
            self.temp_pieces.append("</%s>" % tag)

    def handle_data(self, data):
        r"""
        Append text content to ``self.temp_pieces`` when
        ``self.keep_data`` is true.

        INPUT:

        - ``data`` - string

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.handle_data('some important data')
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', 'some important data']
        """
        if self.keep_data:
            self.temp_pieces.append(data)

    def handle_charref(self, ref):
        r"""
        Append the numeric character reference ``&#ref;`` to
        ``self.temp_pieces`` when ``self.keep_data`` is true.

        INPUT:

        - ``ref`` - string

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.handle_charref('160')
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', u'&#160;']
        """
        # NOTE(review): the html.parser documentation says this hook is
        # never called while ``convert_charrefs`` is true, which is the
        # constructor default; nothing visible here turns it off --
        # confirm whether feed() is expected to reach this method.
        if self.keep_data:
            self.temp_pieces.append("&#%s;" % ref)

    def handle_entityref(self, ref):
        r"""
        Append the entity reference ``&ref`` to ``self.temp_pieces`` when
        ``self.keep_data`` is true. The closing ``;`` is added as a
        separate piece, and only when ``ref`` is a known entity name.

        INPUT:

        - ``ref`` - string

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.handle_entityref('160')
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', u'&160']
        """
        # NOTE(review): like handle_charref, this hook is documented as
        # not being called while ``convert_charrefs`` is true -- confirm.
        if not self.keep_data:
            return
        self.temp_pieces.append("&%s" % ref)
        if ref in entitydefs:
            self.temp_pieces.append(';')

    def handle_pi(self, text):
        r"""
        Handle processing instructions: append ``<?text>`` to
        ``self.temp_pieces`` when ``self.keep_data`` is true.

        INPUT:

        - ``text`` - string

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.handle_pi('instructions')
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', u'<?instructions>']
        """
        if self.keep_data:
            self.temp_pieces.append("<?%s>" % text)

    def handle_decl(self, text):
        r"""
        Handle declarations: append ``<!text>`` to ``self.temp_pieces``
        when ``self.keep_data`` is true.

        INPUT:

        - ``text`` - string

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.handle_decl('declaration')
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', u'<!declaration>']
        """
        if self.keep_data:
            self.temp_pieces.append("<!%s>" % text)

    ##############################################
    # Specific tag handlers

    def start_body(self, attrs):
        r"""
        Set ``self.keep_data`` to True upon finding the opening body tag.
        The tag itself is not copied to the output.

        INPUT:

        - ``attrs`` - the attributes of the element, as passed on by
          :meth:`handle_starttag`; not used

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: d = SphinxHTMLProcessor()
            sage: d.keep_data
            False
            sage: d.start_body(None)
            sage: d.keep_data
            True

        ::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: d = docutilsHTMLProcessor()
            sage: d.keep_data
            False
            sage: d.start_body(None)
            sage: d.keep_data
            True
        """
        self.keep_data = True

    def end_body(self):
        r"""
        Do nothing, so that the closing body tag is left out of the
        output.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.end_body()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings']
        """
        pass

    def end_html(self):
        r"""
        Do nothing, so that the closing html tag is left out of the
        output.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.end_html()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings']
        """
        pass
class SphinxHTMLProcessor(genericHTMLProcessor):
    r"""
    Translates an HTML file generated by Sphinx into worksheet text.

    The content of every ``<div class="highlight">`` block is turned
    into cells; everything else found in the body is copied.
    """

    def reset(self):
        r"""
        Initialize necessary variables. Called by
        :meth:`HTMLParser.__init__`.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: d = SphinxHTMLProcessor() #indirect doctest
            sage: d.keep_data
            False
            sage: d.in_highlight_div
            False
            sage: d.temp_pieces
            []
            sage: d.all_pieces
            u''
            sage: d.cellcount
            0
        """
        # flags
        self.keep_data = False  # don't keep anything before the <body> tag
        self.in_highlight_div = False
        # lists of what the parser keeps
        self.temp_pieces = []
        self.all_pieces = ''
        # counters
        self.cellcount = 0
        HTMLParser.reset(self)

    #############################################
    # Specific tag handlers
    #

    def start_div(self, attrs):
        r"""
        Find out if we are starting a highlighted div.

        Once we hit the <div> tag in a highlighted block,
        hand off all of the pieces we've encountered so far
        and ignore the tag. Any other div is treated like an unknown
        tag.

        INPUT:

        - ``attrs`` - list of tuple

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: attrs = [('class', 'highlight')]
            sage: p.start_div(attrs)
            sage: p.all_pieces
            u'a lot of stuff done bunch of tmp strings'
            sage: p.temp_pieces
            []
            sage: p.in_highlight_div
            True

        ::

            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: attrs = [('class', 'something-else')]
            sage: p.start_div(attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', u'<div class="something-else">']
            sage: p.in_highlight_div
            False
        """
        for name, value in attrs:
            # ``value`` is None for an attribute written without a value
            # (``<div class>``), which must not be lower-cased.
            if (name.lower() == 'class' and value is not None
                    and value.lower() == 'highlight'):
                self.in_highlight_div = True
                self.hand_off_temp_pieces('to_doc_pieces')
                return
        self.unknown_starttag('div', attrs)

    def end_div(self):
        r"""
        Once we end the highlighted div, convert all of the pieces
        to cells. The closing tag of any other div is copied when
        ``self.keep_data`` is true.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings\n']
            sage: p.keep_data = True
            sage: attrs = [('class', 'highlight')]
            sage: p.start_div(attrs)
            sage: p.start_pre([])
            sage: sprompt = 'sa' + 'ge' + ': '    # to avoid problems with doctest script
            sage: p.handle_data('%s4+4\n8\n%sx^2\nx^2\n' % (sprompt, sprompt))
            sage: p.end_pre()
            sage: p.end_div()
            sage: print(p.all_pieces)
            a lot of stuff done bunch of tmp strings
            {{{id=0|
            4+4
            ///
            8
            }}}
            <BLANKLINE>
            {{{id=1|
            x^2
            ///
            x^2
            }}}
            sage: p.temp_pieces
            []
            sage: p.in_highlight_div
            False

        ::

            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: attrs = [('class', 'something-else')]
            sage: p.start_div(attrs)
            sage: p.handle_data('some data')
            sage: p.end_div()
            sage: print(p.all_pieces)
            a lot of stuff done
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', u'<div class="something-else">', 'some data', u'</div>']
            sage: p.in_highlight_div
            False
        """
        if self.in_highlight_div:
            self.in_highlight_div = False
            self.hand_off_temp_pieces('to_cell_pieces')
            return
        # start_div drops the opening tag while keep_data is false, so
        # the closing tag has to be dropped as well to stay balanced.
        if self.keep_data:
            self.temp_pieces.append("</div>")

    def start_pre(self, attrs):
        r"""
        Ignore tag <pre> when inside highlight div.

        INPUT:

        - ``attrs`` - list of tuple

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.in_highlight_div = True
            sage: attrs = []
            sage: p.start_pre(attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings']

        ::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.in_highlight_div = False
            sage: attrs = []
            sage: p.start_pre(attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', u'<pre>']
        """
        if not self.in_highlight_div:
            self.unknown_starttag('pre', attrs)

    def end_pre(self):
        r"""
        Ignore tag </pre> when inside highlight div.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.in_highlight_div = True
            sage: p.end_pre()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings']

        ::

            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.in_highlight_div = False
            sage: p.end_pre()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', u'</pre>']
        """
        if not self.in_highlight_div:
            self.unknown_endtag('pre')

    # Ignore spans inside highlighted blocks

    def start_span(self, attrs):
        r"""
        Ignore all spans that occur within highlighted blocks

        INPUT:

        - ``attrs`` - list of tuple

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.in_highlight_div = True
            sage: attrs = []
            sage: p.start_span(attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings']

        ::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.in_highlight_div = False
            sage: attrs = []
            sage: p.start_span(attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', u'<span>']
        """
        if not self.in_highlight_div:
            self.unknown_starttag('span', attrs)

    def end_span(self):
        r"""
        Ignore all spans that occur within highlighted blocks

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.in_highlight_div = True
            sage: p.end_span()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings']

        ::

            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.in_highlight_div = False
            sage: p.end_span()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', u'</span>']
        """
        if not self.in_highlight_div:
            self.unknown_endtag('span')
class docutilsHTMLProcessor(genericHTMLProcessor):
    r"""
    Translates output of the docutils parser rst2html into notebook text.

    EXAMPLES::

        sage: rst = ""
        sage: rst += "Additions in Sage\n"
        sage: rst += "-----------------\n"
        sage: rst += "\n"
        sage: rst += "Let's do easy computations with Sage::\n"
        sage: rst += "\n"
        sage: rst += " s" + "age: 4 + 3\n"
        sage: rst += " 7\n"
        sage: rst += " s" + "age: 1 - 2\n"
        sage: rst += " -1\n"
        sage: rst += "\n"
        sage: rst += "Let's do `x^2`::\n"
        sage: rst += "\n"
        sage: rst += " s" + "age: x^2\n"
        sage: rst += " x^2\n"
        sage: from docutils.core import publish_string
        sage: html = publish_string(rst, writer_name='html')
        sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
        sage: p = docutilsHTMLProcessor()
        sage: txt = p.process_doc_html(html)
        sage: len(txt)
        191
        sage: print(txt)
        <h1 class="title">Additions in Sage</h1>
        <BLANKLINE>
        <BLANKLINE>
        <BLANKLINE>
        <p>Let's do easy computations with Sage:</p>
        <BLANKLINE>
        {{{id=0|
        4 + 3
        ///
        7
        }}}
        <BLANKLINE>
        {{{id=1|
        1 - 2
        ///
        -1
        }}}
        <BLANKLINE>
        <p>Let's do $x^2$:</p>
        <BLANKLINE>
        {{{id=2|
        x^2
        ///
        x^2
        }}}
        <BLANKLINE>
        <BLANKLINE>
    """

    def reset(self):
        r"""
        Initialize necessary variables. Called by
        :meth:`HTMLParser.__init__`.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: d = docutilsHTMLProcessor() #indirect doctest
            sage: d.keep_data
            False
            sage: d.in_pre_litteral_block
            False
            sage: d.in_div_footer_block
            False
            sage: d.temp_pieces
            []
            sage: d.all_pieces
            u''
            sage: d.cellcount
            0
        """
        # flags
        self.keep_data = False  # don't keep anything before the <body> tag
        self.in_pre_litteral_block = False
        self.in_div_footer_block = False
        # value of keep_data to restore at the next </script>; True so
        # that a closing tag seen without an opening one behaves as before
        self._keep_data_before_script = True
        # lists of what the parser keeps
        self.temp_pieces = []
        self.all_pieces = ''
        # counters
        self.cellcount = 0
        HTMLParser.reset(self)

    #############################################
    # Specific tag handlers
    #
    # sage blocks

    def start_pre(self, attrs):
        r"""
        Find out if we are starting a literal block.

        On ``<pre class="literal-block">``, hand off the pieces
        collected so far and drop the tag. Any other pre is treated
        like an unknown tag.

        INPUT:

        - ``attrs`` - list of tuple

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: attrs = [('class', 'literal-block')]
            sage: p.start_pre(attrs)
            sage: p.all_pieces
            u'a lot of stuff done bunch of tmp strings'
            sage: p.temp_pieces
            []
            sage: p.in_pre_litteral_block
            True

        ::

            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: attrs = [('class', 'something-else')]
            sage: p.start_pre(attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', u'<pre class="something-else">']
            sage: p.in_pre_litteral_block
            False
        """
        for name, value in attrs:
            # ``value`` is None for an attribute written without a value.
            if (name.lower() == 'class' and value is not None
                    and value.lower() == 'literal-block'):
                self.in_pre_litteral_block = True
                self.hand_off_temp_pieces('to_doc_pieces')
                return
        self.unknown_starttag('pre', attrs)

    def end_pre(self):
        r"""
        Once we end a literal block, convert all of the pieces to
        cells. The closing tag of any other pre is treated like an
        unknown tag.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: attrs = [('class', 'literal-block')]
            sage: p.start_pre(attrs)
            sage: sprompt = 'sa' + 'ge' + ': '    # to avoid problems with doctest script
            sage: p.handle_data('%s4+4\n8\n%sx^2\nx^2\n' % (sprompt, sprompt))
            sage: p.end_pre()
            sage: print(p.all_pieces)
            a lot of stuff done bunch of tmp strings
            {{{id=0|
            4+4
            ///
            8
            }}}
            <BLANKLINE>
            {{{id=1|
            x^2
            ///
            x^2
            }}}
            sage: p.temp_pieces
            []
            sage: p.in_pre_litteral_block
            False

        ::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: attrs = [('class', 'something-else')]
            sage: p.start_pre(attrs)
            sage: p.handle_data('some data')
            sage: p.end_pre()
            sage: print(p.all_pieces)
            a lot of stuff done
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', u'<pre class="something-else">', 'some data', u'</pre>']
            sage: p.in_pre_litteral_block
            False
        """
        if self.in_pre_litteral_block:
            self.in_pre_litteral_block = False
            self.hand_off_temp_pieces('to_cell_pieces')
            return
        self.unknown_endtag('pre')

    # Ignore div

    def start_div(self, attrs):
        r"""
        Drop every opening div tag. On ``<div class="footer">``, also
        hand off the pieces collected so far and stop keeping data.

        INPUT:

        - ``attrs`` - list of tuple

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: attrs = [('class', 'document'), ('id', 'title')]
            sage: p.start_div(attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings']
        """
        # Find out if we are starting a div footer block
        for name, value in attrs:
            # ``value`` is None for an attribute written without a value.
            if (name.lower() == 'class' and value is not None
                    and value.lower() == 'footer'):
                self.hand_off_temp_pieces('to_doc_pieces')
                self.keep_data = False
                self.in_div_footer_block = True
                return

    def end_div(self):
        r"""
        Drop every closing div tag. The first one met after a footer
        block was opened switches ``self.keep_data`` back on.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.end_div()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings']
        """
        if self.in_div_footer_block:
            self.in_div_footer_block = False
            self.keep_data = True

    # latex role

    def start_cite(self, attrs):
        r"""
        Replace the opening cite tag by ``$`` when ``self.keep_data`` is
        true.

        INPUT:

        - ``attrs`` - list of tuple

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: attrs = []
            sage: p.start_cite(attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', u'$']
        """
        # The text between the tags is dropped while keep_data is false,
        # so the delimiter has to be dropped with it.
        if self.keep_data:
            self.temp_pieces.append("$")

    def end_cite(self):
        r"""
        Replace the closing cite tag by ``$`` when ``self.keep_data`` is
        true.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.start_cite([])
            sage: p.handle_data('x^2')
            sage: p.end_cite()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', u'$', 'x^2', u'$']
        """
        if self.keep_data:
            self.temp_pieces.append("$")

    # script (for example for mathjax)

    def start_script(self, attrs):
        r"""
        Stop keeping data until the matching closing script tag, and
        remember whether data was being kept.

        INPUT:

        - ``attrs`` - list of tuple

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: attrs = [('type', 'text/x-mathjax-config')]
            sage: p.start_script(attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data
            False
        """
        # A script met before <body> or inside the footer must not
        # switch data capture on when it ends.
        self._keep_data_before_script = self.keep_data
        self.keep_data = False

    def end_script(self):
        r"""
        Restore ``self.keep_data`` to the value it had when the script
        started. Without a preceding :meth:`start_script` it is set to
        True.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.end_script()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data
            True
        """
        self.keep_data = self._keep_data_before_script