# -*- coding: utf-8 -*
"""
A Filesystem-based Sage Notebook Datastore
Here is the filesystem layout for this datastore. Note that the all
of the pickles are pickles of basic Python objects, so can be
unpickled in any version of Python with or without Sage or the Sage
notebook installed. They are also not compressed, so are reasonably
easy to read ASCII.
The filesystem layout is as follows. It mirrors the URL's used by the
Sage notebook server::
sage_notebook.sagenb
conf.pickle
users.pickle
openid.pickle (optional)
readonly.txt (optional)
home/
username0/
history.pickle
id_number0/
worksheet.html
worksheet_conf.pickle
cells/
data/
snapshots/
id_number1/
worksheet.html
worksheet_conf.pickle
cells/
data/
snapshots/
...
username1/
...
"""
import copy
import shutil
import tarfile
import tempfile
import os
try:
import cPickle as pickle
except ImportError:
import pickle
from six import iteritems
from .abstract_storage import Datastore
from sagenb.misc.misc import set_restrictive_permissions, encoded_str
from sage.misc.temporary_file import atomic_write
[docs]def is_safe(a):
"""
Used when importing contents of various directories from Sage
worksheet files. We define this function to avoid the possibility
of a user crafting fake sws file such that extracting it creates
files outside where we want, e.g., by including .. or / in the
path of some file.
"""
# NOTE: Windows port -- I'm worried about whether a.name will have
# / or \ on windows. The code below assume \.
return '..' not in a and not a.startswith('/')
[docs]class FilesystemDatastore(Datastore):
def __init__(self, path):
"""
INPUT:
- ``path`` -- string, path to this datastore
EXAMPLES::
sage: from sagenb.storage import FilesystemDatastore
sage: FilesystemDatastore(tmp_dir())
Filesystem Sage Notebook Datastore at ...
"""
path = os.path.abspath(path)
self._path = path
self._makepath(os.path.join(self._path, 'home'))
self._home_path = 'home'
self._conf_filename = 'conf.pickle'
self._users_filename = 'users.pickle'
self._readonly_filename = 'readonly.txt'
self._readonly_mtime = 0
self._readonly = None
def __repr__(self):
return "Filesystem Sage Notebook Datastore at %s"%self._path
#########################################################################
# Paths
#########################################################################
def _makepath(self, path):
p = self._abspath(path)
try:
os.makedirs(p)
except OSError:
if not os.path.isdir(p):
raise
return path
def _deep_user_path(self, username):
from hashlib import md5
h = md5(username).hexdigest()
base = ['__store__', h[:1], h[:2], h[:3], h[:4]]
path = os.path.join(*base)
self._makepath(self._abspath(os.path.join(self._home_path, path)))
return os.path.join(path, username)
def _user_path(self, username):
# There are weird cases, e.g., old notebook server migration
# where username is None, and if we don't string it here,
# saving can be broken (at a bad moment!).
# There are also some cases where the username could have unicode in it.
username = str(username)
home = self._abspath(self._home_path)
path = os.path.join(home, username)
if not os.path.islink(path):
self._makepath(home)
old_dir = os.getcwd()
os.chdir(home)
new_path = self._deep_user_path(username)
# Ensure that new_path exists:
if os.path.exists(path):
# If the old path exists, move it to the new path.
# If both the old and new path exist, that's an error
# and this will raise an exception.
os.rename(path, new_path)
else:
# Otherwise, simply create the new path.
self._makepath(os.path.join(home, new_path))
# new_path now points to the actual directory
os.symlink(new_path, username)
os.chdir(old_dir)
return path
def _worksheet_pathname(self, username, id_number):
return os.path.join(self._user_path(username), str(id_number))
def _worksheet_path(self, username, id_number=None):
if id_number is None:
return self._makepath(self._user_path(username))
return self._makepath(self._worksheet_pathname(username, id_number))
def _worksheet_conf_filename(self, username, id_number):
return os.path.join(self._worksheet_path(username, id_number), 'worksheet_conf.pickle')
def _worksheet_html_filename(self, username, id_number):
return os.path.join(self._worksheet_path(username, id_number), 'worksheet.html')
def _history_filename(self, username):
return os.path.join(self._user_path(username), 'history.pickle')
def _abspath(self, file):
"""
Return absolute path to filename got by joining self._path
with the string file.
OUTPUT:
-- ``string``
EXAMPLES::
sage: from sagenb.storage import FilesystemDatastore
sage: FilesystemDatastore(tmp_dir())._abspath('foo.pickle')
'...foo.pickle'
"""
return os.path.join(self._path, file)
#########################################################################
# Loading and saving basic Python objects to disk.
# The input filename is always relative to self._path.
#########################################################################
def _load(self, filename):
with open(self._abspath(filename), 'rb') as f:
result = pickle.load(f)
return result
def _save(self, obj, filename):
"""
TESTS:
Check that interrupting ``_save`` is safe::
sage: from sagenb.storage.filesystem_storage import FilesystemDatastore
sage: D = FilesystemDatastore(tmp_dir())
sage: fn = tmp_filename()
sage: s = "X" * 100000
sage: D._save(s, fn)
sage: try: # long time
....: alarm(1)
....: while True:
....: D._save(s, fn)
....: except (AlarmInterrupt, OSError, AttributeError):
....: # OSError could happen due to a double close() in
....: # Python's tempfile module.
....: # AttributeError could happen due to interrupting
....: # in _TemporaryFileWrapper.__init__
....: # (see https://trac.sagemath.org/ticket/22423)
....: pass
sage: len(D._load(fn))
100000
"""
s = pickle.dumps(obj)
if len(s) == 0:
raise ValueError("Invalid Pickle")
with atomic_write(self._abspath(filename), binary=True) as f:
f.write(s)
def _permissions(self, filename):
f = self._abspath(filename)
if os.path.exists(f):
set_restrictive_permissions(f, allow_execute=False)
#########################################################################
# Conversions to and from basic Python database (so that json
# storage will work).
#########################################################################
def _basic_to_users(self, obj):
from sagenb.notebook.user import User_from_basic
return dict([(name, User_from_basic(basic)) for name, basic in obj])
def _users_to_basic(self, users):
new = sorted([[name, U.basic()] for name, U in iteritems(users)])
return new
def _basic_to_server_conf(self, obj):
from sagenb.notebook.server_conf import ServerConfiguration_from_basic
return ServerConfiguration_from_basic(obj)
def _server_conf_to_basic(self, server):
return server.basic()
def _basic_to_worksheet(self, obj):
"""
Given a basic Python object obj, return corresponding worksheet.
"""
from sagenb.notebook.worksheet import Worksheet_from_basic
path = self._abspath(self._worksheet_path(obj['owner']))
return Worksheet_from_basic(obj, path)
def _worksheet_to_basic(self, worksheet):
"""
Given a worksheet, create a corresponding basic Python object
that completely defines that worksheet.
"""
return worksheet.basic()
#########################################################################
# Now we implement the API we're supposed to implement
#########################################################################
[docs] def load_server_conf(self):
return self._basic_to_server_conf(self._load('conf.pickle'))
[docs] def save_server_conf(self, server_conf):
"""
INPUT:
- ``server`` --
"""
basic = self._server_conf_to_basic(server_conf)
self._save(basic, 'conf.pickle')
self._permissions('conf.pickle')
[docs] def load_openid(self):
"""
Loads an open_id dict read from the disk.
"""
return self._load('openid.pickle')
[docs] def save_openid(self, openid_dict):
"""
Saves an open_id dict to the disk.
"""
self._save(openid_dict, 'openid.pickle')
self._permissions('openid.pickle')
[docs] def load_users(self, user_manager):
"""
OUTPUT:
- dictionary of user info
EXAMPLES::
sage: from sagenb.notebook.user import User
sage: from sagenb.notebook.user_manager import SimpleUserManager
sage: U = SimpleUserManager()
sage: users = {'admin':User('admin','abc','a@b.c','admin'), 'wstein':User('wstein','xyz','b@c.d','user')}
sage: from sagenb.storage import FilesystemDatastore
sage: ds = FilesystemDatastore(tmp_dir())
sage: ds.save_users(users)
sage: 'users.pickle' in os.listdir(ds._path)
True
sage: users = ds.load_users(U)
sage: U.users()
{'admin': admin, 'wstein': wstein}
"""
for user in self._basic_to_users(self._load('users.pickle')).values():
user_manager.add_user_object(user, force=True)
user_manager.set_password(user.username(), user.password(), encrypt = False)
return user_manager
[docs] def save_users(self, users):
"""
INPUT:
- ``users`` -- dictionary mapping user names to users
EXAMPLES::
sage: from sagenb.notebook.user import User
sage: from sagenb.notebook.user_manager import SimpleUserManager
sage: U = SimpleUserManager()
sage: users = {'admin':User('admin','abc','a@b.c','admin'), 'wstein':User('wstein','xyz','b@c.d','user')}
sage: from sagenb.storage import FilesystemDatastore
sage: ds = FilesystemDatastore(tmp_dir())
sage: ds.save_users(users)
sage: 'users.pickle' in os.listdir(ds._path)
True
sage: users = ds.load_users(U)
sage: U.users()
{'admin': admin, 'wstein': wstein}
"""
self._save(self._users_to_basic(users), 'users.pickle')
self._permissions('users.pickle')
[docs] def load_user_history(self, username):
"""
Return the history log for the given user.
INPUT:
- ``username`` -- string
OUTPUT:
- list of strings
"""
filename = self._history_filename(username)
if not os.path.exists(self._abspath(filename)):
return []
return self._load(filename)
[docs] def save_user_history(self, username, history):
"""
Save the history log (a list of strings) for the given user.
INPUT:
- ``username`` -- string
- ``history`` -- list of strings
"""
filename = self._history_filename(username)
self._save(history, filename)
self._permissions(filename)
[docs] def save_worksheet(self, worksheet, conf_only=False):
"""
INPUT:
- ``worksheet`` -- a Sage worksheet
- ``conf_only`` -- default: False; if True, only save
the config file, not the actual body of the worksheet
EXAMPLES::
sage: from sagenb.notebook.worksheet import Worksheet
sage: tmp = tmp_dir()
sage: W = Worksheet('test', 2, tmp, system='gap', owner='sageuser')
sage: from sagenb.storage import FilesystemDatastore
sage: DS = FilesystemDatastore(tmp)
sage: DS.save_worksheet(W)
"""
username = worksheet.owner(); id_number = worksheet.id_number()
basic = self._worksheet_to_basic(worksheet)
if not hasattr(worksheet, '_last_basic') or worksheet._last_basic != basic:
# only save if changed
self._save(basic, self._worksheet_conf_filename(username, id_number))
worksheet._last_basic = basic
if not conf_only and worksheet.body_is_loaded():
# only save if loaded
# todo -- add check if changed
filename = self._worksheet_html_filename(username, id_number)
with atomic_write(self._abspath(filename)) as f:
f.write(worksheet.body().encode('utf-8', 'ignore'))
[docs] def create_worksheet(self, username, id_number):
"""
Create worksheet with given id_number belonging to the given user.
If the worksheet already exists, return ValueError.
INPUT:
- ``username`` -- string
- ``id_number`` -- integer
OUTPUT:
- a worksheet
"""
filename = self._worksheet_html_filename(username, id_number)
html_file = self._abspath(filename)
if os.path.exists(html_file):
raise ValueError("Worksheet %s/%s already exists"%(username, id_number))
# We create the worksheet
W = self._basic_to_worksheet({'owner':username, 'id_number':id_number})
W.clear()
return W
[docs] def load_worksheet(self, username, id_number):
"""
Return worksheet with given id_number belonging to the given
user.
If the worksheet does not exist, return ValueError.
INPUT:
- ``username`` -- string
- ``id_number`` -- integer
OUTPUT:
- a worksheet
"""
# Prevent arbitrary directories from being created by
# self.__worksheet_html_filename
dirname = self._worksheet_pathname(username, id_number)
if not os.path.exists(dirname):
raise ValueError("Worksheet %s/%s does not exist"%(username, id_number))
filename = self._worksheet_html_filename(username, id_number)
html_file = self._abspath(filename)
if not os.path.exists(html_file):
raise ValueError("Worksheet %s/%s does not exist"%(username, id_number))
try:
basic = self._load(self._worksheet_conf_filename(username, id_number))
basic['owner'] = username
basic['id_number'] = id_number
W = self._basic_to_worksheet(basic)
W._last_basic = basic # cache
except Exception:
#the worksheet conf loading didn't work, so we make up one
import traceback
print("Warning: problem loading config for %s/%s; using default config: %s" % (username, id_number, traceback.format_exc()))
W = self._basic_to_worksheet({'owner':username, 'id_number': id_number})
if username=='_sage_':
# save the default configuration, since this may be loaded by a random other user
# since *anyone* looking at docs will load all _sage_ worksheets
print("Saving default configuration (overwriting corrupt configuration) for %s/%s" % (username, id_number))
self.save_worksheet(W, conf_only=True)
return W
[docs] def export_worksheet(self, username, id_number, filename, title):
"""
Export the worksheet with given username and id_number to the
given filename (e.g., 'worksheet.sws').
INPUT:
- ``title`` - title to use for the exported worksheet (if
None, just use current title)
"""
T = tarfile.open(filename, 'w:bz2')
worksheet = self.load_worksheet(username, id_number)
basic = copy.deepcopy(self._worksheet_to_basic(worksheet))
if title:
# change the title
basic['name'] = title
basic['name'] = encoded_str(basic['name'])
# Remove metainformation that perhaps shouldn't be distributed
for k in ['owner', 'ratings', 'worksheet_that_was_published', 'viewers', 'tags', 'published_id_number',
'collaborators', 'auto_publish']:
if k in basic:
del basic[k]
self._save(basic, self._worksheet_conf_filename(username, id_number) + '2')
tmp = self._abspath(self._worksheet_conf_filename(username, id_number) + '2')
T.add(tmp, os.path.join('sage_worksheet','worksheet_conf.pickle'))
os.unlink(tmp)
worksheet_html = self._abspath(self._worksheet_html_filename(username, id_number))
T.add(worksheet_html, os.path.join('sage_worksheet','worksheet.html'))
# The following is purely for backwards compatibility with old
# notebook servers prior to sage-4.1.2.
fd, worksheet_txt = tempfile.mkstemp()
old_heading = "%s\nsystem:%s\n"%(basic['name'], basic['system'])
with open(worksheet_txt,'w') as f:
with open(worksheet_html) as g:
f.write(old_heading + g.read())
T.add(worksheet_txt,
os.path.join('sage_worksheet','worksheet.txt'))
os.unlink(worksheet_txt)
# important, so we don't leave an open file handle!
os.close(fd)
# end backwards compat block.
# Add the contents of the DATA directory
path = self._abspath(self._worksheet_pathname(username, id_number))
data = os.path.join(path, 'data')
if os.path.exists(data):
for X in os.listdir(data):
T.add(os.path.join(data, X), os.path.join('sage_worksheet','data',X))
# Add the contents of each of the cell directories.
cells = os.path.join(path, 'cells')
if os.path.exists(cells):
for X in os.listdir(cells):
T.add(os.path.join(cells, X), os.path.join('sage_worksheet','cells',X))
# NOTE: We do not export the snapshot/undo data. People
# frequently *complain* about Sage exporting a record of their
# mistakes anyways.
T.close()
def _import_old_worksheet(self, username, id_number, filename):
"""
Import a worksheet from an old version of Sage.
"""
T = tarfile.open(filename, 'r:bz2')
members = [a for a in T.getmembers() if 'worksheet.txt' in a.name and is_safe(a.name)]
if len(members) == 0:
raise RuntimeError("unable to import worksheet")
worksheet_txt = members[0].name
W = self.load_worksheet(username, id_number)
W.edit_save_old_format(T.extractfile(worksheet_txt).read().decode('utf-8', 'ignore'))
# '/' is right, since old worksheets always unix
dir = worksheet_txt.split('/')[0]
path = self._abspath(self._worksheet_pathname(username, id_number))
base = os.path.join(dir,'data')
members = [a for a in T.getmembers() if a.name.startswith(base) and is_safe(a.name)]
if len(members) > 0:
T.extractall(path, members)
dest = os.path.join(path, 'data')
if os.path.exists(dest): shutil.rmtree(dest)
shutil.move(os.path.join(path,base), path)
base = os.path.join(dir,'cells')
members = [a for a in T.getmembers() if a.name.startswith(base) and is_safe(a.name)]
if len(members) > 0:
T.extractall(path, members)
dest = os.path.join(path, 'cells')
if os.path.exists(dest): shutil.rmtree(dest)
shutil.move(os.path.join(path, base), path)
tmp = os.path.join(path, dir)
if os.path.exists(tmp):
shutil.rmtree(tmp)
T.close()
return W
[docs] def import_worksheet(self, username, id_number, filename):
"""
Import the worksheet username/id_number from the file with
given filename.
"""
path = self._abspath(self._worksheet_pathname(username, id_number))
if os.path.exists(path):
shutil.rmtree(path, ignore_errors=True)
os.makedirs(path)
T = tarfile.open(filename, 'r:bz2')
try:
with open(self._abspath(self._worksheet_conf_filename(username, id_number)),'w') as f:
f.write(T.extractfile(os.path.join('sage_worksheet','worksheet_conf.pickle')).read())
except KeyError:
# Not a valid worksheet. This might mean it is an old
# worksheet from a previous version of Sage.
return self._import_old_worksheet(username, id_number, filename)
with open(self._abspath(self._worksheet_html_filename(username, id_number)),'w') as f:
f.write(T.extractfile(os.path.join('sage_worksheet','worksheet.html')).read())
base = os.path.join('sage_worksheet','data')
members = [a for a in T.getmembers() if a.name.startswith(base) and is_safe(a.name)]
if len(members) > 0:
T.extractall(path, members)
shutil.move(os.path.join(path,base), path)
base = os.path.join('sage_worksheet','cells')
members = [a for a in T.getmembers() if a.name.startswith(base) and is_safe(a.name)]
if len(members) > 0:
T.extractall(path, members)
shutil.move(os.path.join(path, base), path)
tmp = os.path.join(path, 'sage_worksheet')
if os.path.exists(tmp):
shutil.rmtree(tmp)
T.close()
return self.load_worksheet(username, id_number)
[docs] def worksheets(self, username):
"""
Return list of all the worksheets belonging to the user with
given name. If the given user does not exists, an empty list
is returned.
EXAMPLES: The load_user_data function must be defined in the
derived class::
sage: from sagenb.storage import FilesystemDatastore
sage: tmp = tmp_dir()
sage: FilesystemDatastore(tmp).worksheets('foobar')
[]
sage: from sagenb.notebook.worksheet import Worksheet
sage: W = Worksheet('test', 2, tmp, system='gap', owner='sageuser')
sage: from sagenb.storage import FilesystemDatastore
sage: DS = FilesystemDatastore(tmp)
sage: DS.save_worksheet(W)
sage: DS.worksheets('sageuser')
[sageuser/2: [Cell 0: in=, out=]]
"""
path = self._abspath(self._user_path(username))
if not os.path.exists(path): return []
v = []
for id_number in os.listdir(path):
if id_number.isdigit():
try:
v.append(self.load_worksheet(username, int(id_number)))
except Exception:
import traceback
print("Warning: problem loading %s/%s: %s" % (username, id_number, traceback.format_exc()))
return v
[docs] def readonly_user(self, username):
"""
Each line of the readonly file has a username.
"""
filename = os.path.join(self._path, self._readonly_filename)
if not os.path.exists(filename):
return False
mtime = os.path.getmtime(filename)
if mtime > self._readonly_mtime:
with open(filename) as f:
self._readonly = set(line for line in (l.strip() for l in f) if len(line)>0)
self._readonly_mtime = mtime
return username in self._readonly
[docs] def delete(self):
"""
Delete all files associated with this datastore. Dangerous!
This is only here because it is useful for doctesting.
"""
shutil.rmtree(self._path, ignore_errors=True)
##############################################################################
#
# Why not use JSON, YAML, or XML??
#
# I experimented with using these, but they are 10-100 times slower,
# and there is no real benefit. More precisely, the time for
# dumping/loading a worksheet basic datastructure in each of the
# following is given below. XML is also very bad compared to cPickle.
#
# cPickle,
# pickle
# json
# yaml
# yaml + C
#
# This is all on OS X 10.6 64-bit. Here b = w.basic() for any worksheet w.
#
# sage: import cPickle
# sage: timeit('cPickle.loads(cPickle.dumps(b))')
# 625 loops, best of 3: 51.9 us (microseconds) per loop
# sage: import pickle
# sage: timeit('pickle.loads(pickle.dumps(b))')
# 625 loops, best of 3: 464 us (microseconds) per loop
# sage: import json
# sage: timeit('json.loads(json.dumps(b))')
# 625 loops, best of 3: 449 us (microseconds) per loop
# sage: timeit('json.loads(json.dumps(b,indent=4))')
# 625 loops, best of 3: 625 us (microseconds) per loop
# sage: import yaml
# sage: timeit('yaml.load(yaml.dump(b))')
# 25 loops, best of 3: 13.5 ms per loop
# sage: from yaml import load, dump
# sage: from yaml import CLoader as Loader
# sage: from yaml import CDumper as Dumper
# sage: timeit('yaml.load(yaml.dump(b,Dumper=Dumper),Loader=Loader)') # c++ yaml
# 125 loops, best of 3: 1.77 ms per loop
#
# Other problems: json.load(json.dump(b)) != b, because of unicode and
# all kinds of weirdness
# Yaml C library is hard to install; and yaml itself is not included in python (json is).
# Anyway, the difference between 2-13ms and 52 microseconds is significant.
# At 2ms, 100,000 worksheets takes 200 seconds, versus only 5 seconds
# at 52 microseconds. cPickle just can't be beat.
#
# NOTE! Actually simplejson does just as well at cPickle for this benchmark.
# Thanks to Mitesh Patel for pointing this out.
#
#############################################################################