# A Python implementation of the Twee compiler.

"""
A Python implementation of the Twee compiler.

This code was written by Chris Klimas <[email protected]>
It is licensed under the GNU General Public License v2
http://creativecommons.org/licenses/GPL/2.0/

This file defines two classes: Tiddler and TiddlyWiki. These match what
you'd normally see in a TiddlyWiki; the goal here is to provide classes
that translate between Twee and TiddlyWiki output seamlessly.
"""

import re, time, locale, os, codecs
import tweeregex
from tweelexer import TweeLexer

class TiddlyWiki(object):
    """An entire TiddlyWiki.

    Holds a dict of tiddlers keyed by title (``self.tiddlers``) and a dict of
    story settings (``self.storysettings``), and converts the collection
    to and from Twee source, story HTML and RTF.
    """

    def __init__(self):
        self.tiddlers = {}
        self.storysettings = {}

    def hasTiddler(self, name):
        """Returns True if a tiddler titled 'name' has been added."""
        return name in self.tiddlers

    def toTwee(self, order = None):
        """Returns Twee source code for this TiddlyWiki.
        The 'order' argument is a sequence of passage titles specifying the order
        in which passages should appear in the output string; by default passages
        are returned in arbitrary order.
        """
        tiddlers = self.tiddlers
        if order is None:
            order = tiddlers.keys()
        return u''.join(tiddlers[i].toTwee() for i in order)

    def read(self, filename):
        """Returns the contents of 'filename'.

        Decoding is attempted as UTF-8 (with optional BOM), then as UTF-16;
        if both fail the file is read without any decoding. Every handle that
        gets opened is closed again, including on a failed decoding attempt.
        Errors opening the file (IOError) propagate to the caller.
        """
        def slurp(stream):
            try:
                return stream.read()
            finally:
                stream.close()

        try:
            return slurp(codecs.open(filename, 'rU', 'utf_8_sig', 'strict'))
        except UnicodeDecodeError:
            pass

        try:
            return slurp(codecs.open(filename, 'rU', 'utf16', 'strict'))
        except UnicodeError:
            # Neither encoding fits: fall back to the undecoded contents.
            return slurp(open(filename, 'rU'))

    def toHtml(self, app, header = None, order = None, startAt = '', defaultName = '', metadata = None):
        """Returns HTML code for this TiddlyWiki.

        Arguments:
        app -- object providing displayError(message, stacktrace=...), NAME,
               VERSION and builtinTargetsPath.
        header -- story format object providing path, label and filesToEmbed().
        order -- sequence of passage titles to emit; defaults to all tiddlers.
        startAt -- title substituted for the "START_AT" placeholder.
        defaultName -- fallback story title substituted into <title> and
                       the "Untitled Story" placeholder.
        metadata -- dict of <meta> name -> content; empty contents are skipped.

        On failure an error is reported through app.displayError and a falsy
        value (None or '') is returned.

        Side effects: may rewrite self.storysettings['obfuscate'], remove
        'obfuscatekey', and replace the text of the StorySettings tiddler.
        """
        if not order:
            order = self.tiddlers.keys()
        if metadata is None:
            metadata = {}

        if not header:
            app.displayError("building: no story format was specified.\n"
                             + "Please select another format from the Story Format submenu",
                             stacktrace = False)
            return

        try:
            # TODO: Move file reading to Header class.
            output = self.read(header.path + 'header.html')
        except IOError:
            app.displayError("building: the story format '" + header.label + "' isn't available.\n"
                             + "Please select another format from the Story Format submenu",
                             stacktrace = False)
            return

        def insertEngine(app, output, filename, label, extra = ''):
            """Replaces 'label' in 'output' with the contents of 'filename'.
            Returns '' (after reporting the error) if the file can't be read.
            """
            if label not in output:
                return output
            try:
                enginecode = self.read(filename)
            except IOError:
                app.displayError("building: the file '" + filename + "' used by the story format '" + header.label + "' wasn't found",
                                 stacktrace = False)
                return ''
            return output.replace(label, enginecode + extra)

        # Insert version number
        output = output.replace('"VERSION"', "Made in " + app.NAME + " " + app.VERSION)

        # Insert timestamp
        # Due to Windows limitations, the timezone offset must be computed manually.
        tz_seconds = time.timezone
        tz_sign = '+' if tz_seconds <= 0 else '-'
        # Floor division keeps the operands integral on every Python version.
        tz_hours, tz_minutes = divmod(abs(tz_seconds) // 60, 60)
        tz_offset = '%s%02d%02d' % (tz_sign, tz_hours, tz_minutes)
        # Obtain the encoding expected to be used by strftime in this locale
        strftime_encoding = locale.getlocale(locale.LC_TIME)[1] or locale.getpreferredencoding()
        # Write the timestamp
        timestamp = time.strftime("%d %b %Y at %H:%M:%S, " + tz_offset)
        if isinstance(timestamp, bytes):
            # Byte strings (Python 2 str) must be decoded before joining unicode output.
            timestamp = timestamp.decode(strftime_encoding)
        output = output.replace('"TIME"', "Built on " + timestamp)

        # Insert the test play "start at passage" value.
        # Backslashes are doubled first, then quotes are backslash-escaped.
        escapedStart = (startAt or '').replace('\\', r'\\').replace('"', r'\"')
        output = output.replace('"START_AT"', '"' + escapedStart + '"')

        # Embed any engine related files required by the header.

        embedded = header.filesToEmbed()
        for label in embedded.keys():
            output = insertEngine(app, output, embedded[label], label)
            if not output:
                return ''

        # Insert the Backup Story Title

        if defaultName:
            name = defaultName.replace('"', r'\"')
            titleTag = '<title>' + name + '</title>'
            # Just gonna assume the <title> has no attributes.
            # A callable replacement keeps backslashes in the name from being
            # interpreted as regex template escapes.
            output = re.sub(r'<title>.*?<\/title>', lambda m: titleTag, output, count=1, flags=re.I|re.M) \
                .replace('"Untitled Story"', '"' + name + '"')

        # Insert the metadata

        metatags = ''.join(
            '<meta name="' + key.replace('"', '&quot;')
            + '" content="' + content.replace('"', '&quot;') + '">\n'
            for key, content in metadata.items() if content)

        if metatags:
            output = re.sub(r'<\/title>\s*\n?', lambda a: a.group(0) + metatags, output, flags=re.I, count=1)

        # Check if the scripts are personally requesting jQuery or Modernizr
        def settingEnabled(key):
            return self.storysettings.get(key, "off") != "off"

        jquery = settingEnabled('jquery')
        modernizr = settingEnabled('modernizr')
        blankCSS = settingEnabled('blankcss')

        for tiddler in self.tiddlers.values():
            isScript = tiddler.isScript()
            isStylesheet = tiddler.isStylesheet()
            if not (isScript or isStylesheet):
                continue
            if not jquery and isScript and re.search(r'requires? jquery', tiddler.text, re.I):
                jquery = True
            if not modernizr and re.search(r'requires? modernizr', tiddler.text, re.I):
                modernizr = True
            if not blankCSS and isStylesheet and re.search(r'blank stylesheet', tiddler.text, re.I):
                blankCSS = True
            # Stop only once nothing is left to detect.
            if jquery and modernizr and blankCSS:
                break

        # Insert jQuery
        if jquery:
            output = insertEngine(app, output, app.builtinTargetsPath + 'jquery.js', '"JQUERY"')
            if not output:
                return
        else:
            output = output.replace('"JQUERY"', '')

        # Insert Modernizr
        if modernizr:
            output = insertEngine(app, output, app.builtinTargetsPath + 'modernizr.js', '"MODERNIZR"')
            if not output:
                return
        else:
            output = output.replace('"MODERNIZR"', '')

        # Remove default CSS
        if blankCSS:
            # Just gonna assume the html id is quoted correctly if at all.
            output = re.sub(r'<style\s+id=["\']?defaultCSS["\']?\s*>(?:[^<]|<(?!\/style>))*<\/style>', '', output, flags=re.I|re.M, count=1)

        rot13 = settingEnabled('obfuscate')
        # In case it was set to "swap" (legacy 1.4.1 file),
        # alter and remove old properties.
        if rot13:
            self.storysettings['obfuscate'] = "rot13"
            self.storysettings.pop('obfuscatekey', None)

        # Finally add the passage data
        storyfragments = []
        for i in order:
            tiddler = self.tiddlers[i]
            # Strip out comments from storysettings and reflect any alterations made
            if tiddler.title == 'StorySettings':
                tiddler.text = ''.join([(str(k)+":"+str(v)+"\n") for k,v in self.storysettings.items()])
            if self.NOINCLUDE_TAGS.isdisjoint(tiddler.tags):
                storyfragments.append(tiddler.toHtml(rot13 and tiddler.isObfuscateable()))
        storycode = u''.join(storyfragments)

        output = output.replace('"STORY_SIZE"', '"' + str(len(storyfragments)) + '"')

        if '"STORY"' in output:
            output = output.replace('"STORY"', storycode)
        else:
            # No placeholder: append the story, then the footer file if the
            # story format ships one, else a generic closing sequence.
            output += storycode
            footername = header.path + 'footer.html'
            if os.path.exists(footername):
                output += self.read(footername)
            else:
                output += '</div></body></html>'

        return output

    def toRtf(self, order = None):
        """Returns RTF source code for this TiddlyWiki.
        Titles in 'order' that are not present in this wiki are skipped.
        """
        if not order:
            order = self.tiddlers.keys()

        def rtf_encode_char(unicodechar):
            if ord(unicodechar) < 128:
                return str(unicodechar)
            return r'\u' + str(ord(unicodechar)) + r'?'

        def rtf_encode(unicodestring):
            return r''.join(rtf_encode_char(c) for c in unicodestring)

        # preamble

        output = r'{\rtf1\ansi\ansicpg1251' + '\n'
        output += r'{\fonttbl\f0\fswiss\fcharset0 Arial;\f1\fmodern\fcharset0 Courier;}' + '\n'
        output += r'{\colortbl;\red128\green128\blue128;\red51\green51\blue204;}' + '\n'
        output += r'\margl1440\margr1440\vieww9000\viewh8400\viewkind0' + '\n'
        output += r'\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx792' + '\n'
        output += r'\tx8640\ql\qnatural\pardirnatural\pgnx720\pgny720' + '\n'

        # content
        # Every literal backslash in the replacement templates below is written
        # as '\\' so none of them depends on how re treats unknown escapes.

        for i in order:
            # Handle the situation where items are in the order set but not in the tiddlers set.
            if i not in self.tiddlers:
                continue
            text = rtf_encode(self.tiddlers[i].text)
            text = text.replace('\n', '\\\n') # newlines
            text = re.sub(tweeregex.LINK_REGEX, r'\\b\\cf2 \\ul \1\\ulnone \\cf0 \\b0 ', text) # links
            text = re.sub(r"''(.*?)''", r'\\b \1\\b0 ', text) # bold
            text = re.sub(r'\/\/(.*?)\/\/', r'\\i \1\\i0 ', text) # italics
            text = re.sub(r"\^\^(.*?)\^\^", r'\\super \1\\nosupersub ', text) # sup
            text = re.sub(r"~~(.*?)~~", r'\\sub \1\\nosupersub ', text) # sub
            text = re.sub(r"==(.*?)==", r'\\strike \1\\strike0 ', text) # strike
            text = re.sub(r'(\<\<.*?\>\>)', r'\\f1\\cf1 \1\\cf0\\f0 ', text) # macros
            text = re.sub(tweeregex.HTML_REGEX, r'\\f1\\cf1 \g<0>\\cf0\\f0 ', text) # HTML tags
            text = re.sub(tweeregex.MONO_REGEX, r'\\f1 \1\\f0 ', text) # monospace
            text = re.sub(tweeregex.COMMENT_REGEX, '', text) # comments

            output += r'\fs24\b1 ' + rtf_encode(self.tiddlers[i].title) + r'\b0\fs20 ' + '\\\n'
            output += text + '\\\n\\\n'

        output += '}'

        return output

    def addTwee(self, source):
        """Adds Twee source code to this TiddlyWiki.
        Returns the tiddler titles in the order they occurred in the Twee source.
        """
        source = source.replace("\r\n", "\n")
        # The leading newline lets a passage on the very first line be split off too.
        source = '\n' + source
        tiddlers = source.split('\n::')[1:]

        order = []
        for i in tiddlers:
            tiddler = Tiddler('::' + i)
            self.addTiddler(tiddler)
            order.append(tiddler.title)
        return order

    def addHtml(self, source):
        """Adds HTML source code to this TiddlyWiki.
        Returns the tiddler titles in the order they occurred in the HTML.
        """
        order = []
        divs = re.search(r'<div\s+id=(["\']?)store(?:A|-a)rea\1(?:\s+data-size=(["\']?)\d+\2)?(?:\s+hidden)?\s*>(.*)</div>', source,
                         re.DOTALL)
        if divs:
            divs = divs.group(3)
            # HTML may be obfuscated.
            obfuscatekey = ''
            storysettings_re = r'[^>]*\stiddler=["\']?StorySettings["\']?[^>]*>.*?</div>'
            storysettings = re.search(r'<div'+storysettings_re, divs, re.DOTALL)
            if storysettings:
                ssTiddler = self.addTiddler(Tiddler(storysettings.group(0), 'html'))
                obfuscate = re.search(r'obfuscate\s*:\s*((?:[^\no]|o(?!ff))*)\s*(?:\n|$)', ssTiddler.text, re.I)
                if obfuscate:
                    if "swap" in obfuscate.group(1):
                        # Find the legacy 'obfuscatekey' option from 1.4.0.
                        match = re.search(r'obfuscatekey\s*:\s*(\w*)\s*(?:\n|$)', ssTiddler.text, re.I)
                        if match:
                            # Keep the first occurrence of each character and
                            # drop the characters that are never swapped.
                            nss = u''
                            for nsc in match.group(1):
                                if nsc not in nss and nsc not in ':\\\"n0':
                                    nss = nss + nsc
                            obfuscatekey = nss
                    else:
                        obfuscatekey = "anbocpdqerfsgthuivjwkxlymz"
                # The StorySettings div has been handled; cut it out of the store.
                divs = divs[:storysettings.start(0)] + divs[storysettings.end(0):]

            for div in divs.split('<div'):
                # Skip empty and whitespace-only fragments between divs.
                if not div.strip():
                    continue
                tiddler = Tiddler('<div' + div, 'html', obfuscatekey)
                self.addTiddler(tiddler)
                order.append(tiddler.title)
        return order

    def addHtmlFromFilename(self, filename):
        """Reads 'filename' and adds its HTML; returns the titles in order."""
        return self.addHtml(self.read(filename))

    def addTweeFromFilename(self, filename):
        """Reads 'filename' and adds its Twee source; returns the titles in order."""
        return self.addTwee(self.read(filename))

    def addTiddler(self, tiddler):
        """Adds a Tiddler object to this TiddlyWiki.
        A tiddler already stored under the same title is replaced.
        Returns the tiddler that was passed in.
        """
        self.tiddlers[tiddler.title] = tiddler
        return tiddler

    FORMATTED_INFO_PASSAGES = frozenset([
        'StoryMenu', 'StoryTitle', 'StoryAuthor', 'StorySubtitle', 'StoryInit'])
    UNFORMATTED_INFO_PASSAGES = frozenset(['StoryIncludes', 'StorySettings'])
    INFO_PASSAGES = FORMATTED_INFO_PASSAGES | UNFORMATTED_INFO_PASSAGES
    SPECIAL_TAGS = frozenset(['Twine.image'])
    NOINCLUDE_TAGS = frozenset(['Twine.private', 'Twine.system'])
    INFO_TAGS = frozenset(['script', 'stylesheet', 'annotation']) | SPECIAL_TAGS | NOINCLUDE_TAGS

class Tiddler: # pylint: disable=old-style-class
    """A single tiddler in a TiddlyWiki.

    Note: Converting this to a new-style class breaks pickling of new TWS files on old Twine releases.
    """

    def __init__(self, source, type = 'twee', obfuscatekey = ""):
        """Pass source code, and optionally 'twee' or 'html'.
        'obfuscatekey' is only used for HTML source (see initHtml).
        """
        # cache of passage names linked from this one
        self.links = []
        self.displays = []
        self.variableLinks = []
        self.images = []
        self.macros = []

        if type == 'twee':
            self.initTwee(source)
        else:
            self.initHtml(source, obfuscatekey)

    def __getstate__(self):
        """Need to retain pickle format backwards-compatibility with Twine 1.3.5 """
        now = time.localtime()
        return {
            'created': now,
            'modified': now,
            'title': self.title,
            'tags': self.tags,
            'text': self.text,
        }

    def __repr__(self):
        return "<Tiddler '" + self.title + "'>"

    def initTwee(self, source):
        """Initializes a Tiddler from Twee source code.

        The first line holds the title, optionally followed by a
        space-separated tag list in square brackets; the remaining lines
        form the body text.
        """

        # used only during builds
        self.pos = [0,0]

        # figure out our title

        lines = source.strip().split('\n')

        meta_bits = lines[0].split('[')
        self.title = meta_bits[0].strip(' :')

        # find tags

        self.tags = []

        if len(meta_bits) > 1:
            for tag_bit in meta_bits[1].split(' '):
                tag = tag_bit.strip('[]')
                # Repeated or trailing spaces yield empty fragments, which are not tags.
                if tag:
                    self.tags.append(tag)

        # and then the body text

        self.text = u'\n'.join(lines[1:]).strip()

    def initHtml(self, source, obfuscatekey = ""):
        """Initializes a Tiddler from HTML source code.

        'source' is one passage <div>. If 'obfuscatekey' is non-empty and the
        tiddler is obfuscateable, title, tags and text are decoded with the
        legacy character-pair swap.
        """

        def decode_obfuscate_swap(text):
            """
            Does basic character pair swapping obfuscation.
            No longer used since 1.4.2, but can decode passages from 1.4.0 and 1.4.1
            """
            decoded = []
            for c in text:
                index = obfuscatekey.find(c.lower())
                if index != -1:
                    # Characters are paired (0,1), (2,3), ...; an unpaired
                    # last character maps to itself.
                    partner = index + 1 if index % 2 == 0 else index - 1
                    if partner >= len(obfuscatekey):
                        partner = index
                    swapped = obfuscatekey[partner]
                    c = swapped.upper() if c.isupper() else swapped
                decoded.append(c)
            return ''.join(decoded)

        # title

        self.title = 'Untitled Passage'
        title_re = re.compile(r'(?:data\-)?(?:tiddler|name)="([^"]*?)"')
        title = title_re.search(source)
        if title:
            self.title = title.group(1)

        # tags

        self.tags = []
        tags_re = re.compile(r'(?:data\-)?tags="([^"]*?)"')
        tags = tags_re.search(source)
        if tags and tags.group(1) != '':
            self.tags = tags.group(1).split(' ')

        # position
        self.pos = [0,0]
        pos_re = re.compile(r'(?:data\-)?(?:twine\-)?position="([^"]*?)"')
        pos = pos_re.search(source)
        if pos:
            coord = pos.group(1).split(',')
            if len(coord) == 2:
                try:
                    self.pos = [int(value) for value in coord]
                except ValueError:
                    # Malformed coordinates: keep the default position.
                    pass

        # body text
        self.text = ''
        text_re = re.compile(r'<div(?:[^"]|(?:".*?"))*?>((?:[^<]|<(?!\/div>))*)<\/div>')
        text = text_re.search(source)
        if text:
            self.text = decode_text(text.group(1))

        # deobfuscate
        # Note that we call isObfuscateable() using the raw title and tags, since if
        # the tiddler is not obfuscatable, those will be stored non-obfuscated.
        if obfuscatekey and self.isObfuscateable():
            self.title = decode_obfuscate_swap(self.title)
            self.tags = [decode_obfuscate_swap(tag) for tag in self.tags]
            self.text = decode_obfuscate_swap(self.text)

    def toHtml(self, rot13):
        """Returns an HTML representation of this tiddler.
        If 'rot13' is true, the title, tags and text are ROT13-encoded.
        """

        def applyRot13(text):
            return text.decode('rot13') if rot13 else text

        def iterArgs():
            # Quotes are escaped after ROT13 so the entity itself is never
            # scrambled and the attribute value stays well-formed.
            yield 'tiddler', applyRot13(self.title).replace('"', '&quot;')
            if self.tags:
                yield 'tags', ' '.join(applyRot13(tag) for tag in self.tags)

        attributes = ''.join(' %s="%s"' % arg for arg in iterArgs())
        position = ' twine-position="%d,%d"' % tuple(self.pos) if hasattr(self, "pos") else ""
        return u'<div%s%s>%s</div>' % (attributes, position, encode_text(applyRot13(self.text)))

    def toTwee(self):
        """Returns a Twee representation of this tiddler."""
        output = u':: ' + self.title

        if len(self.tags) > 0:
            output = (output + u' [' + u' '.join(self.tags)).strip() + u']'

        return output + u"\n" + self.text + u"\n\n\n"

    def isImage(self):
        return 'Twine.image' in self.tags

    def isAnnotation(self):
        return 'annotation' in self.tags

    def isStylesheet(self):
        return 'stylesheet' in self.tags

    def isScript(self):
        return 'script' in self.tags

    def isInfoPassage(self):
        return self.title in TiddlyWiki.INFO_PASSAGES

    def isStoryText(self):
        """ Excludes passages which do not contain renderable Twine code. """
        return self.title not in TiddlyWiki.UNFORMATTED_INFO_PASSAGES \
            and TiddlyWiki.INFO_TAGS.isdisjoint(self.tags)

    def isStoryPassage(self):
        """ A more restrictive variant of isStoryText that excludes the StoryTitle, StoryMenu etc."""
        return self.title not in TiddlyWiki.INFO_PASSAGES \
            and TiddlyWiki.INFO_TAGS.isdisjoint(self.tags)

    def isObfuscateable(self):
        """Returns true iff this tiddler can be obfuscated when placed in the data store."""
        return self.title != 'StorySettings' and not self.isImage()

    def linksAndDisplays(self):
        """Returns the de-duplicated union of self.links and self.displays."""
        return list(set(self.links+self.displays))

    def update(self):
        """
        Update the lists of all passages linked/displayed by this one.
        The results are stored in self.displays, self.links,
        self.variableLinks, self.images and self.macros; nothing is returned.
        Tiddlers that are neither story text, annotations nor stylesheets get
        empty lists.
        """
        if not self.isStoryText() and not self.isAnnotation() and not self.isStylesheet():
            self.displays = []
            self.links = []
            self.variableLinks = []
            self.images = []
            self.macros = []
            return

        images = set()
        macros = set()
        links = set()
        variableLinks = set()

        def addLink(link):
            style = TweeLexer.linkStyle(link)
            if style == TweeLexer.PARAM:
                variableLinks.add(link)
            elif style != TweeLexer.EXTERNAL:
                links.add(link)

        # <<display>>
        self.displays = list(set(re.findall(r'\<\<display\s+[\'"]?(.+?)[\'"]?\s?\>\>', self.text, re.IGNORECASE)))

        # other macros (including shorthand <<display>>)
        for m in re.finditer(tweeregex.MACRO_REGEX, self.text):
            # Exclude shorthand <<print>>
            if m.group(1) and m.group(1)[0] != '$':
                macros.add(m.group(1))
        self.macros = list(macros)

        # Regular hyperlinks (also matches wiki-style links inside macros)
        for m in re.finditer(tweeregex.LINK_REGEX, self.text):
            addLink(m.group(2) or m.group(1))

        # Include images
        for m in re.finditer(tweeregex.IMAGE_REGEX, self.text):
            if m.group(5):
                addLink(m.group(5))

        # HTML data-passage links
        for m in re.finditer(tweeregex.HTML_REGEX, self.text):
            attrs = m.group(2)
            if attrs:
                dataPassage = re.search(r"""data-passage\s*=\s*(?:([^<>'"=`\s]+)|'((?:[^'\\]*\\.)*[^'\\]*)'|"((?:[^"\\]*\\.)*[^"\\]*)")""", attrs)
                if dataPassage:
                    link = dataPassage.group(1) or dataPassage.group(2) or dataPassage.group(3)
                    if m.group(1) == "img":
                        images.add(link)
                    else:
                        addLink(link)

        # <<choice passage_name [link_text]>>
        for block in re.findall(r'\<\<choice\s+(.*?)\s?\>\>', self.text):
            item = re.match(r'(?:"([^"]*)")|(?:\'([^\']*)\')|([^"\'\[\s]\S*)', block)
            if item:
                links.add(''.join(item.groups('')))

        # <<actions '' ''>>
        for block in re.findall(r'\<\<actions\s+(.*?)\s?\>\>', self.text):
            links.update(re.findall(r'[\'"](.*?)[\'"]', block))

        self.links = list(links)
        self.variableLinks = list(variableLinks)

        # Images

        for block in re.finditer(tweeregex.IMAGE_REGEX, self.text):
            images.add(block.group(4))

        self.images = list(images)

#
# Helper functions
#

def encode_text(text):
    """Encodes a string for use in HTML output.

    Backslashes become ``\\s`` and tabs ``\\t`` (as two-character sequences),
    the characters ``& < > "`` become HTML entities, NUL characters are
    removed and each newline (LF or CRLF) becomes the two characters ``\\n``.
    """
    # The backslash must be rewritten first so the backslashes introduced by
    # later steps are not escaped again; '&' must precede the other entities
    # for the same reason.
    output = text \
        .replace('\\', '\\s') \
        .replace('\t', '\\t') \
        .replace('&', '&amp;') \
        .replace('<', '&lt;') \
        .replace('>', '&gt;') \
        .replace('"', '&quot;') \
        .replace('\0', '')
    return re.sub(r'\r?\n', r'\\n', output)

def decode_text(text):
    """Decodes a string from HTML.

    The two-character sequences ``\\n``, ``\\t`` and ``\\s`` become a newline,
    a tab and a backslash, and the entities ``&quot; &gt; &lt; &amp;`` become
    the characters they stand for.
    """
    # '\\s' is decoded after the other backslash sequences and '&amp;' after
    # the other entities, so freshly decoded characters are never
    # reinterpreted as the start of another escape.
    return text \
        .replace('\\n', '\n') \
        .replace('\\t', '\t') \
        .replace('\\s', '\\') \
        .replace('&quot;', '"') \
        .replace('&gt;', '>') \
        .replace('&lt;', '<') \
        .replace('&amp;', '&')

def encode_date(date):
    """Formats a time tuple as a TiddlyWiki timestamp (YYYYMMDDhhmm)."""
    tiddlywiki_format = '%Y%m%d%H%M'
    stamp = time.strftime(tiddlywiki_format, date)
    return stamp

def decode_date(date):
    """Parses a TiddlyWiki timestamp (YYYYMMDDhhmm) into a time struct."""
    tiddlywiki_format = '%Y%m%d%H%M'
    parsed = time.strptime(date, tiddlywiki_format)
    return parsed