"""
A Python implementation of the Twee compiler.

This code was written by Chris Klimas <[email protected]>
It is licensed under the GNU General Public License v2
http://creativecommons.org/licenses/GPL/2.0/

This file defines two classes: Tiddler and TiddlyWiki. These match what
you'd normally see in a TiddlyWiki; the goal here is to provide classes
that translate between Twee and TiddlyWiki output seamlessly.
"""

import re, time, locale, os, codecs
import tweeregex
from tweelexer import TweeLexer

class TiddlyWiki(object):
    """An entire TiddlyWiki: a collection of Tiddler objects keyed by title.

    Attributes:
        tiddlers: dict mapping each passage title to its Tiddler.
        storysettings: dict of story options; toHtml() reads the 'jquery',
            'modernizr', 'blankcss' and 'obfuscate' entries from it.
    """

    def __init__(self):
        self.tiddlers = {}
        self.storysettings = {}

    def hasTiddler(self, name):
        """Returns True if a tiddler titled 'name' has been added."""
        return name in self.tiddlers

    def toTwee(self, order = None):
        """Returns Twee source code for this TiddlyWiki.
        The 'order' argument is a sequence of passage titles specifying the order
        in which passages should appear in the output string; by default passages
        are returned in arbitrary order.
        Raises KeyError if 'order' names a passage that is not in this wiki.
        """
        if order is None:
            order = self.tiddlers.keys()
        return u''.join(self.tiddlers[title].toTwee() for title in order)

    def read(self, filename):
        """Returns the contents of 'filename' as text.

        The file is decoded as UTF-8 (an optional BOM is dropped) or, failing
        that, as BOM-prefixed UTF-16; in both cases line endings are left
        untouched. If neither encoding fits, the undecoded bytes are returned
        with '\\r\\n' and '\\r' normalised to '\\n'.

        Raises IOError if the file cannot be opened or read.
        """
        # Read the bytes once and close the file immediately, so no handle is
        # leaked while the candidate encodings are tried.
        with open(filename, 'rb') as source:
            raw = source.read()

        try:
            return raw.decode('utf_8_sig', 'strict')
        except UnicodeDecodeError:
            pass

        # Only accept UTF-16 when a byte-order mark is present; without one,
        # arbitrary legacy 8-bit text could "successfully" decode as garbage.
        if raw.startswith((codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)):
            try:
                return raw.decode('utf16', 'strict')
            except UnicodeDecodeError:
                pass

        # Last resort: hand back the raw bytes with universal newlines applied.
        # NOTE(review): this is a byte string, not unicode; callers that mix it
        # with unicode text may still fail on non-ASCII bytes.
        return raw.replace(b'\r\n', b'\n').replace(b'\r', b'\n')

    def toHtml(self, app, header = None, order = None, startAt = '', defaultName = '', metadata = None):
        """Returns HTML code for this TiddlyWiki.

        Arguments:
        app -- object used for error reporting (displayError()) and for the
               NAME, VERSION and builtinTargetsPath values
        header -- story format object; its path, label and filesToEmbed() are used
        order -- sequence of passage titles giving the output order; defaults
                 to every passage in arbitrary order
        startAt -- passage title substituted for the "START_AT" placeholder
        defaultName -- story name substituted into <title> and "Untitled Story"
        metadata -- dict of name -> content emitted as <meta> tags after </title>

        Returns the finished HTML. If the story format or one of its files is
        unavailable, the problem is reported through app.displayError() and a
        falsy value (None or '') is returned instead.

        Side effects: may normalise self.storysettings['obfuscate'], delete
        'obfuscatekey', and rewrite the text of the StorySettings tiddler.
        """
        if not order:
            order = self.tiddlers.keys()
        if metadata is None:
            metadata = {}

        if not header:
            app.displayError("building: no story format was specified.\n"
                             + "Please select another format from the Story Format submenu",
                             stacktrace = False)
            return

        try:
            # TODO: Move file reading to Header class.
            output = self.read(header.path + 'header.html')
        except IOError:
            app.displayError("building: the story format '" + header.label + "' isn't available.\n"
                             + "Please select another format from the Story Format submenu",
                             stacktrace = False)
            return

        def insertEngine(app, output, filename, label, extra = ''):
            """Replaces 'label' in 'output' with the contents of 'filename' plus 'extra'.
            Returns 'output' unchanged if the label is absent, or '' (after
            reporting the error) if the file cannot be read.
            """
            if label not in output:
                return output
            try:
                enginecode = self.read(filename)
            except IOError:
                app.displayError("building: the file '" + filename + "' used by the story format '" + header.label + "' wasn't found",
                                 stacktrace = False)
                return ''
            return output.replace(label, enginecode + extra)

        # Insert version number
        output = output.replace('"VERSION"', "Made in " + app.NAME + " " + app.VERSION)

        # Insert timestamp
        # Due to Windows limitations, the timezone offset must be computed manually.
        # time.timezone counts seconds *west* of UTC, hence the inverted sign.
        tz_sign = '+' if time.timezone <= 0 else '-'
        tz_offset = '%s%02d%02d' % ((tz_sign,) + divmod(abs(time.timezone) // 60, 60))
        # Obtain the encoding expected to be used by strftime in this locale
        strftime_encoding = locale.getlocale(locale.LC_TIME)[1] or locale.getpreferredencoding()
        # Write the timestamp
        output = output.replace('"TIME"', "Built on "+time.strftime("%d %b %Y at %H:%M:%S, "+tz_offset).decode(strftime_encoding))

        # Insert the test play "start at passage" value
        if startAt:
            # Escape backslashes first, then quotes, so the title survives
            # inside the double-quoted string that replaces the placeholder.
            output = output.replace('"START_AT"', '"' + startAt.replace('\\', r'\\').replace('"', r'\"') + '"')
        else:
            output = output.replace('"START_AT"', '""')

        # Embed any engine related files required by the header.

        embedded = header.filesToEmbed()
        for key in embedded.keys():
            output = insertEngine(app, output, embedded[key], key)
            if not output: return ''

        # Insert the Backup Story Title

        if defaultName:
            name = defaultName.replace('"',r'\"')
            # Just gonna assume the <title> has no attributes
            output = re.sub(r'<title>.*?<\/title>', '<title>'+name+'</title>', output, count=1, flags=re.I|re.M) \
                .replace('"Untitled Story"', '"'+name+'"')

        # Insert the metadata

        metatags = ''.join(
            '<meta name="' + name.replace('"','&quot;') + '" content="' + content.replace('"','&quot;') + '">\n'
            for name, content in metadata.iteritems() if content)

        if metatags:
            # A function is used as the replacement so that backslashes in the
            # metadata are not interpreted as regex template escapes.
            output = re.sub(r'<\/title>\s*\n?', lambda a: a.group(0) + metatags, output, flags=re.I, count=1)

        # Check if the scripts are personally requesting jQuery or Modernizr
        jquery = self.storysettings.get('jquery', 'off') != "off"
        modernizr = self.storysettings.get('modernizr', 'off') != "off"
        blankCSS = self.storysettings.get('blankcss', 'off') != "off"

        for tiddler in self.tiddlers.itervalues():
            if not (tiddler.isScript() or tiddler.isStylesheet()):
                continue
            if not jquery and tiddler.isScript() and re.search(r'requires? jquery', tiddler.text, re.I):
                jquery = True
            if not modernizr and re.search(r'requires? modernizr', tiddler.text, re.I):
                modernizr = True
            if not blankCSS and tiddler.isStylesheet() and re.search(r'blank stylesheet', tiddler.text, re.I):
                blankCSS = True
            # Stop only once all three requests are known; stopping earlier
            # could miss a "blank stylesheet" request in a later passage.
            if jquery and modernizr and blankCSS:
                break

        # Insert jQuery
        if jquery:
            output = insertEngine(app, output, app.builtinTargetsPath + 'jquery.js', '"JQUERY"')
            if not output: return
        else:
            output = output.replace('"JQUERY"','')

        # Insert Modernizr
        if modernizr:
            output = insertEngine(app, output, app.builtinTargetsPath + 'modernizr.js', '"MODERNIZR"')
            if not output: return
        else:
            output = output.replace('"MODERNIZR"','')

        # Remove default CSS
        if blankCSS:
            # Just gonna assume the html id is quoted correctly if at all.
            output = re.sub(r'<style\s+id=["\']?defaultCSS["\']?\s*>(?:[^<]|<(?!\/style>))*<\/style>', '', output, flags=re.I|re.M, count=1)

        rot13 = self.storysettings.get('obfuscate', 'off') != 'off'
        # In case it was set to "swap" (legacy 1.4.1 file),
        # alter and remove old properties.
        if rot13:
            self.storysettings['obfuscate'] = "rot13"
            if 'obfuscatekey' in self.storysettings:
                del self.storysettings['obfuscatekey']

        # Finally add the passage data
        storyfragments = []
        for i in order:
            tiddler = self.tiddlers[i]
            # Strip out comments from storysettings and reflect any alterations made
            if tiddler.title == 'StorySettings':
                tiddler.text = ''.join([(str(k)+":"+str(v)+"\n") for k,v in self.storysettings.iteritems()])
            # Passages carrying a no-include tag are left out of the story data.
            if self.NOINCLUDE_TAGS.isdisjoint(tiddler.tags):
                storyfragments.append(tiddler.toHtml(rot13 and tiddler.isObfuscateable()))
        storycode = u''.join(storyfragments)

        output = output.replace('"STORY_SIZE"', '"' + str(len(storyfragments)) + '"')

        if '"STORY"' in output:
            output = output.replace('"STORY"', storycode)
        else:
            # Header without a "STORY" placeholder: append the story, then
            # either the format's footer.html or a generic closing sequence.
            output += storycode
            footername = header.path + 'footer.html'
            if os.path.exists(footername):
                output += self.read(footername)
            else:
                output += '</div></body></html>'

        return output

    def toRtf(self, order = None):
        """Returns RTF source code for this TiddlyWiki.
        The 'order' argument is a sequence of passage titles; titles that are
        not in this wiki are skipped. By default every passage is emitted in
        arbitrary order.
        """
        if not order:
            order = self.tiddlers.keys()

        def rtf_encode_char(unicodechar):
            # Characters outside ASCII become \uN? escapes.
            if ord(unicodechar) < 128:
                return str(unicodechar)
            return r'\u' + str(ord(unicodechar)) + r'?'

        def rtf_encode(unicodestring):
            return r''.join(rtf_encode_char(c) for c in unicodestring)

        # preamble
        # NOTE(review): '\tx792' breaks the otherwise regular 720-step tab stop
        # sequence (7920 expected?) -- confirm before changing the output.

        pieces = [
            r'{\rtf1\ansi\ansicpg1251' + '\n',
            r'{\fonttbl\f0\fswiss\fcharset0 Arial;\f1\fmodern\fcharset0 Courier;}' + '\n',
            r'{\colortbl;\red128\green128\blue128;\red51\green51\blue204;}' + '\n',
            r'\margl1440\margr1440\vieww9000\viewh8400\viewkind0' + '\n',
            r'\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx792' + '\n',
            r'\tx8640\ql\qnatural\pardirnatural\pgnx720\pgny720' + '\n',
        ]

        # content

        for i in order:
            # Handle the situation where items are in the order set but not in the tiddlers set.
            if i not in self.tiddlers:
                continue
            tiddler = self.tiddlers[i]
            text = rtf_encode(tiddler.text)
            text = re.sub(r'\n', '\\\n', text) # newlines
            text = re.sub(tweeregex.LINK_REGEX, r'\\b\cf2 \ul \1\ulnone \cf0 \\b0 ', text) # links
            text = re.sub(r"''(.*?)''", r'\\b \1\\b0 ', text) # bold
            text = re.sub(r'\/\/(.*?)\/\/', r'\i \1\i0 ', text) # italics
            text = re.sub(r"\^\^(.*?)\^\^", r'\\super \1\\nosupersub ', text) # sup
            text = re.sub(r"~~(.*?)~~", r'\\sub \1\\nosupersub ', text) # sub
            text = re.sub(r"==(.*?)==", r'\\strike \1\\strike0 ', text) # strike
            text = re.sub(r'(\<\<.*?\>\>)', r'\\f1\cf1 \1\cf0\\f0 ', text) # macros
            text = re.sub(tweeregex.HTML_REGEX, r'\\f1\cf1 \g<0>\cf0\\f0 ', text) # HTML tags
            text = re.sub(tweeregex.MONO_REGEX, r'\\f1 \1\\f0 ', text) # monospace
            text = re.sub(tweeregex.COMMENT_REGEX, '', text) # comments

            pieces.append(r'\fs24\b1 ' + rtf_encode(tiddler.title) + r'\b0\fs20 ' + '\\\n')
            pieces.append(text + '\\\n\\\n')

        pieces.append('}')

        return ''.join(pieces)

    def addTwee(self, source):
        """Adds Twee source code to this TiddlyWiki.
        Returns the tiddler titles in the order they occurred in the Twee source.
        """
        # Prefix a newline so a passage header on the very first line is also
        # found by the '\n::' split; anything before the first header is dropped.
        source = '\n' + source.replace("\r\n", "\n")

        order = []
        for chunk in source.split('\n::')[1:]:
            tiddler = Tiddler('::' + chunk)
            self.addTiddler(tiddler)
            order.append(tiddler.title)
        return order

    def addHtml(self, source):
        """Adds HTML source code to this TiddlyWiki.
        Returns the tiddler titles in the order they occurred in the HTML.
        The StorySettings passage, if present, is added first (so that any
        obfuscation setting is known) and is not included in the returned list.
        """
        order = []
        divs = re.search(r'<div\s+id=(["\']?)store(?:A|-a)rea\1(?:\s+data-size=(["\']?)\d+\2)?(?:\s+hidden)?\s*>(.*)</div>', source,
                         re.DOTALL)
        if not divs:
            return order

        divs = divs.group(3)
        # HTML may be obfuscated.
        obfuscatekey = ''
        storysettings_re = r'[^>]*\stiddler=["\']?StorySettings["\']?[^>]*>.*?</div>'
        storysettings = re.search(r'<div'+storysettings_re, divs, re.DOTALL)
        if storysettings:
            ssTiddler = self.addTiddler(Tiddler(storysettings.group(0), 'html'))
            # Matches any 'obfuscate' value other than one containing "off".
            obfuscate = re.search(r'obfuscate\s*:\s*((?:[^\no]|o(?!ff))*)\s*(?:\n|$)', ssTiddler.text, re.I)
            if obfuscate:
                if "swap" in obfuscate.group(1):
                    # Find the legacy 'obfuscatekey' option from 1.4.0.
                    match = re.search(r'obfuscatekey\s*:\s*(\w*)\s*(?:\n|$)', ssTiddler.text, re.I)
                    if match:
                        # Keep the first occurrence of each usable key character.
                        nss = u''
                        for nsc in match.group(1):
                            if nss.find(nsc) == -1 and nsc not in ':\\\"n0':
                                nss = nss + nsc
                        obfuscatekey = nss
                else:
                    # Swapping these adjacent pairs (a<->n, b<->o, ...) is rot13.
                    obfuscatekey = "anbocpdqerfsgthuivjwkxlymz"
            # The settings passage is already added; remove it from the remainder.
            divs = divs[:storysettings.start(0)] + divs[storysettings.end(0):]

        for div in divs.split('<div'):
            # Skip empty and whitespace-only fragments (e.g. text before the
            # first passage) instead of turning them into untitled passages.
            if div.strip():
                tiddler = Tiddler('<div' + div, 'html', obfuscatekey)
                self.addTiddler(tiddler)
                order.append(tiddler.title)
        return order

    def addHtmlFromFilename(self, filename):
        """Reads 'filename' and adds its HTML passages; returns their titles in order."""
        return self.addHtml(self.read(filename))

    def addTweeFromFilename(self, filename):
        """Reads 'filename' and adds its Twee passages; returns their titles in order."""
        return self.addTwee(self.read(filename))

    def addTiddler(self, tiddler):
        """Adds a Tiddler object to this TiddlyWiki, replacing any existing
        tiddler with the same title. Returns the tiddler that was passed in.
        """
        self.tiddlers[tiddler.title] = tiddler
        return tiddler

    # Passage titles with a special role.
    FORMATTED_INFO_PASSAGES = frozenset([
            'StoryMenu', 'StoryTitle', 'StoryAuthor', 'StorySubtitle', 'StoryInit'])
    UNFORMATTED_INFO_PASSAGES = frozenset(['StoryIncludes', 'StorySettings'])
    INFO_PASSAGES = FORMATTED_INFO_PASSAGES | UNFORMATTED_INFO_PASSAGES
    # Passage tags with a special role; passages carrying a NOINCLUDE tag are
    # omitted from the story data written by toHtml().
    SPECIAL_TAGS = frozenset(['Twine.image'])
    NOINCLUDE_TAGS = frozenset(['Twine.private', 'Twine.system'])
    INFO_TAGS = frozenset(['script', 'stylesheet', 'annotation']) | SPECIAL_TAGS | NOINCLUDE_TAGS


class Tiddler: # pylint: disable=old-style-class
    """A single tiddler in a TiddlyWiki.

    Note: Converting this to a new-style class breaks pickling of new TWS files on old Twine releases.
    """

    def __init__(self, source, type = 'twee', obfuscatekey = ""):
        """Pass source code, and optionally 'twee' or 'html'.
        'obfuscatekey' is only used for HTML source: when non-empty, it is the
        character-pair key used to de-obfuscate the passage.
        """
        # cache of passage names linked from this one; filled in by update()
        self.links = []
        self.displays = []
        self.images = []
        self.macros = []
        # Created here as well so it can be read before update() has run.
        self.variableLinks = []

        if type == 'twee':
            self.initTwee(source)
        else:
            self.initHtml(source, obfuscatekey)

    def __getstate__(self):
        """Need to retain pickle format backwards-compatibility with Twine 1.3.5 """
        # Only title, tags and text are persisted; both timestamps are simply
        # set to the time of pickling.
        now = time.localtime()
        return {
            'created': now,
            'modified': now,
            'title': self.title,
            'tags': self.tags,
            'text': self.text,
        }

    def __repr__(self):
        return "<Tiddler '" + self.title + "'>"

    def initTwee(self, source):
        """Initializes a Tiddler from Twee source code.
        The first line holds the title and an optional [space separated tag
        list]; every following line is the passage text.
        """

        # used only during builds
        self.pos = [0,0]

        lines = source.strip().split('\n')

        # figure out our title: everything before the first '[', minus the
        # leading '::' marker and surrounding spaces

        meta_bits = lines[0].split('[')
        self.title = meta_bits[0].strip(' :')

        # find tags

        self.tags = []

        if len(meta_bits) > 1:
            stripped = (tag.strip('[]') for tag in meta_bits[1].split(' '))
            # Drop the empty strings produced by '[]' or repeated spaces so
            # that no blank tag is recorded.
            self.tags = [tag for tag in stripped if tag]

        # and then the body text

        self.text = u''.join(line + "\n" for line in lines[1:]).strip()

    def initHtml(self, source, obfuscatekey = ""):
        """Initializes a Tiddler from HTML source code.
        'source' is the passage's <div> element; 'obfuscatekey', when
        non-empty, is the pair-swap key used to decode title, tags and text.
        """

        def decode_obfuscate_swap(text):
            """
            Does basic character pair swapping obfuscation.
            No longer used since 1.4.2, but can decode passages from 1.4.0 and 1.4.1
            """
            decoded = []
            for c in text:
                upper = c.isupper()
                p = obfuscatekey.find(c.lower())
                if p != -1:
                    # Characters are paired as (0,1), (2,3), ...; a trailing
                    # unpaired character maps to itself.
                    if p % 2 == 0:
                        p1 = p + 1
                        if p1 >= len(obfuscatekey):
                            p1 = p
                    else:
                        p1 = p - 1
                    c = obfuscatekey[p1].upper() if upper else obfuscatekey[p1]
                decoded.append(c)
            return ''.join(decoded)

        # title

        self.title = 'Untitled Passage'
        title = re.search(r'(?:data\-)?(?:tiddler|name)="([^"]*?)"', source)
        if title:
            self.title = title.group(1)

        # tags

        self.tags = []
        tags = re.search(r'(?:data\-)?tags="([^"]*?)"', source)
        if tags and tags.group(1) != '':
            self.tags = tags.group(1).split(' ')

        # position
        self.pos = [0,0]
        pos = re.search(r'(?:data\-)?(?:twine\-)?position="([^"]*?)"', source)
        if pos:
            coord = pos.group(1).split(',')
            if len(coord) == 2:
                try:
                    # Built eagerly so a malformed number is caught right here.
                    self.pos = [int(c) for c in coord]
                except ValueError:
                    pass

        # body text
        self.text = ''
        text = re.search(r'<div(?:[^"]|(?:".*?"))*?>((?:[^<]|<(?!\/div>))*)<\/div>', source)
        if text:
            self.text = decode_text(text.group(1))

        # deobfuscate
        # Note that we call isObfuscateable() using the raw title and tags, since if
        # the tiddler is not obfuscatable, those will be stored non-obfuscated.
        if obfuscatekey and self.isObfuscateable():
            self.title = decode_obfuscate_swap(self.title)
            self.tags = [decode_obfuscate_swap(tag) for tag in self.tags]
            self.text = decode_obfuscate_swap(self.text)

    def toHtml(self, rot13):
        """Returns an HTML representation of this tiddler.
        If 'rot13' is true, the title, tags and text are rot13-encoded before
        being written out.
        """

        def applyRot13(text):
            return text.decode('rot13') if rot13 else text

        def iterArgs():
            yield 'tiddler', applyRot13(self.title.replace('"', '&quot;'))
            if self.tags:
                yield 'tags', ' '.join(applyRot13(tag) for tag in self.tags)

        return u'<div%s%s>%s</div>' % (
            ''.join(' %s="%s"' % arg for arg in iterArgs()),
            # 'pos' may be missing on instances that were not built by __init__
            ' twine-position="%d,%d"' % tuple(self.pos) if hasattr(self, "pos") else "",
            encode_text(applyRot13(self.text))
            )

    def toTwee(self):
        """Returns a Twee representation of this tiddler."""
        output = u':: ' + self.title

        if self.tags:
            output = (output + u' [' + u' '.join(self.tags)).strip() + u']'

        return output + u"\n" + self.text + u"\n\n\n"

    def isImage(self):
        return 'Twine.image' in self.tags

    def isAnnotation(self):
        return 'annotation' in self.tags

    def isStylesheet(self):
        return 'stylesheet' in self.tags

    def isScript(self):
        return 'script' in self.tags

    def isInfoPassage(self):
        return self.title in TiddlyWiki.INFO_PASSAGES

    def isStoryText(self):
        """ Excludes passages which do not contain renderable Twine code. """
        return self.title not in TiddlyWiki.UNFORMATTED_INFO_PASSAGES \
            and TiddlyWiki.INFO_TAGS.isdisjoint(self.tags)

    def isStoryPassage(self):
        """ A more restrictive variant of isStoryText that excludes the StoryTitle, StoryMenu etc."""
        return self.title not in TiddlyWiki.INFO_PASSAGES \
            and TiddlyWiki.INFO_TAGS.isdisjoint(self.tags)

    def isObfuscateable(self):
        """Returns true iff this tiddler can be obfuscated when placed in the data store."""
        return self.title != 'StorySettings' and not self.isImage()

    def linksAndDisplays(self):
        """Returns the de-duplicated union of self.links and self.displays."""
        return list(set(self.links+self.displays))

    def update(self):
        """
        Update the lists of all passages linked/displayed by this one.
        Rebuilds self.displays, self.macros, self.links, self.variableLinks and
        self.images from self.text; nothing is returned. Passages that are
        neither story text, annotations nor stylesheets get empty lists.
        """
        if not self.isStoryText() and not self.isAnnotation() and not self.isStylesheet():
            self.displays = []
            self.links = []
            self.variableLinks = []
            self.images = []
            self.macros = []
            return

        images = set()
        macros = set()
        links = set()
        variableLinks = set()

        def addLink(link):
            # Sort the link by the style the lexer assigns to it; external
            # links are not recorded at all.
            style = TweeLexer.linkStyle(link)
            if style == TweeLexer.PARAM:
                variableLinks.add(link)
            elif style != TweeLexer.EXTERNAL:
                links.add(link)

        # <<display>>
        self.displays = list(set(re.findall(r'\<\<display\s+[\'"]?(.+?)[\'"]?\s?\>\>', self.text, re.IGNORECASE)))

        # other macros (including shorthand <<display>>)
        for m in re.finditer(tweeregex.MACRO_REGEX, self.text):
            # Exclude shorthand <<print>>
            if m.group(1) and m.group(1)[0] != '$':
                macros.add(m.group(1))
        self.macros = list(macros)

        # Regular hyperlinks (also matches wiki-style links inside macros)
        for m in re.finditer(tweeregex.LINK_REGEX, self.text):
            addLink(m.group(2) or m.group(1))

        # Include images
        for m in re.finditer(tweeregex.IMAGE_REGEX, self.text):
            if m.group(5):
                addLink(m.group(5))

        # HTML data-passage links
        for m in re.finditer(tweeregex.HTML_REGEX, self.text):
            attrs = m.group(2)
            if attrs:
                dataPassage = re.search(r"""data-passage\s*=\s*(?:([^<>'"=`\s]+)|'((?:[^'\\]*\\.)*[^'\\]*)'|"((?:[^"\\]*\\.)*[^"\\]*)")""", attrs)
                if dataPassage:
                    link = dataPassage.group(1) or dataPassage.group(2) or dataPassage.group(3)
                    if m.group(1) == "img":
                        images.add(link)
                    else:
                        addLink(link)

        # <<choice passage_name [link_text]>>
        for block in re.findall(r'\<\<choice\s+(.*?)\s?\>\>', self.text):
            item = re.match(r'(?:"([^"]*)")|(?:\'([^\']*)\')|([^"\'\[\s]\S*)', block)
            if item:
                # Exactly one alternative matched; the other groups become ''.
                links.add(''.join(item.groups('')))

        # <<actions '' ''>>
        for block in re.findall(r'\<\<actions\s+(.*?)\s?\>\>', self.text):
            links.update(re.findall(r'[\'"](.*?)[\'"]', block))

        self.links = list(links)
        self.variableLinks = list(variableLinks)

        # Images

        for block in re.finditer(tweeregex.IMAGE_REGEX, self.text):
            images.add(block.group(4))

        self.images = list(images)

#
# Helper functions
#

def encode_text(text):
    """Encodes a string for use in HTML output.

    Backslashes become '\\s', tabs '\\t' and newlines '\\n' (each as a literal
    two-character sequence), and the characters & < > " and NUL become HTML
    entities.
    """
    # Order matters: backslashes are escaped before any backslash sequence is
    # introduced, and '&' before any entity is introduced.
    substitutions = (
        ('\\', '\\s'),
        ('\t', '\\t'),
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
        ('\0', '&#0;'),
    )
    encoded = text
    for plain, escaped in substitutions:
        encoded = encoded.replace(plain, escaped)
    # Newlines (with an optional preceding CR) become a literal backslash-n.
    return re.sub(r'\r?\n', r'\\n', encoded)

def decode_text(text):
    """Decodes a string from HTML.

    Reverses encode_text(): the literal sequences '\\n', '\\t' and '\\s'
    become newline, tab and backslash, and the entities &quot; &gt; &lt;
    &#0; and &amp; become the characters they stand for.
    """
    # Order matters: '\\s' is expanded only after '\\n' and '\\t', so an
    # encoded backslash cannot combine with a following 'n' or 't', and '&amp;'
    # is expanded last so that an encoded '&' cannot form a new entity.
    return text \
        .replace('\\n', '\n') \
        .replace('\\t', '\t') \
        .replace('\\s', '\\') \
        .replace('&quot;', '"') \
        .replace('&gt;', '>') \
        .replace('&lt;', '<') \
        .replace('&#0;', '\0') \
        .replace('&amp;', '&')

def encode_date(date):
    """Formats a time tuple as a TiddlyWiki timestamp (YYYYMMDDhhmm)."""
    tiddlywiki_format = '%Y%m%d%H%M'
    return time.strftime(tiddlywiki_format, date)


def decode_date(date):
    """Parses a TiddlyWiki timestamp (YYYYMMDDhhmm) into a time tuple."""
    tiddlywiki_format = '%Y%m%d%H%M'
    return time.strptime(date, tiddlywiki_format)