#from urllib import unquote
#from datetime import date
import os, re, cPickle, ConfigParser
import logging
from copy import deepcopy
from string import Template
from cStringIO import StringIO
from urllib import unquote_plus, quote_plus

import simplejson
from lxml import etree

# Load the [transdef] section of config.cfg into the module-level ``cfg`` dict.
# The file is looked up next to this module when the module location is known,
# otherwise relative to the current working directory.
config = ConfigParser.SafeConfigParser()
config.optionxform = str # this is to prevent conversion of options name to lowercase
cfgfile = 'config.cfg'
try:
    currentdir = os.path.dirname(__file__)
except NameError:
    # __file__ is not defined in this execution context: keep the relative path
    currentdir = ''
if currentdir:
    cfgfile = '%s/%s' % (currentdir,cfgfile)
cfgfp = open(cfgfile)
try:
    config.readfp(cfgfp)
finally:
    # Do not keep the configuration file open for the life of the process
    cfgfp.close()
cfg = dict(config.items('transdef'))

def decodeGroup(symbol):
    """
    Format the numeric part of a symbol as ``<main>/<sub>``.

    Characters 4-7 of ``symbol`` are read as the integer part and everything
    from character 8 on as the fractional part of a decimal number.  Leading
    and trailing zeros are dropped by the float conversion, except that the
    part after the slash always keeps at least two digits.

    Example::

        >>> decodeGroup('A01B0033125000')
        '33/125'
        >>> decodeGroup('A01B0001000000')
        '1/00'
    """
    integer_digits = symbol[4:8]
    fraction_digits = symbol[8:]
    value = float(integer_digits + '.' + fraction_digits)
    rendered = str(value)
    decimals = rendered.split('.')[1]
    if len(decimals) == 1:
        # A single decimal digit (e.g. 1.0, 1.2) is padded to two: 1/00, 1/20
        rendered = '%.2F' % value
    return rendered.replace('.','/')
    
def decodeSymbol(symbol):
    """
    Return the display form of an encoded symbol.

    * ``'global'`` (any letter case) becomes ``'Global'``.
    * Symbols shorter than 5 characters are returned unchanged.
    * Longer symbols become the first 4 characters, a space, and the
      group part formatted by ``decodeGroup``.
    """
    if symbol.lower() == 'global':
        return 'Global'
    if len(symbol) < 5:
        # Nothing after the 4-character prefix: no group part to decode
        return symbol
    return ' '.join((symbol[:4], decodeGroup(symbol)))

def dropTag(elt):
    """
    Remove the element ``elt`` from its parent while keeping its content.

    The text, children and tail of ``elt`` are merged into the parent at the
    position ``elt`` occupied.  ``elt`` must have a parent.

    Example::

        >>> h = etree.fromstring('<div>Hello <b>World!</b></div>')
        >>> dropTag(h.find('.//b'))
        >>> print etree.tostring(h)
        <div>Hello World!</div>
    """
    parent = elt.getparent()
    assert parent is not None
    previous = elt.getprevious()

    def _mergeBefore(fragment):
        # Text that ends up right before the dropped element's position lives
        # either in the parent's text (no preceding sibling) or in the
        # preceding sibling's tail.
        if previous is None:
            parent.text = (parent.text or '') + fragment
        else:
            previous.tail = (previous.tail or '') + fragment

    if elt.text and isinstance(elt.tag, basestring):
        # not a Comment, etc.
        _mergeBefore(elt.text)

    if elt.tail:
        if len(elt):
            # The tail must follow the last child once children are hoisted
            lastChild = elt[-1]
            lastChild.tail = (lastChild.tail or '') + elt.tail
        else:
            _mergeBefore(elt.tail)

    # Replace the element itself by its children, in place
    position = parent.index(elt)
    parent[position:position+1] = elt[:]

def parseTxt(elt,tree,data):
    """
    Append the translatable pieces found below ``elt`` to ``data``.

    ``elt`` is an element of ``tree``; ``tree.getpath`` is used to identify
    each piece.  ``data`` is a list extended in place with entries of the form:

    * ``['text', xpath, utf8_text]`` for the text of a child element,
    * ``['tail', xpath, utf8_text]`` for the tail of a child element,
    * ``['ref', label]`` for SREF/LREF/MREF children (MREF labels are
      ``LABELS-LABELE``); references are not descended into.

    An empty ``'text'`` entry is added for ``elt`` when it starts directly
    with a child, and an empty ``'tail'`` entry is added after its last child
    when that child has no tail, so both positions remain editable.
    Children of non-reference elements are processed recursively.
    """
    lastIndex = len(elt) - 1
    # enumerate gives each child's position directly instead of asking the
    # parent for it (elt.index) on every iteration
    for position, e in enumerate(elt):
        if position == 0 and not elt.text:
            # Add an empty translation field before any element beginning a paragraph
            data.append(['text',tree.getpath(elt),''])

        xpath = tree.getpath(e)
        if e.tag in ('SREF','LREF','MREF'):
            if e.tag == 'MREF':
                data.append(['ref',''.join([e.get('LABELS'),'-',e.get('LABELE')])])
            else:
                data.append(['ref',e.get('LABEL')])
        else:
            if e.text:
                data.append(['text',xpath,e.text.encode('utf-8')])

            if len(e) > 0:
                parseTxt(e,tree,data)

        if e.tail:
            data.append(['tail',xpath,e.tail.encode('utf-8')])
        elif position == lastIndex:
            # Add an empty translation field after any element ending a paragraph
            data.append(['tail',xpath,''])

def _buildEditor(symbol,datadir,file):
    """
    Build the list of editor entries for the definition of ``symbol``.

    The definition is read from
    ``<datadir>/<file>/transformations/ipca6transdef/translation/<symbol>.xml``
    when that file exists, otherwise from the ``authentic`` directory next to
    it.  The returned list contains, in document order:

    * ``['label', title]`` for each section element listed in ``titles``,
    * for each translatable element, its own text as
      ``['text', xpath, utf8_text]`` (when not empty), the entries produced
      by ``parseTxt`` for its content, and a closing ``['space']``.
    """
    translatable = ('DEFINITION-TITLE',
                    'SUBHEADING',
                    'PARAGRAPH-TEXT',
                    'SUBPARAGRAPH',
                    'TERM',
                    'TERMTEXT')
    titles = {'DEFINITION-TITLE':'Title',
              'DEFINITION-STATEMENT': 'Definition statement',
              'LARGESUBJECTS': 'Relationship between large subject matter areas',
              'LIMITINGREFERENCES': 'References relevant to classification in this subclass',
              'INFORMATIVEREFERENCES': 'Informative references',
              'SPECIALRULES': 'SPECIALRULES',
              'GLOSSARYOFTERMS': 'Glossary of terms',
              'SYNONYMSANDKEYWORDS': 'SYNONYMSANDKEYWORDS'
              }

    baseDir = os.path.join(datadir,file,'transformations','ipca6transdef')
    xmlName = '.'.join([symbol,'xml'])
    # An existing translation takes precedence over the authentic text
    sourcePath = os.path.join(baseDir,'translation',xmlName)
    if not os.path.isfile(sourcePath):
        sourcePath = os.path.join(baseDir,'authentic',xmlName)
    defTree = etree.parse(sourcePath)

    data = []
    for elt in defTree.getroot().iter():
        if elt.tag in titles:
            data.append(['label',titles[elt.tag]])
        if elt.tag not in translatable:
            continue
        if elt.text:
            data.append(['text',defTree.getpath(elt),elt.text.encode('utf-8')])
        parseTxt(elt,defTree,data)
        data.append(['space'])

    return data

def insertDefLinks(text,symbol,lang):
    """
    Placeholder for the insertion of definition links into ``text``.

    No link detection is implemented: ``text`` is returned unchanged and no
    term is reported.  The result has the shape that ``getText`` reads from
    this function, so calling ``getText`` with ``searchDef`` set no longer
    fails on a ``None`` result.

    Returns a dict ``{'text': <text>, 'terms': <set of terms>}``.
    ``symbol`` and ``lang`` are accepted for interface compatibility and
    are currently unused.
    """
    return {'text':text,'terms':set()}

def getFixedText(id,lang,fixedTextsTrees,utf8=True):
    """
    Return the fixed (interface) text identified by ``id`` in language ``lang``.

    ``fixedTextsTrees`` maps a language code to a parsed XML tree in which the
    text is looked up with the XPath ``/lang/text[@id=$id]``; the first match
    is used.

    Returns the text encoded as UTF-8 when ``utf8`` is true, otherwise the
    text as stored in the tree.  An empty string is returned (and the miss is
    logged) when no entry exists, and also when the entry has no text.
    """
    #TODO: to be externalized and shared with Definition editor/translator
    result = fixedTextsTrees[lang].xpath('/lang/text[@id=$id]',id=id)
    if len(result) == 0:
        logging.info('Tanslation missing for %s (%s)' % (id,lang))
        return ''

    text = result[0].text
    if text is None:
        # <text id="..."/> without content: treat like a missing translation
        # instead of failing on None.encode()
        return ''
    if utf8:
        return text.encode('utf-8')
    return text
        
# HTML fragments used by getText().  The link target is the public URL
# configured as 'puburl', with the referenced symbol as query parameter.
# Single symbol reference
srefTpl = Template('<a href="%s?symbol=$symbolCod" target="_blank">$symbolTxt</a>' % cfg['puburl'])
# Reference to a range of symbols; only the first symbol is linked
mrefTpl = Template('<a href="%s?symbol=$symbolCod" target="_blank">$symbolTxt-$endsymbolTxt</a>' % cfg['puburl'])
# Markup for a glossary term
glossaryLinkTpl = Template('<span class="term">$term</span>')
# Replacement text for element tags not handled explicitly by getText().
# getText() looks tags up in this mapping but it was never defined, so any
# unhandled tag raised NameError instead of being logged.
# NOTE(review): starts empty; fill in tag -> replacement entries as needed.
chars = {}
def getText(elem,edition,searchDef,symbol=None,lang=None,glossary=None):
    """
    Render the mixed content of ``elem`` (text, inline children, tails) as HTML.

    Parameters:

    * ``elem``: element whose content is rendered (the element's own tag is
      not emitted).
    * ``edition``: directory name used to build image paths.
    * ``searchDef``: when true, every text chunk is passed through
      ``insertDefLinks`` and the terms it reports are collected.
    * ``symbol``, ``lang``: forwarded to ``insertDefLinks``.
    * ``glossary``: optional mapping of known term keys.  When given and not
      empty, a GREF whose key is absent from it is rendered as plain text
      instead of glossary markup.

    Returns ``{'text': <html>, 'terms': <set of collected terms>}``.

    Text and tails made only of whitespace are dropped.  Tags that are neither
    handled here nor present in the module-level ``chars`` mapping are logged
    as errors and skipped (their tail is still rendered).

    NOTE(review): text is inserted as-is, without HTML escaping.
    """
    parts = []
    terms = set()

    def addText(chunk):
        # Missing or whitespace-only text contributes nothing
        if not (chunk and chunk.strip()):
            return
        if searchDef:
            r = insertDefLinks(chunk,symbol,lang)
            parts.append(r['text'])
            terms.update(r['terms'])
        else:
            parts.append(chunk)

    addText(elem.text)

    for e in elem:
        if e.tag in ('sref','mref'):
            #TODO: Check symbol ref against the validity file
            refsymbol = e.get('ref')
            if len(refsymbol) > 4:
                symbolTxt = decodeSymbol(refsymbol)
            else:
                symbolTxt = refsymbol

            if e.tag == 'sref':
                parts.append(srefTpl.substitute(symbolCod=refsymbol,symbolTxt=symbolTxt))
            else:
                endsymbol = e.get('endRef')
                if len(endsymbol) > 4:
                    endsymbolTxt = decodeSymbol(endsymbol)
                else:
                    endsymbolTxt = endsymbol
                parts.append(mrefTpl.substitute(symbolCod=refsymbol,symbolTxt=symbolTxt,endsymbolTxt=endsymbolTxt))

        elif e.tag == 'GREF':
            # An empty <GREF/> has no text: render nothing rather than fail
            term = e.text or ''
            # Crude singular form used as glossary key
            termkey = re.sub(r'(?:es|s|e)$', '', term.strip().lower())
            if glossary and termkey not in glossary:
                #print 'Warning: term not found "%s"' % termkey.encode('utf-8')
                parts.append(term)
            else:
                parts.append(glossaryLinkTpl.substitute(termkey=termkey,term=term))

        elif e.tag == 'SREF':
            parts.append(srefTpl.substitute(symbolCod=e.get('TARGET'),symbolTxt=e.get('LABEL')))

        elif e.tag == 'MREF':
            parts.append(mrefTpl.substitute(symbolCod=e.get('START'),symbolTxt=e.get('LABELS'),endsymbolTxt=e.get('LABELE')))

        elif e.tag == 'img':
            path = '/'.join(['data',edition,'figs',e.get('src').lower()])
            parts.append('<img src="%s" align="texttop" alt="%s" />' % (path,e.get('src').split('.')[0]))

        elif e.tag == 'IMG':
            path = '/'.join(['../data',edition,'illustrations',e.get('SRC')])
            parts.append('<div align="center"><img src="%s.gif" align="texttop" alt="%s" /></div>' % (path,e.get('SRC').split('_')[0]))

        elif e.tag.lower() in ('u','und','sup','sub','b'):
            # Inline formatting: render the content recursively.  The glossary
            # is forwarded so nested GREF elements are checked like top-level
            # ones (it used to be lost at this point).
            r = getText(e,edition,searchDef,symbol=symbol,lang=lang,glossary=glossary)
            tag = e.tag.lower()
            if tag == 'und':
                tag = 'u'
            parts.append('<%(tag)s>%(txt)s</%(tag)s>' % {'tag':tag,'txt':r['text']})
            terms.update(r['terms'])

        elif e.tag.lower() == 'symbol':
            #TODO: "font-family: Symbol" does not seems to work under FF2 (but I guess that the symbol tag is deprecated anyway)
            parts.append('<span style="font-family: Symbol;">%s</span>' % e.get('character'))

        elif e.tag.lower() == 'llinkthree':
            #TODO: verify if there are other cases of char as image like this
            parts.append('<img src="shared/img/llinkt.gif" align="texttop"/>')

        elif e.tag in chars:
            parts.append(chars[e.tag])
        else:
            logging.error('Unknown tag: %s' % e.tag)

        addText(e.tail)

    return {'text':''.join(parts),'terms':terms}

def getTextOnly(e,edition,symbol,lang,glossary):
    """
    Return the HTML rendering of ``e`` as a UTF-8 encoded string.

    Thin wrapper around ``getText`` with definition-link search disabled;
    the collected terms are discarded.
    """
    rendered = getText(e,edition,False,symbol=symbol,lang=lang,glossary=glossary)
    html = rendered['text']
    return html.encode('utf-8')

# HTML fragments written by _renderDefinition().
# Heading line: the (linked) symbol followed by a language note
defSymbolTpl = Template('<p class="title">$symbol$othlng</p>')
# Title of the definition (DEFINITION-TITLE)
defTitleTpl = Template('<p class="title">$title</p>')
# Section title; also emits a named anchor carrying the same title
defSubTitleTpl = Template('<p class="subtitle"><a name="$title"></a>$title</p>')
# Introductory label displayed under a section title
defSubLabelTpl = Template('<p class="sublabel">$label</p>')
# Title of a DEFINITION-CASE inside the definition statement
defCaseTitleTpl = Template('<p class="casetitle">$title</p>')
# SUBHEADING inside LARGESUBJECTS
defLargeSubjectTitleTpl = Template('<div class="lrgsubjtitle">$title</div>')
# SUBHEADING placed before a reference table
defRefTableTitleTpl = Template('<div class="reftbltitle">$title</div>')
def _writeSubparagraphList(file, items, render):
    """Write ``items`` as a <ul> of rendered <li> entries; nothing when empty."""
    if len(items) == 0:
        return
    file.write('<ul>')
    for sp in items: #SUBPARAGRAPH
        file.write('<li>%s</li>' % render(sp))
    file.write('</ul>')


def _writeParagraphs(file, paragraphs, render):
    """Write every child of every element of ``paragraphs`` as its own <p>."""
    for p in paragraphs: #MAIN-PARAGRAPH
        for t in p:      #PARAGRAPH-TEXT or SUBPARAGRAPH
            file.write('<p>%s</p>' % render(t))


def _writeTermRow(file, row, render):
    """Write one term row: the term in bold, then its explanatory paragraphs."""
    file.write('<tr>')
    #TERM
    file.write('<td valign="top"><b>%s</b></td>' % render(row[0]))
    file.write('<td valign="top" width="70%">')
    _writeParagraphs(file, row[1:], render)
    file.write('</td></tr>')


def _writeReferenceTable(file, table, render):
    """Write a reference table: one row per child, text on the left, references on the right."""
    file.write('<center><table border="1"><tbody>')
    for row in table: #REFERENCEROW
        file.write('<tr>')
        file.write('<td width="80%">')
        if row[0].tag == 'MAIN-PARAGRAPH':
            for t in row[0]: #PARAGRAPH-TEXT or SUBPARAGRAPH
                file.write('<p>%s</p>' % render(t))
        else: #SUBPARAGRAPH
            file.write('<p>%s</p>' % render(row[0]))
        file.write('</td>')
        #IPCREFS
        file.write('<td width="20%%">%s</td>' % render(row[1]))
        file.write('</tr>')
    file.write('</tbody></table></center>')


def _renderDefinition(definition,file,symbol,fixedTextsTrees,edition,lang,othLng='',glossary=None):
    """
    Write the HTML rendering of a definition to ``file``.

    Parameters:

    * ``definition``: root element of the definition; each child is one
      section, dispatched on its tag.
    * ``file``: object with a ``write`` method receiving the HTML.
    * ``symbol``: symbol of the definition, displayed (and linked) as heading.
    * ``fixedTextsTrees``, ``lang``: passed to ``getFixedText`` to obtain
      section titles and labels.
    * ``edition``, ``glossary``: passed to ``getTextOnly`` for every rendered
      element.
    * ``othLng``: text appended to the heading after the symbol.

    The bold markup around terms in the glossary and synonym tables is now
    properly closed (``<b>...</b>`` instead of ``<b>...<b>``).
    """
    def render(elt):
        return getTextOnly(elt,edition,symbol,lang,glossary)

    def fixed(textid):
        return getFixedText(textid,lang,fixedTextsTrees)

    file.write('<div class="body">')
    file.write(defSymbolTpl.substitute(symbol=srefTpl.substitute(symbolCod=symbol,symbolTxt=decodeSymbol(symbol)),othlng=othLng))
    for para in definition:
        if para.tag == 'DEFINITION-TITLE':
            file.write(defTitleTpl.substitute(title=render(para)))
            continue

        # Every other section starts with a fixed title looked up by tag name
        #TODO: update lang.xml files to avoid such particular case
        if para.tag == 'LIMITINGREFERENCES':
            textid = 'ref_subclass'
        else:
            textid = para.tag
        file.write(defSubTitleTpl.substitute(title=fixed(textid)))

        if para.tag == 'DEFINITION-STATEMENT':
            file.write(defSubLabelTpl.substitute(label=fixed('subclass_covers')))
            for p in para:
                if p.tag == 'DEFINITION-CASE':
                    file.write(defCaseTitleTpl.substitute(title=render(p[0])))
                else: #MAIN-PARAGRAPH (INDEXOFGROUPS not used)
                    #PARAGRAPH-TEXT
                    file.write('<p>%s</p>' % render(p[0]))
                _writeSubparagraphList(file, p[1:], render)

        elif para.tag == 'LARGESUBJECTS':
            for p in para:
                if p.tag == 'SUBHEADING':
                    file.write(defLargeSubjectTitleTpl.substitute(title=render(p)))
                else: #MAIN-PARAGRAPH
                    #PARAGRAPH-TEXT
                    file.write('<p>%s</p>' % render(p[0]))
                    _writeSubparagraphList(file, p[1:], render)

        elif para.tag in ('LIMITINGREFERENCES','INFORMATIVEREFERENCES'):
            if para.tag == 'LIMITINGREFERENCES':
                label = fixed('subclass_not_cover')
            else: #INFORMATIVEREFERENCES
                label = fixed('interest_for_search')
            file.write(defSubLabelTpl.substitute(label=label))

            for p in para:
                if p.tag == 'SUBHEADING':
                    file.write(defRefTableTitleTpl.substitute(title=render(p)))
                else: #REFERENCETABLE
                    _writeReferenceTable(file, p, render)

        elif para.tag == 'SPECIALRULES':
            _writeParagraphs(file, para, render)

        elif para.tag == 'GLOSSARYOFTERMS':
            file.write(defSubLabelTpl.substitute(label=fixed('subclass_expr')))
            file.write('<center><table><tbody>')
            for row in para: #TERMROW
                _writeTermRow(file, row, render)
            file.write('</tbody></table></center>')

        elif para.tag == 'SYNONYMSANDKEYWORDS':
            for p in para:
                if p.tag == 'TERMTEXT':
                    file.write(defSubLabelTpl.substitute(label=render(p)))
                else: #TERMROW, each in a table of its own
                    file.write('<center><table><tbody>')
                    _writeTermRow(file, p, render)
                    file.write('</tbody></table></center>')
    file.write('</div>')

def _getFiles(datadir):
    """
    List the imported definition sets found in ``datadir`` with their counters.

    A sub-directory counts as a definition set when it contains
    ``transformations/ipca6transdef``.  For each one, the pickled symbol list
    ``symbols.pic`` is loaded and its entries are counted by their first item.

    Returns a list of ``[name, nR, nT, nI, nE]`` where the four numbers are
    the counts of entries whose first item is 'R', 'T', 'I' and 'E'.
    """
    states = ('R', 'T', 'I', 'E')
    files = []
    for name in os.listdir(datadir):
        defdir = '%s/%s/transformations/ipca6transdef' % (datadir,name)
        if not os.path.isdir(defdir):
            continue

        # NOTE: symbols.pic is written by this application; pickle must
        # never be used on files coming from an untrusted source.
        picfile = open('%s/symbols.pic' % defdir)
        try:
            symbols = cPickle.load(picfile)
        finally:
            # Close the file even when unpickling fails
            picfile.close()

        count = [0,0,0,0]
        for e in symbols:
            if e[0] in states:
                count[states.index(e[0])] += 1
        files.append([name] + count)
    return files

def _isSafeName(name):
    """Return True when ``name`` can be used as a single path component."""
    return (name not in ('.', '..')
            and '/' not in name
            and '\\' not in name
            and '\x00' not in name)


def _jsonError(msg):
    """Return the JSON body reporting a failure with message ``msg``."""
    return simplejson.dumps({'success':False, 'msg':msg}, ensure_ascii=False)


def _paramError(params, required):
    """Return an error message for the first missing or unsafe parameter, else None.

    'file' and 'symbol' end up in file system paths, so they are additionally
    required to be plain names (no path separator, not '.' or '..').
    """
    for name in required:
        if name not in params:
            return 'Missing parameter: %s' % name
        if name in ('file', 'symbol') and not _isSafeName(params[name]):
            return 'Invalid parameter: %s' % name
    return None


def _parseQuery(queryString):
    """Split a raw query string into a dict; values are not URL-decoded.

    A single malformed pair (not exactly one '=') makes the whole query
    empty, which the dispatcher treats as a save request.
    """
    pairs = [v.split('=') for v in queryString.split('&')]
    try:
        return dict(pairs)
    except ValueError:
        return {}


def _readBody(environ):
    """Read the request body announced by CONTENT_LENGTH (missing/empty means 0)."""
    try:
        length = int(environ.get('CONTENT_LENGTH') or 0)
    except ValueError:
        length = 0
    return environ['wsgi.input'].read(length)


def _parseForm(body):
    """Decode a URL-encoded form body into a dict; empty entries are ignored."""
    fields = {}
    for entry in body.split('&'):
        if not entry:
            continue
        name, _, value = entry.partition('=')
        fields[unquote_plus(name)] = unquote_plus(value)
    return fields


def _defDir(file):
    """Return the working directory of the definition set named ``file``."""
    return '%s/%s/transformations/ipca6transdef' % (cfg['datadir'],file)


def _readPickle(path):
    """Load and return the pickled object stored at ``path``."""
    # NOTE: only files written by this application are unpickled; pickle
    # must never be used on data coming from an untrusted source.
    f = open(path)
    try:
        return cPickle.load(f)
    finally:
        f.close()


def _writePickle(obj, path):
    """Pickle ``obj`` into the file at ``path``."""
    f = open(path,'w')
    try:
        cPickle.dump(obj,f)
    finally:
        f.close()


def _writeFile(path, content):
    """Write ``content`` to ``path`` and close the file, even on error."""
    f = open(path,'w')
    try:
        f.write(content)
    finally:
        f.close()


def _saveTranslation(environ):
    """Store the posted translation of one symbol and update its state.

    The form fields 'file', 'symbol' and 'state' identify the definition; every
    other field is named '<xpath>;text' or '<xpath>;tail' and carries the
    UTF-8 text to set on the element selected by the XPath.
    """
    data = _parseForm(_readBody(environ))
    error = _paramError(data, ('file','symbol','state'))
    if error:
        return _jsonError(error)

    defdir = _defDir(data['file'])
    srcDefTree = etree.parse('%s/authentic/%s.xml' % (defdir,data['symbol']))

    # The translation is the authentic document with the posted texts applied
    tgtDefTree = deepcopy(srcDefTree)
    for path,text in data.iteritems():
        if path in ('action','symbol','file','state'):
            continue
        xpath,kind = path.split(';')
        elt = tgtDefTree.xpath(xpath)[0]
        if kind == 'text':
            elt.text = text.decode("utf-8")
        elif kind == 'tail':
            elt.tail = text.decode("utf-8")

    _writeFile('%s/translation/%s.xml' % (defdir,data['symbol']),
               etree.tostring(tgtDefTree,encoding="utf-8"))

    symbolsFile = '%s/symbols.pic' % defdir
    symbols = _readPickle(symbolsFile)
    for s in symbols:
        if s[2] == data['symbol']:
            s[0] = data['state']
            break
    _writePickle(symbols,symbolsFile)

    return simplejson.dumps({'success':True})


def _editorData(query):
    """Return the editor entries of one symbol as JSON."""
    error = _paramError(query, ('symbol','file'))
    if error:
        return _jsonError(error)
    return simplejson.dumps({'success':True, 'data':_buildEditor(query['symbol'],cfg['datadir'],query['file'])}, ensure_ascii=False)


def _fileList():
    """Return the names of the imported definition sets as JSON."""
    files = [name for name in os.listdir(cfg['datadir'])
             if os.path.isdir(_defDir(name))]
    return simplejson.dumps({'success':True, 'data':files})


def _symbolList(query):
    """Return the symbol list of one definition set as JSON."""
    error = _paramError(query, ('file',))
    if error:
        return _jsonError(error)
    return simplejson.dumps({'success':True, 'data':_readPickle('%s/symbols.pic' % _defDir(query['file']))})


def _importDefinitions(environ):
    """Import an uploaded 'ipcr_elayer_definitions_<name>.xml' file.

    Each non-empty English IPC-DEFINITION is stored as
    'authentic/<symbol>.xml' in a new definition set called <name>, and the
    symbol list of the set is created.
    """
    # NOTE(review): hand-rolled multipart parsing kept as it was: boundary
    # and header lines are recognised by their prefix and blank lines of the
    # uploaded file are dropped.
    filename = ''
    content = []
    for l in StringIO(_readBody(environ)).readlines():
        if l.startswith('--') or l.startswith('Content-Type:') or l.startswith('undefined'):
            continue
        if l.startswith('Content-Disposition:'):
            d = dict([p.strip().split('=') for p in l.replace('"','').split(';')[1:]])
            if 'filename' in d:
                filename = d['filename']
        elif l.strip('\n\r'):
            content.append(l)

    # Browsers may send a full client-side path, possibly with backslashes
    filename = os.path.basename(filename.replace('\\','/'))

    dirname = ''
    if filename.startswith('ipcr_elayer_definitions_') and filename.endswith('.xml'):
        dirname = '_'.join(filename.split('.')[0].split('_')[3:])
    if not dirname:
        return simplejson.dumps({'success':False, 'msg':'Wrong filename:<br/>%s' % filename}, ensure_ascii=False)

    defdir = _defDir(dirname)
    if os.path.isdir('%s/%s' % (defdir,'authentic')):
        return simplejson.dumps({'success':False, 'msg':'File already exists.'}, ensure_ascii=False)

    # Parse before touching the disk, so that an invalid upload does not
    # leave an empty definition set behind (which would block a retry)
    defTree = etree.fromstring(''.join(content))
    os.makedirs('%s/%s' % (defdir,'authentic'))
    os.makedirs('%s/%s' % (defdir,'translation'))

    symbols = []
    globalDef = None
    for e in defTree.xpath('/IPCDefinitionsSet/IPCDefinitions[@lang="EN"]//IPC-DEFINITION'):
        if len(e) == 0:
            continue
        symbol = 'global'
        if e.get('IPC'):
            symbol = e.getparent().get('symbol')
        if not symbol or not _isSafeName(symbol):
            # The symbol becomes a file name: ignore unusable values
            continue

        if symbol == 'global':
            globalDef = ['T','Global','global',dirname]
        elif len(symbol) > 3:
            symbols.append(['T',decodeSymbol(symbol),symbol,dirname])

        _writeFile('%s/authentic/%s.xml' % (defdir,symbol),
                   etree.tostring(e,encoding="utf-8"))
    if globalDef:
        symbols.insert(0, globalDef)

    _writePickle(symbols,'%s/symbols.pic' % defdir)
    return simplejson.dumps({'success':True, 'file':dirname, 'files':_getFiles(cfg['datadir'])}, ensure_ascii=False)


def _exportDefinitions(query):
    """Write the definitions file of a set with translated definitions substituted."""
    error = _paramError(query, ('file',))
    if error:
        return _jsonError(error)

    defdir = '%s/%s' % (cfg['datadir'],query['file'])
    filename = 'ipcr_elayer_definitions_%s.xml' % query['file']
    defTree = etree.parse('%s/ipcr_elayer_definitions/%s' % (defdir,filename))
    for e in defTree.xpath('/IPCDefinitionsSet/IPCDefinitions[@lang="EN"]//IPC-DEFINITION'):
        if len(e) == 0:
            continue
        symbol = 'global'
        parent = e.getparent()
        if e.get('IPC'):
            symbol = parent.get('symbol')

        # NOTE(review): translations are looked up in <set>/translation while
        # the save action writes them to
        # <set>/transformations/ipca6transdef/translation - confirm which
        # location is intended.
        symbfile = '%s/translation/%s.xml' % (defdir,symbol)
        if symbol and (symbol == 'global' or len(symbol) > 3) and os.path.isfile(symbfile):
            parent[parent.index(e)] = etree.parse(symbfile).getroot()

    exportdir = '%s/%s/ipcr_elayer_definitions/%s' % (cfg['datadir'],query['file'],cfg['targetLang'])
    if not os.path.isdir(exportdir):
        os.makedirs(exportdir)
    _writeFile('%s/%s' % (exportdir,filename),
               etree.tostring(defTree,encoding="utf-8", xml_declaration=True))
    return simplejson.dumps({'success':True, 'file':filename, 'exportdir':exportdir}, ensure_ascii=False)


def _renderComparison(query):
    """Return an HTML table showing the authentic and translated definition side by side."""
    error = _paramError(query, ('file','symbol'))
    if error:
        return _jsonError(error)

    # Fixed interface texts are only needed for rendering
    fixedTextsTrees = {}
    for lang in [cfg['sourceLang'],cfg['targetLang']]:
        fixedTextsTrees[lang] = etree.parse('%s/IPC_Multilingual_Texts/lang_%s.xml' % (cfg['datadir'],lang.upper()))

    defdir = _defDir(query['file'])
    srcDefTree = etree.parse('%s/authentic/%s.xml' % (defdir,query['symbol']))
    tgtFilePath = '%s/translation/%s.xml' % (defdir,query['symbol'])
    if os.path.isfile(tgtFilePath):
        tgtDefTree = etree.parse(tgtFilePath)
    else:
        # Nothing translated yet: show the authentic text on both sides
        tgtDefTree = deepcopy(srcDefTree)

    htmlfile = StringIO()
    htmlfile.write('<table width="100%"><tbody><tr><td>')
    _renderDefinition(srcDefTree.getroot(),htmlfile,query['symbol'],fixedTextsTrees,edition=query['file'],lang=cfg['sourceLang'],othLng=' (Authentic language)')
    htmlfile.write('</td><td width="5px">&nbsp;</td><td>')
    _renderDefinition(tgtDefTree.getroot(),htmlfile,query['symbol'],fixedTextsTrees,edition=query['file'],lang=cfg['targetLang'],othLng=' (Translation language)')
    htmlfile.write('</td></tr></tbody></table>')
    return htmlfile.getvalue()


def application(environ, start_response):
    """
    WSGI entry point of the definition translation editor.

    The request is dispatched on its query string:

    * empty (or malformed) query: save the posted translation;
    * no 'action' parameter: return the editor entries of 'symbol' in 'file';
    * action=getfilelist / getfiles / getfile: list definition sets, sets
      with counters, or the symbols of one set;
    * action=import: import an uploaded definitions file;
    * action=export: write the definitions file with translations;
    * action=render: HTML comparison of authentic and translated text.

    Every response is sent with status 200 and content type text/html;
    except for 'render', the body is JSON with a 'success' flag.  Unknown
    actions and missing or unsafe 'file'/'symbol' parameters are reported as
    ``{'success': false, 'msg': ...}``.

    Returns a one-element list holding the response body, as WSGI expects an
    iterable of strings.
    """
    status = '200 OK'

    #print >> environ['wsgi.errors'], environ.get('CONTENT_LENGTH', '0')
    query = _parseQuery(environ.get('QUERY_STRING', ''))

    if len(query) == 0:
        # action = save
        output = _saveTranslation(environ)
    elif 'action' not in query:
        output = _editorData(query)
    else:
        action = query['action']
        if action == 'getfilelist':
            output = _fileList()
        elif action == 'getfiles':
            output = simplejson.dumps({'success':True, 'data':_getFiles(cfg['datadir'])})
        elif action == 'getfile':
            output = _symbolList(query)
        elif action == 'import':
            output = _importDefinitions(environ)
        elif action == 'export':
            output = _exportDefinitions(query)
        elif action == 'render':
            output = _renderComparison(query)
        else:
            # Previously fell through and failed on the unbound 'output'
            output = _jsonError('Unknown action.')

    # WSGI bodies are byte strings and Content-Length counts bytes
    if isinstance(output, unicode):
        output = output.encode('utf-8')

    response_headers = [('Content-type', 'text/html'),
                        ('Content-Length', str(len(output)))]
    start_response(status, response_headers)

    return [output]
    
if __name__ == "__main__":
    from wsgiref.simple_server import make_server

    httpd = make_server('', 3838, application)
    print "Serving HTTP on port 3838..."
    
    # Respond to requests until process is killed
    httpd.serve_forever()

