// Shared FileSystemObject; used below for FileExists() and CreateTextFile().
var oFSO = new ActiveXObject('Scripting.FileSystemObject');

// Characters that stop addFigs' backward scan for the start of a formula.
var sStopChars = '>,.!?:;\r\t\n';
// NOTE(review): addFigs declares its own local named sChars, so this global is
// not read there; no other reference is visible in this file.
var sChars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
// Upper-case letters; addFigs keeps a space inside a formula only when the
// neighbouring character is one of these.
var sUpperChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
// Symbols accepted when updateFile checks whether a capitalised token consists
// only of known symbols. Entries are tried in array order and the first one
// that is a prefix of the remaining token wins, so the longer entries are
// listed ahead of the one-letter symbols.
// NOTE(review): the three-letter entries look like amino-acid abbreviations and
// systematic element names - confirm.
var asElements = [
	'Ala', 'Arg', 'Asn', 'Asp', 'Cys', 'Gln', 'Glu', 'Gly', 'His', 'Ile', 'Leu', 'Lys', 
	'Met', 'Phe', 'Pro', 'Ser', 'Thr', 'Trp', 'Tyr', 'Val', 'Uut', 'Uuq', 'Uup', 'Uuh', 
	'Uus', 'Uuo', 

	'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Cl', 'Ar', 'Ca', 'Sc', 'Ti', 'Si', 'Li', 
	'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Zr', 'Nb', 'Mo', 'Tc', 
	'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'Xe', 'Cs', 'Ba', 'Lu', 
	'Hf', 'Ta', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 
	'Rn', 'Fr', 'Ra', 'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', 'Uub', 
	'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Be', 'Ne', 'Na', 'Mg', 'Al', 
	'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Ac', 'Th', 'Pa', 'Np', 'Pu', 'Am', 'Cm', 
	'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'He',
	
	'H', 'B', 'C', 'N', 'O', 'F', 'P', 'S', 'K', 'V', 'Y', 'I', 'W', 'U', 'X' // ???
];
// Markers whose presence in an entry makes updateFile hand that entry to
// addFigs. Same content as asSpecTags plus the generic '<img ' prefix.
var asCheckSpecTags = [
	'<endash/>', '<emdash/>', '<rlinkt/>', '<llinkt/>', '<sub>', '</sub>', '<rarrow/>', '<larrow/>', '<dbond/>', '<tbond/>', '<llinkthree/>', '<img '
	,'\u2014','<img src="llinkthree.gif"/>','<img src="llinkt.gif"/>','\u2190','\u2261','C='
];
// Markers addFigs searches for to locate a formula. While scanning a formula,
// a tag that exactly equals one of these entries is kept as part of it; any
// other tag ends the formula.
var asSpecTags = [
	'<endash/>', '<emdash/>', '<rlinkt/>', '<llinkt/>', '<sub>', '</sub>', '<rarrow/>', '<larrow/>', '<dbond/>', '<tbond/>', '<llinkthree/>'
	,'\u2014','<img src="llinkthree.gif"/>','<img src="llinkt.gif"/>','\u2190','\u2261','C='
];

// [regular-expression source, replacement] pairs. updateFile compiles each
// source with the flags 'mig' and applies it to an entry before the marker
// check, so that empty element pairs ('<x></x>') and self-closing tags with
// inner whitespace ('<x />') are rewritten to the canonical '<x/>' form.
var asReplaceTags = [
	['<endash></endash>', '<endash/>'], 
	['<emdash></emdash>', '<emdash/>'], 
	['<rlinkt></rlinkt>', '<rlinkt/>'], 
	['<llinkt></llinkt>', '<llinkt/>'], 
	['<rarrow></rarrow>', '<rarrow/>'], 
	['<larrow></larrow>', '<larrow/>'], 
	['<dbond></dbond>', '<dbond/>'], 
	['<tbond></tbond>', '<tbond/>'], 
	['<llinkthree></llinkthree>', '<llinkthree/>'],
	['<endash\\s*/>', '<endash/>'], 
	['<emdash\\s*/>', '<emdash/>'], 
	['<rlinkt\\s*/>', '<rlinkt/>'], 
	['<llinkt\\s*/>', '<llinkt/>'], 
	['<rarrow\\s*/>', '<rarrow/>'], 
	['<larrow\\s*/>', '<larrow/>'], 
	['<dbond\\s*/>', '<dbond/>'], 
	['<tbond\\s*/>', '<tbond/>'], 
	['<llinkthree\\s*/>', '<llinkthree/>']
];

// Sequence number used by addFigs to name the 'logN.txt' dump it writes when
// its iteration limit is reached.
var giInfIndex = 1;

// Run the tool. The function declarations further down are hoisted, so the
// call works even though it precedes them in the file.
updateXML();

// [E]-[word]
// <emdash>

// Finds the end of the element whose opening tag starts at iPos.
//
// Starting on the '<' at iPos, every following tag is classified by its text
// only: a tag whose '>' is preceded by '/' is self-closing and ignored, a tag
// starting with '</' lowers the nesting level, any other tag raises it. The
// scan stops at the first tag after which the level is back to zero.
// Comments, processing instructions and CDATA sections get no special
// treatment.
//
// sHTML - text to scan.
// iPos  - index of the '<' that opens the element.
// Returns the index just past the '>' that balances the opening tag. Returns
// iPos unchanged when there is no '<' at iPos, when a tag is not terminated by
// '>', or when the text ends before the nesting level returns to zero.
function htmlFindTagPair(sHTML, iPos) {
	var iLevel = 0;
	var iOpen = iPos;
	var iClose;

	// Nothing to pair when the scan does not start on a tag.
	if (sHTML.charAt(iPos) != '<')
		return iPos;

	while (iOpen != -1) {
		iClose = sHTML.indexOf('>', iOpen);
		if (iClose == -1)
			break; // unterminated tag: no reliable end position exists

		if (sHTML.charAt(iClose - 1) != '/') {
			if (sHTML.charAt(iOpen + 1) == '/')
				iLevel--;
			else
				iLevel++;
		}

		if (iLevel == 0)
			return iClose + 1;

		// Jump straight to the next tag instead of visiting every character.
		iOpen = sHTML.indexOf('<', iOpen + 1);
	}

	return iPos;
}

// Command-line entry point.
//
// Reads the first positional argument as the input file and the second one as
// the output file, runs updateFile() on the input text and writes the result.
// When either file name is missing the usage text is printed and nothing is
// read or written (previously a missing output name only failed later, while
// saving). A non-existing input file is reported through log().
function updateXML() {
	var sFileName = '';
	var sFileName2 = '';

	if (WScript.arguments.length >= 1) {
		sFileName = getParam(1);
		sFileName2 = getParam(2);
	}

	if (sFileName == '' || sFileName2 == '') {
		WScript.echo('Formula hilighter tool v1.0');
		WScript.echo('The given XML file\'s formulae texts will be enhanced with <fig> tags.');
		WScript.echo('Use: cscript addfigs.js [input filename] [output filename]');
		return;
	}

	log('Sometimes the script wait a lot. Don\'t stop.');

	if (!oFSO.FileExists(sFileName)) {
		log('File not found \'' + sFileName +'\'.');
		return;
	}

	var sText = readUTF8File(sFileName);
	log("File read.");
	// updateFile returns the result as an array of consecutive text chunks.
	var aNewText = updateFile(sText);
	writeUTF8File(sFileName2, aNewText);
	log('Done.');
}

// Returns true when sToken can be consumed completely, left to right, by
// entries of asElements. At every step the first entry (in array order) that
// is a prefix of the remaining text is taken; there is no backtracking.
function isElementChain(sToken) {
	var sRest = sToken;
	var sHit, sCandidate, i;

	while (sRest != '') {
		sHit = '';
		for (i = 0; i < asElements.length; i++) {
			sCandidate = asElements[i];
			if (sCandidate == sRest.substr(0, sCandidate.length)) {
				sHit = sCandidate;
				break;
			}
		}
		if (sHit == '')
			return false;
		sRest = sRest.substr(sHit.length);
	}
	return sToken != '';
}

// Adds <fig> markup to the formula texts of the '<ipcEntry ' elements in sText.
//
// Only entries whose own opening tag carries a kind attribute starting with
// 'n', 'm' or a digit are looked at. Such an entry is normalised with
// asReplaceTags and, if it then contains one of asCheckSpecTags, passed through
// addFigs() and the symbol-wrapping replacements below. Everything else is
// copied unchanged. The first xmlns="..." attribute of the text is removed.
//
// sText - complete XML text.
// Returns an array of strings; written one after the other they form the
// updated text.
function updateFile(sText) {
	var iStart = 0;
	var iPrevEnd = 0;
	var iPrevPerc = 0;
	var aRes = new Array();
	var i;

	// 20150915 TA remove namespace
	sText = sText.replace(/xmlns=\"[^\"]+\"/, "");
	var iLen = sText.length;
	log("Text size:" + sText.length);

	// Compile the patterns once instead of once per entry.
	var aoNormalize = new Array();
	for (i = 0; i < asReplaceTags.length; i++)
		aoNormalize.push(new RegExp(asReplaceTags[i][0], 'mig'));
	var oHyphenated = new RegExp('([A-Z]+[a-z]{0,3})(-[A-Z]?[a-z]*)', 'g');
	var oStandAlone = new RegExp('([ \r\n\t,;\.:!\?])([A-Z]+[a-z]{0,2})([ \r\n\t,;\.:!\?])', 'g');

	while (true) {
		var iFmStart = sText.indexOf('<ipcEntry ', iStart);
		if (iFmStart == -1)
			break;

		var iPerc = Math.round(iFmStart * 100 / iLen);
		if (iPerc != iPrevPerc) {
			log(iPerc + '%');
			iPrevPerc = iPerc;
		}

		// Read the kind attribute of this tag only; a match beyond the tag's
		// own '>' belongs to some other element.
		var iTagEnd = sText.indexOf('>', iFmStart);
		var nKindAt = sText.indexOf('kind="', iFmStart);
		var cKind = '';
		if (nKindAt != -1 && (iTagEnd == -1 || nKindAt < iTagEnd))
			cKind = sText.charAt(nKindAt + 6);

		if (!(cKind == 'n' || cKind == 'm' || (cKind >= '0' && cKind <= '9'))) {
			iStart = iFmStart + 1;
			continue;
		}

		var iFmEnd = htmlFindTagPair(sText, iFmStart);
		if (!(iFmEnd > iFmStart)) {
			// No balanced end was found. Step over this tag, otherwise the
			// same position would be found again forever.
			iStart = iFmStart + 1;
			continue;
		}

		var sTemp = sText.substring(iFmStart, iFmEnd);
		for (i = 0; i < aoNormalize.length; i++)
			sTemp = sTemp.replace(aoNormalize[i], asReplaceTags[i][1]);

		var bFound = false;
		for (i = 0; i < asCheckSpecTags.length; i++)
			if (sTemp.indexOf(asCheckSpecTags[i]) != -1) {
				bFound = true;
				break;
			}

		if (!bFound) {
			iStart = iFmEnd;
			continue;
		}

		sTemp = addFigs(sTemp);

		// Replace special formula elements, like: A-acyl
		sTemp = sTemp.replace(oHyphenated, function ($0, $1, $2) {
			if (isElementChain($1))
				return '<fig>' + $1 + '</fig>' + $2;
			return $1 + $2;
		});

		// Replace stand alone uppercase letters
		sTemp = sTemp.replace(oStandAlone, function ($0, $1, $2, $3) {
			if (isElementChain($2))
				return $1 + '<fig>' + $2 + '</fig>' + $3;
			return $1 + $2 + $3;
		});

		if (sTemp.indexOf("<fig>") > 0) {
			// Also wrap a symbol that directly follows "</fig>, ", and undo the
			// wrapping of a single character after the word "section ".
			sTemp = sTemp.replace(/(<\x2ffig>, )([A-Z][a-z]?)([^a-z])/g, "$1<fig>$2</fig>$3");
			sTemp = sTemp.replace(/(section )<fig>(.)<\x2ffig>/g, "$1$2");
		}

		aRes.push(sText.substring(iPrevEnd, iFmStart) + sTemp);
		iPrevEnd = iFmEnd;
		// sText itself is never modified, so the search continues right after
		// the original entry. Using the length of the rewritten entry here
		// would skip following entries or rescan inside this one.
		iStart = iFmEnd;
	}

	if (iPrevEnd < iLen)
		aRes.push(sText.substr(iPrevEnd));

	log("Result size:" + aRes.length);
	return aRes;
}

// Concatenates the strings of aArr in order.
//
// aArr - array of strings.
// Returns the concatenated text, '' for an empty array. The array passed in is
// left untouched (the previous implementation collapsed it to one element and
// used an undeclared loop counter).
function arrayToString(aArr) {
	if (aArr.length == 0)
		return '';

	return aArr.join('');
}

// Check and replace text for formula elements. The formula text will be nested inside <fig> tags.
//
// Repeatedly finds the next entry of asSpecTags, widens the match to the
// surrounding formula and rebuilds that span: plain text runs are wrapped in
// <fig>...</fig> (runs between <sub> and </sub> are left unwrapped), tags from
// asSpecTags are copied as they are.
//  - The formula starts after the closest preceding character of sStopChars,
//    or after a space that is at index 0/1 or is not preceded by an upper-case
//    letter.
//  - The formula ends at a tag that is not in asSpecTags, at one of
//    , . ! ? : ; CR LF TAB, or at a space that is within the last two
//    characters or is not followed by an upper-case letter. A formula that
//    reaches the end of the text ends there.
//
// The loop is limited to 2000 rounds; when the limit is hit the current text
// is dumped into 'log<N>.txt' and a message is logged.
//
// sText - text of one entry.
// Returns the text with the <fig> markup added.
function addFigs(sText) {
	var iStart = 0;
	var bDone = false;
	var iInf, i, t, iPos, iMinPos, cChar;

	for (iInf = 0; iInf < 2000; iInf++) {
		// Get the first formula marker at or after iStart
		iMinPos = sText.length;
		for (i = 0; i < asSpecTags.length; i++) {
			iPos = sText.indexOf(asSpecTags[i], iStart);
			if (iPos != -1 && iPos < iMinPos)
				iMinPos = iPos;
		}
		if (iMinPos == sText.length) {
			bDone = true;
			break;
		}

		// Find beginning of the formula
		var iFmStart = 0;
		for (t = iMinPos - 1; t >= 0; t--) {
			cChar = sText.charAt(t);
			if (sStopChars.indexOf(cChar) != -1
					|| (cChar == ' ' && (t < 2 || sUpperChars.indexOf(sText.charAt(t - 1)) == -1))) {
				iFmStart = t + 1;
				break;
			}
		}

		// Find the end of formula and build its replacement on the way.
		// iFmEnd == 0 means "no end seen yet", as in the original scan.
		var sNewFormula = '';
		var iFmEnd = 0, iTagStart = -1, sRun = '', bInSub = false;
		for (t = iFmStart; t < sText.length; t++) {
			cChar = sText.charAt(t);

			if (iTagStart != -1) {
				// Inside a tag: only its closing '>' is of interest.
				if (cChar == '>') {
					if (sRun != '')
						sNewFormula += bInSub ? sRun : '<fig>' + sRun + '</fig>';

					var sTag = sText.substring(iTagStart, t + 1);
					var bSpec = false;
					for (i = 0; i < asSpecTags.length; i++)
						if (asSpecTags[i] == sTag) {
							bSpec = true;
							break;
						}

					if (!bSpec)
						iFmEnd = iTagStart; // foreign tag: formula ends in front of it
					else {
						if (sTag == '<sub>')
							bInSub = true;
						if (sTag == '</sub>')
							bInSub = false;
						sNewFormula += sTag;
					}

					sRun = '';
					iTagStart = -1;
				}
			} else
				switch (cChar) {
					case '<':
						iTagStart = t;
						break;
					case ' ':
						if (t >= sText.length - 2 || sUpperChars.indexOf(sText.charAt(t + 1)) == -1)
							iFmEnd = t;
						else
							sRun += cChar;
						break;
					case ',':
					case '.':
					case '!':
					case '?':
					case ':':
					case ';':
					case '\r':
					case '\n':
					case '\t':
						iFmEnd = t;
						break;
					default:
						sRun += cChar;
						break;
				}

			if (iFmEnd != 0)
				break;
		}

		// The scan ran off the end of the text. Without this, the untouched 0
		// would make substr(iFmEnd) below append the whole text a second time.
		if (iFmEnd == 0)
			iFmEnd = (iTagStart != -1) ? iTagStart : sText.length;

		if (sRun != '')
			sNewFormula += '<fig>' + sRun + '</fig>';

		sText = sText.substr(0, iFmStart) + sNewFormula + sText.substr(iFmEnd);
		iStart = iFmStart + sNewFormula.length;
	}

	if (!bDone) {
		var sTempFile = 'log' + (giInfIndex++) + '.txt';
		var oTempFile = oFSO.CreateTextFile(sTempFile, true, false);
		oTempFile.Write(sText);
		oTempFile.Close();
		log('Infinite loop detected. Logged into \'' + sTempFile + '\'! Skip this.');
	}

	return sText;
}

// Left-pads the decimal text of iVal with '0' characters until it is at least
// iSize characters long. Longer values are returned unshortened.
function expandZeros(iVal, iSize) {
	var sPadded = String(iVal);

	while (sPadded.length < iSize)
		sPadded = '0' + sPadded;

	return sPadded;
}

// Writes sLog to the console, prefixed with the local time as
// 'YYYY.MM.DD hh:mm:ss - '.
function log(sLog) {
	var oDate = new Date();
	WScript.echo(
		 oDate.getFullYear()                        // getYear() is engine dependent
		 +'.' +expandZeros(oDate.getMonth() + 1, 2) // getMonth() counts from 0
		 +'.' +expandZeros(oDate.getDate(), 2)
		 +' ' +expandZeros(oDate.getHours() ,2)
		 +':' +expandZeros(oDate.getMinutes(), 2)
		 +':' +expandZeros(oDate.getSeconds(), 2)
		 +' - ' +sLog);
} // log

// Returns one command-line parameter.
//
// oIndex - a number selects the n-th (1-based) argument that does not start
//          with '-'; anything that is not numeric is taken as the name of a
//          '-name=value' switch (the name is compared case-insensitively).
// Returns the argument text exactly as typed, so file names keep their case.
// For a switch, the value is everything after the first '='. Returns '' when
// the parameter is absent or the switch has no value.
function getParam(oIndex) {
	var sArg, i;

	if (isNaN(oIndex)) {
		var sName = String(oIndex).toLowerCase();
		for (i = 0; i < WScript.arguments.length; i++) {
			sArg = String(WScript.arguments(i));
			if (sArg.charAt(0) != '-')
				continue;

			var iEq = sArg.indexOf('=');
			var sKey = (iEq == -1) ? sArg.substr(1) : sArg.substring(1, iEq);
			if (sKey.toLowerCase() == sName)
				return (iEq == -1) ? '' : sArg.substr(iEq + 1);
		}
		return '';
	}

	var iIndex = Number(oIndex);
	var iSeen = 0;
	for (i = 0; i < WScript.arguments.length; i++) {
		sArg = String(WScript.arguments(i));
		if (sArg.charAt(0) != '-') {
			iSeen++;
			if (iSeen == iIndex)
				return sArg;
		}
	}
	return '';
}

// Reads the file f as UTF-8 text through an ADODB.Stream and returns its
// content as one string. The text is fetched in blocks of 65536 characters;
// a block shorter than that marks the end of the stream.
function readUTF8File(f)
{
    var iChunkSize = 65536;
    var aChunks = [];
    var sChunk;
    var oStream = new ActiveXObject("ADODB.Stream");

    log("Open "+f);
    oStream.Open();
    oStream.Type = 2; //adTypeText;
    oStream.Position = 0;
    oStream.Charset = "UTF-8";

    log("Loadfromfile "+f);
    oStream.LoadFromFile(f);

    do {
        sChunk = oStream.ReadText(iChunkSize);
        aChunks.push(sChunk);
    } while (sChunk.length == iChunkSize);

    log("Close "+f);
    oStream.Close();

    log("Joining");
    return aChunks.join('');
}


// Writes text to the file f as UTF-8 without a byte order mark.
//
// The text is first saved through a UTF-8 text stream. The file is then
// reloaded as binary and everything from byte offset 3 on is copied to a
// second stream, which overwrites the file - this drops the first three bytes.
// NOTE(review): the offset 3 assumes the text stream always emits a 3-byte
// UTF-8 BOM - confirm against the ADO documentation.
//
// f     - name of the file to create or overwrite.
// aText - array of strings written one after the other; a single string is
//         accepted as well.
function writeUTF8File(f,aText)
{
    var oStream = new ActiveXObject("ADODB.Stream");
    var oTarget = new ActiveXObject("ADODB.Stream");
    var iChunk;

    log("Save");
    oStream.Open();
    oStream.Type = 2; // adTypeText
    oStream.Position = 0;
    oStream.Charset = "UTF-8";
    if (typeof aText == 'string') {
        oStream.WriteText(aText);
    } else {
        // Index loop: for..in would also visit enumerable properties that
        // were added to the array or to Array.prototype.
        for (iChunk = 0; iChunk < aText.length; iChunk++) {
            oStream.WriteText(aText[iChunk]);
        }
    }
    oStream.SaveToFile(f, 2); //adSaveCreateOverWrite
    oStream.Close();

    log("Reopen for BOM");
    oTarget.Open();
    oTarget.Type = 1; //adTypeBinary
    oTarget.Position = 0;

    oStream.Open();
    oStream.Position = 0;
    oStream.Type = 1; //adTypeBinary
    oStream.LoadFromFile(f);
    // Start copying behind the first three bytes; there is no need to read
    // the whole file into memory beforehand.
    oStream.Position = 3;
    log("Copy without BOM");
    oStream.CopyTo(oTarget);
    oStream.Close();

    oTarget.SaveToFile(f, 2); //adSaveCreateOverWrite
    oTarget.Close();
}