// Team:Slovenia/libraries/bibtexparse-js

// modified by iGEM Team Slovenia 2016

/* start zoteroBibtexParse 0.0.13

  https://github.com/apcshields/zotero-bibtex-parse
  */

// Original work by Henrik Muehe (c) 2010
//
// CommonJS port by Mikola Lysenko 2013
//
// Port to Browser lib by ORCID / RCPETERS
//
// Additions and slight changes by apcshields, Jul 2014.
// (Some of which bring this back closer to Lysenko's version.)
//
// Issues:
//  no comment handling within strings
//  no string concatenation
//  no variable values yet
// Grammar implemented here:
//  bibtex -> (string | preamble | comment | entry)*;
//  string -> '@STRING' '{' key_equals_value '}';
//  preamble -> '@PREAMBLE' '{' value '}';
//  comment -> '@COMMENT' '{' value '}';
//  entry -> '@' key '{' key ',' key_value_list '}';
//  key_value_list -> key_equals_value (',' key_equals_value)*;
//  key_equals_value -> key '=' value;
//  value -> value_quotes | value_braces | key;
//  value_quotes -> '"' .*? '"'; // not quite
//  value_braces -> '{' .*? '}'; // not quite
(function (exports) {

   /**
    * Recursive-descent parser for BibTeX source text.
    *
    * Usage: call setInput(text), then bibtex(); parsed records accumulate in
    * `entries`. Regular entries are stored as
    * `{citationKey, entryType, entryTags}`; @PREAMBLE / @COMMENT blocks are
    * stored as `{entryType, entry}`. @STRING definitions are added to
    * `strings` and are not emitted as entries.
    *
    * All parse errors are thrown as plain strings (not Error objects); this is
    * kept so existing callers that inspect the thrown value keep working.
    *
    * Field values are passed through the enclosing scope's
    * `latexToUTF8.decodeLatex` (see value()).
    */
   function BibtexParser() {
       this.pos = 0;        // index of the next unread character in `input`
       this.input = "";
       this.entries = [];
       // Predefined month macros; user @STRING definitions are added here,
       // keyed by upper-cased name.
       this.strings = {  // Added from Mikola Lysenko's bibtex-parser. -APCS
           JAN: "January",
           FEB: "February",
           MAR: "March",
           APR: "April",
           MAY: "May",
           JUN: "June",
           JUL: "July",
           AUG: "August",
           SEP: "September",
           OCT: "October",
           NOV: "November",
           DEC: "December"
       };
       this.currentEntry = "";

       /** Replaces the text to parse. Does not reset `pos` or `entries`. */
       this.setInput = function (t) {
           this.input = t;
       };

       /** Returns the array of records parsed so far. */
       this.getEntries = function () {
           return this.entries;
       };

       /** True for space, CR, tab and LF; false for anything else (including undefined). */
       this.isWhitespace = function (s) {
           return (s == ' ' || s == '\r' || s == '\t' || s == '\n');
       };

       /**
        * Consumes the literal token `s`, skipping whitespace (and, unless
        * canCommentOut is false, %-comments) before and after it.
        * Throws a string if the input at the cursor is not `s`.
        */
       this.match = function (s, canCommentOut) {
           if (canCommentOut == null)
               canCommentOut = true;
           this.skipWhitespace(canCommentOut);
           if (this.input.substring(this.pos, this.pos + s.length) == s) {
               this.pos += s.length;
           } else {
               throw "Token mismatch, expected '" + s + "', found '"
               + this.input.substr(this.pos, s.length) + "'";
           }
           this.skipWhitespace(canCommentOut);
       };

       /**
        * Skips leading whitespace/comments and reports whether the literal
        * token `s` is next. The token itself is NOT consumed.
        */
       this.tryMatch = function (s, canCommentOut) {
           // Fixed: the default used to be assigned to a misspelled (and
           // undeclared) `canComment`, so comments were never skipped here.
           if (canCommentOut == null)
               canCommentOut = true;
           this.skipWhitespace(canCommentOut);
           return this.input.substring(this.pos, this.pos + s.length) == s;
       };

       /* when search for a match all text can be ignored, not just white space */
       /** Advances to the next '@' and returns true, or returns false at end of input. */
       this.matchAt = function () {
           while (this.input.length > this.pos && this.input[this.pos] != '@') {
               this.pos++;
           }
           return this.input[this.pos] == '@';
       };

       /**
        * Advances past whitespace. When canCommentOut is true, also skips
        * %-comments (from '%' up to the next newline or end of input).
        */
       this.skipWhitespace = function (canCommentOut) {
           while (this.isWhitespace(this.input[this.pos])) {
               this.pos++;
           }
           if (this.input[this.pos] == "%" && canCommentOut == true) {
               // Bounded by the input length: a comment on the last line with
               // no trailing newline previously looped forever.
               while (this.pos < this.input.length && this.input[this.pos] != "\n") {
                   this.pos++;
               }
               this.skipWhitespace(canCommentOut);
           }
       };

       /**
        * Parses a '{...}' value with balanced nested braces and returns the
        * text between the outer braces. A backslash escapes the character
        * after it, so '\{' and '\}' do not affect brace counting.
        * Throws "Unterminated value" if the input ends first.
        */
       this.value_braces = function () {
           var bracecount = 0;
           this.match("{", false);
           var start = this.pos;
           var escaped = false;
           while (true) {
               if (!escaped) {
                   if (this.input[this.pos] == '}') {
                       if (bracecount > 0) {
                           bracecount--;
                       } else {
                           var end = this.pos;
                           this.match("}", false);
                           return this.input.substring(start, end);
                       }
                   } else if (this.input[this.pos] == '{') {
                       bracecount++;
                   } else if (this.pos >= this.input.length - 1) {
                       throw "Unterminated value";
                   }
               }
               escaped = !!(this.input[this.pos] == '\\' && escaped == false);
               /*changed*/
               this.pos++;
           }
       };

       /**
        * Reads the body of an @COMMENT / @PREAMBLE block up to (not
        * including) its closing brace, honouring nested braces.
        * Note: tryMatch skips whitespace on every step, so whitespace is not
        * preserved in the returned text.
        */
       this.value_comment = function () {
           var str = "";
           var start = this.pos;   // used only for the error message below
           var brcktCnt = 0;
           while (!(this.tryMatch("}", false) && brcktCnt == 0)) {
               str = str + this.input[this.pos];
               if (this.input[this.pos] == '{')
                   brcktCnt++;
               if (this.input[this.pos] == '}')
                   brcktCnt--;
               if (this.pos >= this.input.length - 1) {
                   throw "Unterminated value:" + this.input.substring(start);
               }
               this.pos++;
           }
           return str;
       };

       /**
        * Parses a '"..."' value and returns the text between the quotes.
        * A backslash escapes the following character (so '\"' does not end
        * the value). Throws if the input ends first.
        */
       this.value_quotes = function () {
           this.match('"', false);
           var start = this.pos;
           var escaped = false;
           while (true) {
               if (!escaped) {
                   if (this.input[this.pos] == '"') {
                       var end = this.pos;
                       this.match('"', false);
                       return this.input.substring(start, end);
                   } else if (this.pos >= this.input.length - 1) {
                       throw "Unterminated value:" + this.input.substring(start);
                   }
               }
               escaped = !!(this.input[this.pos] == '\\' && escaped == false); // changed
               this.pos++;
           }
       };

       /**
        * Parses one value operand: a braced value, a quoted value, a defined
        * string macro (case-insensitive name), or a bare number.
        * Throws "Value expected:..." for anything else.
        */
       this.single_value = function () {
           var start = this.pos;
           if (this.tryMatch("{")) {
               return this.value_braces();
           } else if (this.tryMatch('"')) {
               return this.value_quotes();
           } else {
               var k = this.key();
               var macro = k.toUpperCase();
               // Fixed: the lookup used the upper-cased name but the return
               // used the raw one, so e.g. `jan` resolved to undefined.
               // hasOwnProperty also lets a macro expand to the empty string.
               if (Object.prototype.hasOwnProperty.call(this.strings, macro)) { // Added from Mikola Lysenko's bibtex-parser. -APCS
                   return this.strings[macro];
               } else if (/^[0-9]+$/.test(k)) {
                   return k;
               } else {
                   throw "Value expected:" + this.input.substring(start);
               }
           }
       };

       /**
        * Parses a full value: one or more operands joined by '#', then
        * concatenated and LaTeX-decoded via latexToUTF8.decodeLatex.
        */
       this.value = function () {
           var values = [];
           values.push(this.single_value());
           while (this.tryMatch("#")) {
               this.match("#");
               values.push(this.single_value());
           }
           return latexToUTF8.decodeLatex(values.join(""));
       };

       /**
        * Reads the longest run of key characters at the cursor (may be empty).
        * Throws "Runaway key" if the input ends while still reading.
        */
       this.key = function () {
           var start = this.pos;
           while (true) {
               if (this.pos >= this.input.length) {
                   throw "Runaway key";
               }
               if (this.input[this.pos].match(/[a-zA-Z0-9+_:\?\.\/\[\]\-]/)) { // Added question marks to handle Zotero going sideways. -APCS
                   this.pos++;
               } else {
                   return this.input.substring(start, this.pos);
               }
           }
       };

       /** Parses `key = value` and returns it as a two-element array [key, value]. */
       this.key_equals_value = function () {
           var key = this.key();
           if (this.tryMatch("=")) {
               this.match("=");
               var val = this.value();
               return [key, val];
           } else {
               throw "... = value expected, equals sign missing:"
               + this.input.substring(this.pos);
           }
       };

       /**
        * Parses a comma-separated list of `key = value` pairs into
        * currentEntry.entryTags (keys lower-cased). A trailing comma before
        * the closing brace is accepted.
        */
       this.key_value_list = function () {
           var kv = this.key_equals_value();
           this.currentEntry['entryTags'] = {};
           this.currentEntry['entryTags'][kv[0].toLowerCase()] = kv[1];
           while (this.tryMatch(",")) {
               this.match(",");
               // fixes problems with commas at the end of a list
               if (this.tryMatch("}")) {
                   break;
               }
               kv = this.key_equals_value();
               this.currentEntry['entryTags'][kv[0].toLowerCase()] = kv[1];
           }
       };

       /**
        * Parses the inside of a regular entry (citation key, comma, fields)
        * and appends the record to `entries`.
        * @param d the directive including its leading '@' (e.g. "@article").
        */
       this.entry_body = function (d) {
           this.currentEntry = {};
           this.currentEntry['citationKey'] = this.key();
           this.currentEntry['entryType'] = d.substring(1);
           this.match(",");
           this.key_value_list();
           this.entries.push(this.currentEntry);
       };

       /** Consumes '@' plus the directive name and returns them, e.g. "@article". */
       this.directive = function () {
           this.match("@");
           return "@" + this.key();
       };

       /**
        * Parses the body of an @STRING block and records the macro under its
        * upper-cased name. Added: bibtex() called this method but it was
        * never defined, so every @STRING block raised a TypeError.
        */
       this.string = function () {
           var kv = this.key_equals_value();
           this.strings[kv[0].toUpperCase()] = kv[1];
       };

       /** Parses the body of an @PREAMBLE block and appends it to `entries`. */
       this.preamble = function () {
           this.currentEntry = {};
           this.currentEntry['entryType'] = 'PREAMBLE';
           this.currentEntry['entry'] = this.value_comment();
           this.entries.push(this.currentEntry);
       };

       /** Parses the body of an @COMMENT block and appends it to `entries`. */
       this.comment = function () {
           this.currentEntry = {};
           this.currentEntry['entryType'] = 'COMMENT';
           this.currentEntry['entry'] = this.value_comment();
           this.entries.push(this.currentEntry);
       };

       /** Parses a regular entry; thin wrapper around entry_body. */
       this.entry = function (d) {
           this.entry_body(d);
       };

       /**
        * Top-level loop: parses every '@'-introduced block in the input.
        * Text between blocks is ignored. The special directives are matched
        * case-insensitively (previously only the upper-case spellings were
        * recognised); the entryType stored for regular entries keeps the
        * spelling used in the input.
        */
       this.bibtex = function () {
           while (this.matchAt()) {
               var d = this.directive();
               var kind = d.toUpperCase();
               this.match("{");
               if (kind == "@STRING") {
                   this.string();
               } else if (kind == "@PREAMBLE") {
                   this.preamble();
               } else if (kind == "@COMMENT") {
                   this.comment();
               } else {
                   this.entry(d);
               }
               this.match("}");
           }
       };
   }
   /**
    * Two-way converter between LaTeX escape sequences and Unicode characters.
    *
    * `latexToUni` maps the text that follows a backslash (e.g. "'e", "omega")
    * to its character; `uniToLatex` is the reverse map built from it.
    * `maxLatexLength` is the length of the longest escape name and bounds the
    * lookahead in longestEscapeMatch.
    *
    * Side effect: constructing an instance installs
    * String.prototype.addSlashes (kept for backward compatibility).
    */
   function LatexToUTF8() {
       var hasOwn = Object.prototype.hasOwnProperty;

       this.uniToLatex = {};
       // Fixed: this was never initialised, so the comparison in the loop
       // below was always false and the lookahead was effectively unbounded.
       this.maxLatexLength = 0;
       this.latexToUni = {
           "`A": "À", // begin grave
           "`E": "È",
           "`I": "Ì",
           "`O": "Ò",
           "`U": "Ù",
           "`a": "à",
           "`e": "è",
           "`i": "ì",
           "`o": "ò",
           "`u": "ù",
           "\'A": "Á", // begin acute
           "\'E": "É",
           "\'I": "Í",
           "\'O": "Ó",
           "\'U": "Ú",
           "\'Y": "Ý",
           "\'a": "á",
           "\'e": "é",
           "\'i": "í",
           "\'o": "ó",
           "\'u": "ú",
           "\'y": "ý",
           "\"A": "Ä", // begin diaeresis
           "\"E": "Ë",
           "\"I": "Ï",
           "\"O": "Ö",
           "\"U": "Ü",
           "\"a": "ä",
           "\"e": "ë",
           "\"i": "ï",
           "\"o": "ö",
           "\"u": "ü",
           "~A": "Ã", // begin tilde
           "~N": "Ñ",
           "~O": "Õ",
           "~a": "ã",
           "~n": "ñ",
           "~o": "õ",
           "rU": "Ů", // begin ring above
           "ru": "ů",
           "vC": "Č",  // begin caron
           "vD": "Ď",
           "vE": "Ě",
           "vN": "Ň",
           "vR": "Ř",
           "vS": "Š",
           "vT": "Ť",
           "vZ": "Ž",
           "vc": "č",
           "vd": "ď",
           "ve": "ě",
           "vn": "ň",
           "vr": "ř",
           "vs": "š",
           "vt": "ť",
           "vz": "ž",
           "#": "#",  // begin special symbols
           "$": "$",
           "%": "%",
           "&": "&",
           "\\": "\\",
           "^": "^",
           "_": "_",
           "{": "{",
           "}": "}",
           "~": "~",
           "\"": "\"",
           "\'": "’", // closing single quote
           "`": "‘", // opening single quote
           "AA": "Å", // begin non-ASCII letters
           "AE": "Æ",
           "O": "Ø",
           "aa": "å",
           "ae": "æ",
           "o": "ø",
           "ss": "ß",
           "textcopyright": "©",
           "textellipsis": "…",
           "textemdash": "—",
           "textendash": "–",
           "textregistered": "®",
           "texttrademark": "™",
           "alpha": "α", // begin greek alphabet
           "beta": "β",
           "gamma": "γ",
           "delta": "δ",
           "epsilon": "ε",
           "zeta": "ζ",
           "eta": "η",
           "theta": "θ",
           "iota": "ι",
           "kappa": "κ",
           "lambda": "λ",
           "mu": "μ",
           "nu": "ν",
           "xi": "ξ",
           "omicron": "ο",
           "pi": "π",
           "rho": "ρ",
           // Fixed: from sigma onward every letter was shifted by one
           // (sigma -> ς, tau -> σ, ... omega -> ψ, and ω was missing).
           "sigma": "σ",
           "varsigma": "ς", // final sigma
           "tau": "τ",
           "upsilon": "υ",
           "phi": "φ",
           "chi": "χ",
           "psi": "ψ",
           "omega": "ω"
       };

       // Escapes backslashes, quotes and NUL characters in a string.
       // NOTE(review): this extends a native prototype; kept only because
       // code outside this file may rely on it.
       String.prototype.addSlashes = function () {
           // no need to do (str+) anymore because 'this' can only be a string
           return this.replace(/[\\"']/g, '\\$&').replace(/\u0000/g, '\\0');
       };

       // Build the reverse map and record the longest escape NAME (the key).
       // The original measured the replacement character instead.
       for (var idx in this.latexToUni) {
           if (!hasOwn.call(this.latexToUni, idx))
               continue;
           if (idx.length > this.maxLatexLength)
               this.maxLatexLength = idx.length;
           this.uniToLatex[this.latexToUni[idx]] = idx;
       }

       /**
        * Finds the longest known escape name that starts right after the
        * backslash at `pos`.
        * @param value text being decoded
        * @param pos index of the backslash in `value`
        * @returns the matched escape name, or "" when nothing matches
        */
       this.longestEscapeMatch = function (value, pos) {
           var subStringEnd = Math.min(pos + 1 + this.maxLatexLength, value.length);
           var subStr = value.substring(pos + 1, subStringEnd);
           // Own-property check so inherited names such as "toString" or
           // "constructor" are never treated as escapes.
           while (subStr.length > 0 && !hasOwn.call(this.latexToUni, subStr)) {
               subStr = subStr.substring(0, subStr.length - 1);
           }
           return subStr;
       };

       /**
        * Replaces known LaTeX escapes in `value` with their characters and
        * drops every unescaped '{' and '}'. A backslash that does not start
        * a known escape is copied through unchanged.
        */
       this.decodeLatex = function (value) {
           var newVal = "";
           var pos = 0;
           while (pos < value.length) {
               if (value[pos] == '\\') {
                   var match = this.longestEscapeMatch(value, pos);
                   if (match.length > 0) {
                       newVal += this.latexToUni[match];
                       pos = pos + 1 + match.length;
                   } else {
                       newVal += value[pos];
                       pos++;
                   }
               } else if (value[pos] == '{' || value[pos] == '}') {
                   pos++;
               } else {
                   newVal += value[pos];
                   pos++;
               }
           }
           return newVal;
       };

       /**
        * Replaces every character that has a known escape with a backslash
        * followed by the escape name; other characters are copied as-is.
        */
       this.encodeLatex = function (value) {
           var trans = "";
           for (var idx = 0; idx < value.length; ++idx) {
               var c = value.charAt(idx);
               if (hasOwn.call(this.uniToLatex, c))
                   trans += '\\' + this.uniToLatex[c];
               else
                   trans += c;
           }
           return trans;
       };
   }
   var latexToUTF8 = new LatexToUTF8();
   exports.toJSON = function (bibtex) {
       var b = new BibtexParser();
       b.setInput(bibtex);
       b.bibtex();
       return b.entries;
   };
   /* added during hackathon don't hate on me */
   exports.toBibtex = function (json) {
       var out = ;
       for (var i in json) {
           out += "@" + json[i].entryType;
           out += '{';
           if (json[i].citationKey)
               out += json[i].citationKey + ', ';
           if (json[i].entry)
               out += json[i].entry;
           if (json[i].entryTags) {
               var tags = ;
               for (jdx in json[i].entryTags) {
                   if (tags.length != 0)
                       tags += ', ';
                   tags += jdx + '= {' + latexToUTF8.encodeLatex(json[i].entryTags[jdx]) + '}';
               }
               out += tags;
           }
           out += '}\n\n';
       }
       console.log(out);
       return out;
   };

})(typeof exports === 'undefined' ? this['bibtexParse'] = {} : exports);

/* end zoteroBibtexParse */