www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

Parser.js (29594B)


      1 /* eslint no-constant-condition:0 */
      2 const functions = require("./functions");
      3 const environments = require("./environments");
      4 const MacroExpander = require("./MacroExpander");
      5 const symbols = require("./symbols");
      6 const utils = require("./utils");
      7 const cjkRegex = require("./unicodeRegexes").cjkRegex;
      8 
      9 const parseData = require("./parseData");
     10 const ParseError = require("./ParseError");
     11 
     12 /**
     13  * This file contains the parser used to parse out a TeX expression from the
     14  * input. Since TeX isn't context-free, standard parsers don't work particularly
     15  * well.
     16  *
     17  * The strategy of this parser is as such:
     18  *
     19  * The main functions (the `.parse...` ones) take a position in the current
     20  * parse string to parse tokens from. The lexer (found in Lexer.js, stored at
     21  * this.lexer) also supports pulling out tokens at arbitrary places. When
     22  * individual tokens are needed at a position, the lexer is called to pull out a
     23  * token, which is then used.
     24  *
     25  * The parser has a property called "mode" indicating the mode that
     26  * the parser is currently in. Currently it has to be one of "math" or
     27  * "text", which denotes whether the current environment is a math-y
     28  * one or a text-y one (e.g. inside \text). Currently, this serves to
     29  * limit the functions which can be used in text mode.
     30  *
     31  * The main functions then return an object which contains the useful data that
     32  * was parsed at its given point, and a new position at the end of the parsed
     33  * data. The main functions can call each other and continue the parsing by
     34  * using the returned position as a new starting point.
     35  *
     36  * There are also extra `.handle...` functions, which pull out some reused
     37  * functionality into self-contained functions.
     38  *
     39  * The earlier functions return ParseNodes.
     40  * The later functions (which are called deeper in the parse) sometimes return
     41  * ParseFuncOrArgument, which contains a ParseNode as well as some data about
     42  * whether the parsed object is a function which is missing some arguments, or a
     43  * standalone object which can be used as an argument to another function.
     44  */
     45 
     46 /**
     47  * Main Parser class
     48  */
     49 function Parser(input, settings) {
     50     // Create a new macro expander (gullet) and (indirectly via that) also a
     51     // new lexer (mouth) for this parser (stomach, in the language of TeX)
     52     this.gullet = new MacroExpander(input, settings.macros);
     53     // Store the settings for use in parsing
     54     this.settings = settings;
     55     // Count leftright depth (for \middle errors)
     56     this.leftrightDepth = 0;
     57 }
     58 
     59 const ParseNode = parseData.ParseNode;
     60 
     61 /**
     62  * An initial function (without its arguments), or an argument to a function.
     63  * The `result` argument should be a ParseNode.
     64  */
     65 function ParseFuncOrArgument(result, isFunction, token) {
     66     this.result = result;
     67     // Is this a function (i.e. is it something defined in functions.js)?
     68     this.isFunction = isFunction;
     69     this.token = token;
     70 }
     71 
     72 /**
     73  * Checks a result to make sure it has the right type, and throws an
     74  * appropriate error otherwise.
     75  *
     76  * @param {boolean=} consume whether to consume the expected token,
     77  *                           defaults to true
     78  */
     79 Parser.prototype.expect = function(text, consume) {
     80     if (this.nextToken.text !== text) {
     81         throw new ParseError(
     82             "Expected '" + text + "', got '" + this.nextToken.text + "'",
     83             this.nextToken
     84         );
     85     }
     86     if (consume !== false) {
     87         this.consume();
     88     }
     89 };
     90 
     91 /**
     92  * Considers the current look ahead token as consumed,
     93  * and fetches the one after that as the new look ahead.
     94  */
     95 Parser.prototype.consume = function() {
     96     this.nextToken = this.gullet.get(this.mode === "math");
     97 };
     98 
     99 Parser.prototype.switchMode = function(newMode) {
    100     this.gullet.unget(this.nextToken);
    101     this.mode = newMode;
    102     this.consume();
    103 };
    104 
    105 /**
    106  * Main parsing function, which parses an entire input.
    107  *
    108  * @return {?Array.<ParseNode>}
    109  */
    110 Parser.prototype.parse = function() {
    111     // Try to parse the input
    112     this.mode = "math";
    113     this.consume();
    114     const parse = this.parseInput();
    115     return parse;
    116 };
    117 
    118 /**
    119  * Parses an entire input tree.
    120  */
    121 Parser.prototype.parseInput = function() {
    122     // Parse an expression
    123     const expression = this.parseExpression(false);
    124     // If we succeeded, make sure there's an EOF at the end
    125     this.expect("EOF", false);
    126     return expression;
    127 };
    128 
    129 const endOfExpression = ["}", "\\end", "\\right", "&", "\\\\", "\\cr"];
    130 
    131 /**
    132  * Parses an "expression", which is a list of atoms.
    133  *
    134  * @param {boolean} breakOnInfix  Should the parsing stop when we hit infix
    135  *                  nodes? This happens when functions have higher precendence
    136  *                  than infix nodes in implicit parses.
    137  *
    138  * @param {?string} breakOnTokenText  The text of the token that the expression
    139  *                  should end with, or `null` if something else should end the
    140  *                  expression.
    141  *
    142  * @return {ParseNode}
    143  */
    144 Parser.prototype.parseExpression = function(breakOnInfix, breakOnTokenText) {
    145     const body = [];
    146     // Keep adding atoms to the body until we can't parse any more atoms (either
    147     // we reached the end, a }, or a \right)
    148     while (true) {
    149         const lex = this.nextToken;
    150         if (endOfExpression.indexOf(lex.text) !== -1) {
    151             break;
    152         }
    153         if (breakOnTokenText && lex.text === breakOnTokenText) {
    154             break;
    155         }
    156         if (breakOnInfix && functions[lex.text] && functions[lex.text].infix) {
    157             break;
    158         }
    159         const atom = this.parseAtom();
    160         if (!atom) {
    161             if (!this.settings.throwOnError && lex.text[0] === "\\") {
    162                 const errorNode = this.handleUnsupportedCmd();
    163                 body.push(errorNode);
    164                 continue;
    165             }
    166 
    167             break;
    168         }
    169         body.push(atom);
    170     }
    171     return this.handleInfixNodes(body);
    172 };
    173 
    174 /**
    175  * Rewrites infix operators such as \over with corresponding commands such
    176  * as \frac.
    177  *
    178  * There can only be one infix operator per group.  If there's more than one
    179  * then the expression is ambiguous.  This can be resolved by adding {}.
    180  *
    181  * @returns {Array}
    182  */
    183 Parser.prototype.handleInfixNodes = function(body) {
    184     let overIndex = -1;
    185     let funcName;
    186 
    187     for (let i = 0; i < body.length; i++) {
    188         const node = body[i];
    189         if (node.type === "infix") {
    190             if (overIndex !== -1) {
    191                 throw new ParseError(
    192                     "only one infix operator per group",
    193                     node.value.token);
    194             }
    195             overIndex = i;
    196             funcName = node.value.replaceWith;
    197         }
    198     }
    199 
    200     if (overIndex !== -1) {
    201         let numerNode;
    202         let denomNode;
    203 
    204         const numerBody = body.slice(0, overIndex);
    205         const denomBody = body.slice(overIndex + 1);
    206 
    207         if (numerBody.length === 1 && numerBody[0].type === "ordgroup") {
    208             numerNode = numerBody[0];
    209         } else {
    210             numerNode = new ParseNode("ordgroup", numerBody, this.mode);
    211         }
    212 
    213         if (denomBody.length === 1 && denomBody[0].type === "ordgroup") {
    214             denomNode = denomBody[0];
    215         } else {
    216             denomNode = new ParseNode("ordgroup", denomBody, this.mode);
    217         }
    218 
    219         const value = this.callFunction(
    220             funcName, [numerNode, denomNode], null);
    221         return [new ParseNode(value.type, value, this.mode)];
    222     } else {
    223         return body;
    224     }
    225 };
    226 
    227 // The greediness of a superscript or subscript
    228 const SUPSUB_GREEDINESS = 1;
    229 
    230 /**
    231  * Handle a subscript or superscript with nice errors.
    232  */
    233 Parser.prototype.handleSupSubscript = function(name) {
    234     const symbolToken = this.nextToken;
    235     const symbol = symbolToken.text;
    236     this.consume();
    237     const group = this.parseGroup();
    238 
    239     if (!group) {
    240         if (!this.settings.throwOnError && this.nextToken.text[0] === "\\") {
    241             return this.handleUnsupportedCmd();
    242         } else {
    243             throw new ParseError(
    244                 "Expected group after '" + symbol + "'",
    245                 symbolToken
    246             );
    247         }
    248     } else if (group.isFunction) {
    249         // ^ and _ have a greediness, so handle interactions with functions'
    250         // greediness
    251         const funcGreediness = functions[group.result].greediness;
    252         if (funcGreediness > SUPSUB_GREEDINESS) {
    253             return this.parseFunction(group);
    254         } else {
    255             throw new ParseError(
    256                 "Got function '" + group.result + "' with no arguments " +
    257                     "as " + name, symbolToken);
    258         }
    259     } else {
    260         return group.result;
    261     }
    262 };
    263 
    264 /**
    265  * Converts the textual input of an unsupported command into a text node
    266  * contained within a color node whose color is determined by errorColor
    267  */
    268 Parser.prototype.handleUnsupportedCmd = function() {
    269     const text = this.nextToken.text;
    270     const textordArray = [];
    271 
    272     for (let i = 0; i < text.length; i++) {
    273         textordArray.push(new ParseNode("textord", text[i], "text"));
    274     }
    275 
    276     const textNode = new ParseNode(
    277         "text",
    278         {
    279             body: textordArray,
    280             type: "text",
    281         },
    282         this.mode);
    283 
    284     const colorNode = new ParseNode(
    285         "color",
    286         {
    287             color: this.settings.errorColor,
    288             value: [textNode],
    289             type: "color",
    290         },
    291         this.mode);
    292 
    293     this.consume();
    294     return colorNode;
    295 };
    296 
    297 /**
    298  * Parses a group with optional super/subscripts.
    299  *
    300  * @return {?ParseNode}
    301  */
    302 Parser.prototype.parseAtom = function() {
    303     // The body of an atom is an implicit group, so that things like
    304     // \left(x\right)^2 work correctly.
    305     const base = this.parseImplicitGroup();
    306 
    307     // In text mode, we don't have superscripts or subscripts
    308     if (this.mode === "text") {
    309         return base;
    310     }
    311 
    312     // Note that base may be empty (i.e. null) at this point.
    313 
    314     let superscript;
    315     let subscript;
    316     while (true) {
    317         // Lex the first token
    318         const lex = this.nextToken;
    319 
    320         if (lex.text === "\\limits" || lex.text === "\\nolimits") {
    321             // We got a limit control
    322             if (!base || base.type !== "op") {
    323                 throw new ParseError(
    324                     "Limit controls must follow a math operator",
    325                     lex);
    326             } else {
    327                 const limits = lex.text === "\\limits";
    328                 base.value.limits = limits;
    329                 base.value.alwaysHandleSupSub = true;
    330             }
    331             this.consume();
    332         } else if (lex.text === "^") {
    333             // We got a superscript start
    334             if (superscript) {
    335                 throw new ParseError("Double superscript", lex);
    336             }
    337             superscript = this.handleSupSubscript("superscript");
    338         } else if (lex.text === "_") {
    339             // We got a subscript start
    340             if (subscript) {
    341                 throw new ParseError("Double subscript", lex);
    342             }
    343             subscript = this.handleSupSubscript("subscript");
    344         } else if (lex.text === "'") {
    345             // We got a prime
    346             if (superscript) {
    347                 throw new ParseError("Double superscript", lex);
    348             }
    349             const prime = new ParseNode("textord", "\\prime", this.mode);
    350 
    351             // Many primes can be grouped together, so we handle this here
    352             const primes = [prime];
    353             this.consume();
    354             // Keep lexing tokens until we get something that's not a prime
    355             while (this.nextToken.text === "'") {
    356                 // For each one, add another prime to the list
    357                 primes.push(prime);
    358                 this.consume();
    359             }
    360             // If there's a superscript following the primes, combine that
    361             // superscript in with the primes.
    362             if (this.nextToken.text === "^") {
    363                 primes.push(this.handleSupSubscript("superscript"));
    364             }
    365             // Put everything into an ordgroup as the superscript
    366             superscript = new ParseNode("ordgroup", primes, this.mode);
    367         } else {
    368             // If it wasn't ^, _, or ', stop parsing super/subscripts
    369             break;
    370         }
    371     }
    372 
    373     if (superscript || subscript) {
    374         // If we got either a superscript or subscript, create a supsub
    375         return new ParseNode("supsub", {
    376             base: base,
    377             sup: superscript,
    378             sub: subscript,
    379         }, this.mode);
    380     } else {
    381         // Otherwise return the original body
    382         return base;
    383     }
    384 };
    385 
    386 // A list of the size-changing functions, for use in parseImplicitGroup
    387 const sizeFuncs = [
    388     "\\tiny", "\\scriptsize", "\\footnotesize", "\\small", "\\normalsize",
    389     "\\large", "\\Large", "\\LARGE", "\\huge", "\\Huge",
    390 ];
    391 
    392 // A list of the style-changing functions, for use in parseImplicitGroup
    393 const styleFuncs = [
    394     "\\displaystyle", "\\textstyle", "\\scriptstyle", "\\scriptscriptstyle",
    395 ];
    396 
    397 // Old font functions
    398 const oldFontFuncs = {
    399     "\\rm": "mathrm",
    400     "\\sf": "mathsf",
    401     "\\tt": "mathtt",
    402     "\\bf": "mathbf",
    403     "\\it": "mathit",
    404     //"\\sl": "textsl",
    405     //"\\sc": "textsc",
    406 };
    407 
    408 /**
    409  * Parses an implicit group, which is a group that starts at the end of a
    410  * specified, and ends right before a higher explicit group ends, or at EOL. It
    411  * is used for functions that appear to affect the current style, like \Large or
    412  * \textrm, where instead of keeping a style we just pretend that there is an
    413  * implicit grouping after it until the end of the group. E.g.
    414  *   small text {\Large large text} small text again
    415  * It is also used for \left and \right to get the correct grouping.
    416  *
    417  * @return {?ParseNode}
    418  */
    419 Parser.prototype.parseImplicitGroup = function() {
    420     const start = this.parseSymbol();
    421 
    422     if (start == null) {
    423         // If we didn't get anything we handle, fall back to parseFunction
    424         return this.parseFunction();
    425     }
    426 
    427     const func = start.result;
    428 
    429     if (func === "\\left") {
    430         // If we see a left:
    431         // Parse the entire left function (including the delimiter)
    432         const left = this.parseFunction(start);
    433         // Parse out the implicit body
    434         ++this.leftrightDepth;
    435         const body = this.parseExpression(false);
    436         --this.leftrightDepth;
    437         // Check the next token
    438         this.expect("\\right", false);
    439         const right = this.parseFunction();
    440         return new ParseNode("leftright", {
    441             body: body,
    442             left: left.value.value,
    443             right: right.value.value,
    444         }, this.mode);
    445     } else if (func === "\\begin") {
    446         // begin...end is similar to left...right
    447         const begin = this.parseFunction(start);
    448         const envName = begin.value.name;
    449         if (!environments.hasOwnProperty(envName)) {
    450             throw new ParseError(
    451                 "No such environment: " + envName, begin.value.nameGroup);
    452         }
    453         // Build the environment object. Arguments and other information will
    454         // be made available to the begin and end methods using properties.
    455         const env = environments[envName];
    456         const args = this.parseArguments("\\begin{" + envName + "}", env);
    457         const context = {
    458             mode: this.mode,
    459             envName: envName,
    460             parser: this,
    461             positions: args.pop(),
    462         };
    463         const result = env.handler(context, args);
    464         this.expect("\\end", false);
    465         const endNameToken = this.nextToken;
    466         const end = this.parseFunction();
    467         if (end.value.name !== envName) {
    468             throw new ParseError(
    469                 "Mismatch: \\begin{" + envName + "} matched " +
    470                 "by \\end{" + end.value.name + "}",
    471                 endNameToken);
    472         }
    473         result.position = end.position;
    474         return result;
    475     } else if (utils.contains(sizeFuncs, func)) {
    476         // If we see a sizing function, parse out the implicit body
    477         this.consumeSpaces();
    478         const body = this.parseExpression(false);
    479         return new ParseNode("sizing", {
    480             // Figure out what size to use based on the list of functions above
    481             size: "size" + (utils.indexOf(sizeFuncs, func) + 1),
    482             value: body,
    483         }, this.mode);
    484     } else if (utils.contains(styleFuncs, func)) {
    485         // If we see a styling function, parse out the implicit body
    486         this.consumeSpaces();
    487         const body = this.parseExpression(true);
    488         return new ParseNode("styling", {
    489             // Figure out what style to use by pulling out the style from
    490             // the function name
    491             style: func.slice(1, func.length - 5),
    492             value: body,
    493         }, this.mode);
    494     } else if (func in oldFontFuncs) {
    495         const style = oldFontFuncs[func];
    496         // If we see an old font function, parse out the implicit body
    497         this.consumeSpaces();
    498         const body = this.parseExpression(true);
    499         if (style.slice(0, 4) === 'text') {
    500             return new ParseNode("text", {
    501                 style: style,
    502                 body: new ParseNode("ordgroup", body, this.mode),
    503             }, this.mode);
    504         } else {
    505             return new ParseNode("font", {
    506                 font: style,
    507                 body: new ParseNode("ordgroup", body, this.mode),
    508             }, this.mode);
    509         }
    510     } else {
    511         // Defer to parseFunction if it's not a function we handle
    512         return this.parseFunction(start);
    513     }
    514 };
    515 
    516 /**
    517  * Parses an entire function, including its base and all of its arguments.
    518  * The base might either have been parsed already, in which case
    519  * it is provided as an argument, or it's the next group in the input.
    520  *
    521  * @param {ParseFuncOrArgument=} baseGroup optional as described above
    522  * @return {?ParseNode}
    523  */
    524 Parser.prototype.parseFunction = function(baseGroup) {
    525     if (!baseGroup) {
    526         baseGroup = this.parseGroup();
    527     }
    528 
    529     if (baseGroup) {
    530         if (baseGroup.isFunction) {
    531             const func = baseGroup.result;
    532             const funcData = functions[func];
    533             if (this.mode === "text" && !funcData.allowedInText) {
    534                 throw new ParseError(
    535                     "Can't use function '" + func + "' in text mode",
    536                     baseGroup.token);
    537             }
    538 
    539             const args = this.parseArguments(func, funcData);
    540             const token = baseGroup.token;
    541             const result = this.callFunction(func, args, args.pop(), token);
    542             return new ParseNode(result.type, result, this.mode);
    543         } else {
    544             return baseGroup.result;
    545         }
    546     } else {
    547         return null;
    548     }
    549 };
    550 
    551 /**
    552  * Call a function handler with a suitable context and arguments.
    553  */
    554 Parser.prototype.callFunction = function(name, args, positions, token) {
    555     const context = {
    556         funcName: name,
    557         parser: this,
    558         positions: positions,
    559         token: token,
    560     };
    561     return functions[name].handler(context, args);
    562 };
    563 
    564 /**
    565  * Parses the arguments of a function or environment
    566  *
    567  * @param {string} func  "\name" or "\begin{name}"
    568  * @param {{numArgs:number,numOptionalArgs:number|undefined}} funcData
    569  * @return the array of arguments, with the list of positions as last element
    570  */
    571 Parser.prototype.parseArguments = function(func, funcData) {
    572     const totalArgs = funcData.numArgs + funcData.numOptionalArgs;
    573     if (totalArgs === 0) {
    574         return [[this.pos]];
    575     }
    576 
    577     const baseGreediness = funcData.greediness;
    578     const positions = [this.pos];
    579     const args = [];
    580 
    581     for (let i = 0; i < totalArgs; i++) {
    582         const nextToken = this.nextToken;
    583         const argType = funcData.argTypes && funcData.argTypes[i];
    584         let arg;
    585         if (i < funcData.numOptionalArgs) {
    586             if (argType) {
    587                 arg = this.parseGroupOfType(argType, true);
    588             } else {
    589                 arg = this.parseGroup(true);
    590             }
    591             if (!arg) {
    592                 args.push(null);
    593                 positions.push(this.pos);
    594                 continue;
    595             }
    596         } else {
    597             if (argType) {
    598                 arg = this.parseGroupOfType(argType);
    599             } else {
    600                 arg = this.parseGroup();
    601             }
    602             if (!arg) {
    603                 if (!this.settings.throwOnError &&
    604                     this.nextToken.text[0] === "\\") {
    605                     arg = new ParseFuncOrArgument(
    606                         this.handleUnsupportedCmd(this.nextToken.text),
    607                         false);
    608                 } else {
    609                     throw new ParseError(
    610                         "Expected group after '" + func + "'", nextToken);
    611                 }
    612             }
    613         }
    614         let argNode;
    615         if (arg.isFunction) {
    616             const argGreediness =
    617                 functions[arg.result].greediness;
    618             if (argGreediness > baseGreediness) {
    619                 argNode = this.parseFunction(arg);
    620             } else {
    621                 throw new ParseError(
    622                     "Got function '" + arg.result + "' as " +
    623                     "argument to '" + func + "'", nextToken);
    624             }
    625         } else {
    626             argNode = arg.result;
    627         }
    628         args.push(argNode);
    629         positions.push(this.pos);
    630     }
    631 
    632     args.push(positions);
    633 
    634     return args;
    635 };
    636 
    637 
    638 /**
    639  * Parses a group when the mode is changing.
    640  *
    641  * @return {?ParseFuncOrArgument}
    642  */
    643 Parser.prototype.parseGroupOfType = function(innerMode, optional) {
    644     const outerMode = this.mode;
    645     // Handle `original` argTypes
    646     if (innerMode === "original") {
    647         innerMode = outerMode;
    648     }
    649 
    650     if (innerMode === "color") {
    651         return this.parseColorGroup(optional);
    652     }
    653     if (innerMode === "size") {
    654         return this.parseSizeGroup(optional);
    655     }
    656 
    657     this.switchMode(innerMode);
    658     if (innerMode === "text") {
    659         // text mode is special because it should ignore the whitespace before
    660         // it
    661         this.consumeSpaces();
    662     }
    663     // By the time we get here, innerMode is one of "text" or "math".
    664     // We switch the mode of the parser, recurse, then restore the old mode.
    665     const res = this.parseGroup(optional);
    666     this.switchMode(outerMode);
    667     return res;
    668 };
    669 
    670 Parser.prototype.consumeSpaces = function() {
    671     while (this.nextToken.text === " ") {
    672         this.consume();
    673     }
    674 };
    675 
    676 /**
    677  * Parses a group, essentially returning the string formed by the
    678  * brace-enclosed tokens plus some position information.
    679  *
    680  * @param {string} modeName  Used to describe the mode in error messages
    681  * @param {boolean=} optional  Whether the group is optional or required
    682  */
    683 Parser.prototype.parseStringGroup = function(modeName, optional) {
    684     if (optional && this.nextToken.text !== "[") {
    685         return null;
    686     }
    687     const outerMode = this.mode;
    688     this.mode = "text";
    689     this.expect(optional ? "[" : "{");
    690     let str = "";
    691     const firstToken = this.nextToken;
    692     let lastToken = firstToken;
    693     while (this.nextToken.text !== (optional ? "]" : "}")) {
    694         if (this.nextToken.text === "EOF") {
    695             throw new ParseError(
    696                 "Unexpected end of input in " + modeName,
    697                 firstToken.range(this.nextToken, str));
    698         }
    699         lastToken = this.nextToken;
    700         str += lastToken.text;
    701         this.consume();
    702     }
    703     this.mode = outerMode;
    704     this.expect(optional ? "]" : "}");
    705     return firstToken.range(lastToken, str);
    706 };
    707 
    708 /**
    709  * Parses a regex-delimited group: the largest sequence of tokens
    710  * whose concatenated strings match `regex`. Returns the string
    711  * formed by the tokens plus some position information.
    712  *
    713  * @param {RegExp} regex
    714  * @param {string} modeName  Used to describe the mode in error messages
    715  */
    716 Parser.prototype.parseRegexGroup = function(regex, modeName) {
    717     const outerMode = this.mode;
    718     this.mode = "text";
    719     const firstToken = this.nextToken;
    720     let lastToken = firstToken;
    721     let str = "";
    722     while (this.nextToken.text !== "EOF"
    723            && regex.test(str + this.nextToken.text)) {
    724         lastToken = this.nextToken;
    725         str += lastToken.text;
    726         this.consume();
    727     }
    728     if (str === "") {
    729         throw new ParseError(
    730             "Invalid " + modeName + ": '" + firstToken.text + "'",
    731             firstToken);
    732     }
    733     this.mode = outerMode;
    734     return firstToken.range(lastToken, str);
    735 };
    736 
    737 /**
    738  * Parses a color description.
    739  */
    740 Parser.prototype.parseColorGroup = function(optional) {
    741     const res = this.parseStringGroup("color", optional);
    742     if (!res) {
    743         return null;
    744     }
    745     const match = (/^(#[a-z0-9]+|[a-z]+)$/i).exec(res.text);
    746     if (!match) {
    747         throw new ParseError("Invalid color: '" + res.text + "'", res);
    748     }
    749     return new ParseFuncOrArgument(
    750         new ParseNode("color", match[0], this.mode),
    751         false);
    752 };
    753 
    /**
     * Parses a size specification, consisting of magnitude and unit.
     *
     * A required size that does not start with "{" is read through
     * `parseRegexGroup`; in every other case the text is read through
     * `parseStringGroup`.
     *
     * @param {boolean=} optional  Whether the size group is optional
     * @return {?ParseFuncOrArgument} A non-function argument wrapping a
     *     "size" node whose value is `{number, unit}`, or null when the
     *     group reader produced nothing
     * @throws {ParseError} When no magnitude followed by a two-letter unit
     *     is found, or when the unit is not one of "em", "ex" or "mu"
     */
    Parser.prototype.parseSizeGroup = function(optional) {
        let res;
        if (!optional && this.nextToken.text !== "{") {
            res = this.parseRegexGroup(
                /^[-+]? *(?:$|\d+|\d+\.\d*|\.\d*) *[a-z]{0,2}$/, "size");
        } else {
            res = this.parseStringGroup("size", optional);
        }
        if (!res) {
            return null;
        }
        // Capture groups: optional sign, magnitude, two-letter unit.
        const match =
            (/([-+]?) *(\d+(?:\.\d*)?|\.\d+) *([a-z]{2})/).exec(res.text);
        if (!match) {
            throw new ParseError("Invalid size: '" + res.text + "'", res);
        }
        const data = {
            number: +(match[1] + match[2]), // sign + magnitude, cast to number
            unit: match[3],
        };
        if (data.unit !== "em" && data.unit !== "ex" && data.unit !== "mu") {
            throw new ParseError("Invalid unit: '" + data.unit + "'", res);
        }
        // Tag the node as a size. It used to be labelled "color", which
        // misdescribed the {number, unit} payload it carries.
        return new ParseFuncOrArgument(
            new ParseNode("size", data, this.mode),
            false);
    };
    783 
    /**
     * Parses one group.
     *
     * For a required group (argument false or absent) this is either an
     * expression in braces (like "{x+y}") or, when no "{" is next, a single
     * nucleus (like "x") obtained from `parseSymbol`.
     * For an optional group (argument true) this is a bracket-delimited
     * expression (like "[x+y]"), or null when no "[" is next.
     *
     * @param {boolean=} optional  Whether the group is optional or required
     * @return {?ParseFuncOrArgument}
     */
    Parser.prototype.parseGroup = function(optional) {
        const firstToken = this.nextToken;
        const opener = optional ? "[" : "{";

        if (firstToken.text !== opener) {
            // No delimiter: an optional group is simply absent, a required
            // one falls back to a bare nucleus.
            return optional ? null : this.parseSymbol();
        }

        // Step over the opening delimiter and read the enclosed expression.
        this.consume();
        const body = this.parseExpression(false, optional ? "]" : null);

        // Remember the closing token before checking that it is the right one.
        const lastToken = this.nextToken;
        this.expect(optional ? "]" : "}");

        if (this.mode === "text") {
            this.formLigatures(body);
        }

        const groupNode = new ParseNode(
            "ordgroup", body, this.mode, firstToken, lastToken);
        return new ParseFuncOrArgument(groupNode, false);
    };
    817 
    /**
     * Form ligature-like combinations of characters for text mode.
     * The recognised inputs are "--", "---", "``" and "''".
     * Each run of adjacent nodes holding those single characters is replaced
     * by one textord node whose value is the run itself, so the merged value
     * is still the ASCII source text.
     *
     * @param {Array.<ParseNode>} group  the nodes of this group,
     *                                   list will be modified in place
     */
    Parser.prototype.formLigatures = function(group) {
        // Index of the last node; positions below it have a right-hand
        // neighbour. It shrinks whenever several nodes collapse into one.
        let limit = group.length - 1;
        let pos = 0;
        while (pos < limit) {
            const head = group[pos];
            const ch = head.value;
            let merged = null;

            if (ch === "-") {
                if (group[pos + 1].value === "-") {
                    // Prefer the three-dash form when a third dash follows.
                    const triple =
                        pos + 1 < limit && group[pos + 2].value === "-";
                    merged = triple ? "---" : "--";
                }
            } else if (ch === "'" || ch === "`") {
                if (group[pos + 1].value === ch) {
                    merged = ch + ch;
                }
            }

            if (merged !== null) {
                const count = merged.length;
                const tail = group[pos + count - 1];
                group.splice(pos, count,
                    new ParseNode("textord", merged, "text", head, tail));
                limit -= count - 1;
            }
            ++pos;
        }
    };
    851 
    /**
     * Parse a single symbol from the upcoming token. The token is matched,
     * in order, against the defined functions, the symbol table of the
     * current mode, and (in text mode only) the CJK character pattern.
     * The token is consumed only when one of these matches.
     *
     * @return {?ParseFuncOrArgument} The parsed function or symbol, or null
     *     when the token matches none of the above
     */
    Parser.prototype.parseSymbol = function() {
        const nucleus = this.nextToken;
        const text = nucleus.text;

        // A known function: hand back its name, flagged as a function.
        if (functions[text]) {
            this.consume();
            return new ParseFuncOrArgument(text, true, nucleus);
        }

        // A symbol of the current mode: its node type is the group recorded
        // in the symbols map.
        if (symbols[this.mode][text]) {
            this.consume();
            const symbolNode = new ParseNode(
                symbols[this.mode][text].group, text, this.mode, nucleus);
            return new ParseFuncOrArgument(symbolNode, false, nucleus);
        }

        // In text mode, text matching the CJK pattern becomes a plain textord.
        if (this.mode === "text" && cjkRegex.test(text)) {
            this.consume();
            const cjkNode = new ParseNode("textord", text, this.mode, nucleus);
            return new ParseFuncOrArgument(cjkNode, false, nucleus);
        }

        return null;
    };
    885 
    // Make the ParseNode constructor reachable as a property of every Parser
    // instance.
    Parser.prototype.ParseNode = ParseNode;

    // Export the Parser constructor as this module's value.
    module.exports = Parser;