Parser.js (29594B)
/* eslint no-constant-condition:0 */
const functions = require("./functions");
const environments = require("./environments");
const MacroExpander = require("./MacroExpander");
const symbols = require("./symbols");
const utils = require("./utils");
const cjkRegex = require("./unicodeRegexes").cjkRegex;

const parseData = require("./parseData");
const ParseError = require("./ParseError");

/**
 * This file contains the parser used to parse out a TeX expression from the
 * input. Since TeX isn't context-free, standard parsers don't work particularly
 * well.
 *
 * The strategy of this parser is as such:
 *
 * The parser keeps a single token of lookahead in `this.nextToken`. Tokens are
 * pulled from the macro expander (`this.gullet`) by `consume()`. The main
 * functions (the `.parse...` ones) inspect that lookahead token and consume it
 * when they accept it; they do not track explicit string positions.
 *
 * The parser has a property called "mode" indicating the mode that
 * the parser is currently in. Currently it has to be one of "math" or
 * "text", which denotes whether the current environment is a math-y
 * one or a text-y one (e.g. inside \text). Currently, this serves to
 * limit the functions which can be used in text mode.
 *
 * The main functions return the useful data that was parsed, leaving the
 * lookahead token positioned just after it, so that they can call each other
 * to continue the parse.
 *
 * There are also extra `.handle...` functions, which pull out some reused
 * functionality into self-contained functions.
 *
 * The earlier functions return ParseNodes.
 * The later functions (which are called deeper in the parse) sometimes return
 * ParseFuncOrArgument, which contain a ParseNode as well as some data about
 * whether the parsed object is a function which is missing some arguments, or a
 * standalone object which can be used as an argument to another function.
 */

/**
 * Main Parser class
 *
 * @param {string} input the TeX source to parse
 * @param {Object} settings parser settings; this file reads `macros`,
 *        `throwOnError` and `errorColor` from it
 */
function Parser(input, settings) {
    // Create a new macro expander (gullet) and (indirectly via that) also a
    // new lexer (mouth) for this parser (stomach, in the language of TeX)
    this.gullet = new MacroExpander(input, settings.macros);
    // Store the settings for use in parsing
    this.settings = settings;
    // Count leftright depth (for \middle errors)
    this.leftrightDepth = 0;
}

const ParseNode = parseData.ParseNode;

/**
 * An initial function (without its arguments), or an argument to a function.
 *
 * When `isFunction` is false, `result` is a ParseNode. When `isFunction` is
 * true, `result` is the function's name (see `parseSymbol`), which is used as
 * a key into the `functions` table.
 *
 * @param {ParseNode|string} result
 * @param {boolean} isFunction
 * @param {Object=} token the token this was parsed from, if any
 */
function ParseFuncOrArgument(result, isFunction, token) {
    this.result = result;
    // Is this a function (i.e. is it something defined in functions.js)?
    this.isFunction = isFunction;
    this.token = token;
}

/**
 * Checks that the current lookahead token has the expected text, and throws
 * an appropriate error otherwise.
 *
 * @param {string} text the expected token text
 * @param {boolean=} consume whether to consume the expected token,
 *                           defaults to true
 * @throws {ParseError} if the lookahead token's text differs from `text`
 */
Parser.prototype.expect = function(text, consume) {
    if (this.nextToken.text !== text) {
        throw new ParseError(
            "Expected '" + text + "', got '" + this.nextToken.text + "'",
            this.nextToken
        );
    }
    if (consume !== false) {
        this.consume();
    }
};

/**
 * Considers the current look ahead token as consumed,
 * and fetches the one after that as the new look ahead.
 */
Parser.prototype.consume = function() {
    // The gullet is told whether we are in math mode; NOTE(review): presumably
    // this controls whitespace skipping -- confirm against MacroExpander.
    this.nextToken = this.gullet.get(this.mode === "math");
};

/**
 * Switches the parser to `newMode`. The lookahead token is handed back to the
 * gullet and fetched again, so that it is read under the new mode.
 *
 * @param {string} newMode "math" or "text"
 */
Parser.prototype.switchMode = function(newMode) {
    this.gullet.unget(this.nextToken);
    this.mode = newMode;
    this.consume();
};

/**
 * Main parsing function, which parses an entire input.
 *
 * @return {?Array.<ParseNode>}
 */
Parser.prototype.parse = function() {
    // Try to parse the input
    this.mode = "math";
    this.consume();
    const parse = this.parseInput();
    return parse;
};

/**
 * Parses an entire input tree.
 *
 * @return {Array.<ParseNode>}
 */
Parser.prototype.parseInput = function() {
    // Parse an expression
    const expression = this.parseExpression(false);
    // If we succeeded, make sure there's an EOF at the end
    this.expect("EOF", false);
    return expression;
};

// Tokens that always terminate the expression currently being parsed
const endOfExpression = ["}", "\\end", "\\right", "&", "\\\\", "\\cr"];

/**
 * Parses an "expression", which is a list of atoms.
 *
 * @param {boolean} breakOnInfix Should the parsing stop when we hit infix
 *                  nodes? This happens when functions have higher precedence
 *                  than infix nodes in implicit parses.
 *
 * @param {?string} breakOnTokenText The text of the token that the expression
 *                  should end with, or `null` if something else should end the
 *                  expression.
 *
 * @return {Array.<ParseNode>} the parsed atoms, after infix rewriting
 */
Parser.prototype.parseExpression = function(breakOnInfix, breakOnTokenText) {
    const body = [];
    // Keep adding atoms to the body until we can't parse any more atoms (either
    // we reached the end, a }, or a \right)
    while (true) {
        const lex = this.nextToken;
        if (endOfExpression.indexOf(lex.text) !== -1) {
            break;
        }
        if (breakOnTokenText && lex.text === breakOnTokenText) {
            break;
        }
        if (breakOnInfix && functions[lex.text] && functions[lex.text].infix) {
            break;
        }
        const atom = this.parseAtom();
        if (!atom) {
            // In lenient mode an unknown command is rendered as colored text
            // instead of stopping the parse.
            if (!this.settings.throwOnError && lex.text[0] === "\\") {
                const errorNode = this.handleUnsupportedCmd();
                body.push(errorNode);
                continue;
            }

            break;
        }
        body.push(atom);
    }
    return this.handleInfixNodes(body);
};

/**
 * Rewrites infix operators such as \over with corresponding commands such
 * as \frac.
 *
 * There can only be one infix operator per group.  If there's more than one
 * then the expression is ambiguous.  This can be resolved by adding {}.
 *
 * @param {Array.<ParseNode>} body the atoms of one group
 * @returns {Array.<ParseNode>} `body` itself if it contains no infix node,
 *          otherwise a one-element array holding the rewritten node
 * @throws {ParseError} if `body` contains more than one infix node
 */
Parser.prototype.handleInfixNodes = function(body) {
    let overIndex = -1;
    let funcName;

    for (let i = 0; i < body.length; i++) {
        const node = body[i];
        if (node.type === "infix") {
            if (overIndex !== -1) {
                throw new ParseError(
                    "only one infix operator per group",
                    node.value.token);
            }
            overIndex = i;
            funcName = node.value.replaceWith;
        }
    }

    if (overIndex !== -1) {
        let numerNode;
        let denomNode;

        const numerBody = body.slice(0, overIndex);
        const denomBody = body.slice(overIndex + 1);

        // Avoid double-wrapping a side that is already a single ordgroup
        if (numerBody.length === 1 && numerBody[0].type === "ordgroup") {
            numerNode = numerBody[0];
        } else {
            numerNode = new ParseNode("ordgroup", numerBody, this.mode);
        }

        if (denomBody.length === 1 && denomBody[0].type === "ordgroup") {
            denomNode = denomBody[0];
        } else {
            denomNode = new ParseNode("ordgroup", denomBody, this.mode);
        }

        const value = this.callFunction(
            funcName, [numerNode, denomNode], null);
        return [new ParseNode(value.type, value, this.mode)];
    } else {
        return body;
    }
};

// The greediness of a superscript or subscript
const SUPSUB_GREEDINESS = 1;

/**
 * Handle a subscript or superscript with nice errors.
 *
 * @param {string} name "superscript" or "subscript", used in error messages
 * @return {ParseNode}
 * @throws {ParseError} if no group follows, or if the group is a function
 *         that is not greedy enough to be used without braces
 */
Parser.prototype.handleSupSubscript = function(name) {
    const symbolToken = this.nextToken;
    const symbol = symbolToken.text;
    this.consume();
    const group = this.parseGroup();

    if (!group) {
        if (!this.settings.throwOnError && this.nextToken.text[0] === "\\") {
            return this.handleUnsupportedCmd();
        } else {
            throw new ParseError(
                "Expected group after '" + symbol + "'",
                symbolToken
            );
        }
    } else if (group.isFunction) {
        // ^ and _ have a greediness, so handle interactions with functions'
        // greediness
        const funcGreediness = functions[group.result].greediness;
        if (funcGreediness > SUPSUB_GREEDINESS) {
            return this.parseFunction(group);
        } else {
            throw new ParseError(
                "Got function '" + group.result + "' with no arguments " +
                    "as " + name, symbolToken);
        }
    } else {
        return group.result;
    }
};

/**
 * Converts the textual input of an unsupported command into a text node
 * contained within a color node whose color is determined by errorColor.
 * The offending lookahead token is consumed.
 *
 * @return {ParseNode} a "color" node wrapping the command's source text
 */
Parser.prototype.handleUnsupportedCmd = function() {
    const text = this.nextToken.text;
    const textordArray = [];

    // One textord node per character of the command's source text
    for (let i = 0; i < text.length; i++) {
        textordArray.push(new ParseNode("textord", text[i], "text"));
    }

    const textNode = new ParseNode(
        "text",
        {
            body: textordArray,
            type: "text",
        },
        this.mode);

    const colorNode = new ParseNode(
        "color",
        {
            color: this.settings.errorColor,
            value: [textNode],
            type: "color",
        },
        this.mode);

    this.consume();
    return colorNode;
};

/**
 * Parses a group with optional super/subscripts.
 *
 * @return {?ParseNode}
 */
Parser.prototype.parseAtom = function() {
    // The body of an atom is an implicit group, so that things like
    // \left(x\right)^2 work correctly.
    const base = this.parseImplicitGroup();

    // In text mode, we don't have superscripts or subscripts
    if (this.mode === "text") {
        return base;
    }

    // Note that base may be empty (i.e. null) at this point.

    let superscript;
    let subscript;
    while (true) {
        // Lex the first token
        const lex = this.nextToken;

        if (lex.text === "\\limits" || lex.text === "\\nolimits") {
            // We got a limit control
            if (!base || base.type !== "op") {
                throw new ParseError(
                    "Limit controls must follow a math operator",
                    lex);
            } else {
                const limits = lex.text === "\\limits";
                base.value.limits = limits;
                base.value.alwaysHandleSupSub = true;
            }
            this.consume();
        } else if (lex.text === "^") {
            // We got a superscript start
            if (superscript) {
                throw new ParseError("Double superscript", lex);
            }
            superscript = this.handleSupSubscript("superscript");
        } else if (lex.text === "_") {
            // We got a subscript start
            if (subscript) {
                throw new ParseError("Double subscript", lex);
            }
            subscript = this.handleSupSubscript("subscript");
        } else if (lex.text === "'") {
            // We got a prime
            if (superscript) {
                throw new ParseError("Double superscript", lex);
            }
            const prime = new ParseNode("textord", "\\prime", this.mode);

            // Many primes can be grouped together, so we handle this here
            const primes = [prime];
            this.consume();
            // Keep lexing tokens until we get something that's not a prime
            while (this.nextToken.text === "'") {
                // For each one, add another prime to the list
                primes.push(prime);
                this.consume();
            }
            // If there's a superscript following the primes, combine that
            // superscript in with the primes.
            if (this.nextToken.text === "^") {
                primes.push(this.handleSupSubscript("superscript"));
            }
            // Put everything into an ordgroup as the superscript
            superscript = new ParseNode("ordgroup", primes, this.mode);
        } else {
            // If it wasn't ^, _, or ', stop parsing super/subscripts
            break;
        }
    }

    if (superscript || subscript) {
        // If we got either a superscript or subscript, create a supsub
        return new ParseNode("supsub", {
            base: base,
            sup: superscript,
            sub: subscript,
        }, this.mode);
    } else {
        // Otherwise return the original body
        return base;
    }
};

// A list of the size-changing functions, for use in parseImplicitGroup
const sizeFuncs = [
    "\\tiny", "\\scriptsize", "\\footnotesize", "\\small", "\\normalsize",
    "\\large", "\\Large", "\\LARGE", "\\huge", "\\Huge",
];

// A list of the style-changing functions, for use in parseImplicitGroup
const styleFuncs = [
    "\\displaystyle", "\\textstyle", "\\scriptstyle", "\\scriptscriptstyle",
];

// Old font functions
const oldFontFuncs = {
    "\\rm": "mathrm",
    "\\sf": "mathsf",
    "\\tt": "mathtt",
    "\\bf": "mathbf",
    "\\it": "mathit",
    //"\\sl": "textsl",
    //"\\sc": "textsc",
};

/**
 * Parses an implicit group, which is a group that starts right after a
 * specified command, and ends right before a higher explicit group ends, or
 * at EOL. It is used for functions that appear to affect the current style,
 * like \Large or \displaystyle, where instead of keeping a style we just
 * pretend that there is an implicit grouping after it until the end of the
 * group. E.g.
 *   small text {\Large large text} small text again
 * It is also used for \left and \right to get the correct grouping, and for
 * \begin and \end.
 *
 * @return {?ParseNode}
 */
Parser.prototype.parseImplicitGroup = function() {
    const start = this.parseSymbol();

    if (start == null) {
        // If we didn't get anything we handle, fall back to parseFunction
        return this.parseFunction();
    }

    const func = start.result;

    if (func === "\\left") {
        // If we see a left:
        // Parse the entire left function (including the delimiter)
        const left = this.parseFunction(start);
        // Parse out the implicit body
        ++this.leftrightDepth;
        const body = this.parseExpression(false);
        --this.leftrightDepth;
        // Check the next token
        this.expect("\\right", false);
        const right = this.parseFunction();
        return new ParseNode("leftright", {
            body: body,
            left: left.value.value,
            right: right.value.value,
        }, this.mode);
    } else if (func === "\\begin") {
        // begin...end is similar to left...right
        const begin = this.parseFunction(start);
        const envName = begin.value.name;
        if (!environments.hasOwnProperty(envName)) {
            throw new ParseError(
                "No such environment: " + envName, begin.value.nameGroup);
        }
        // Build the environment object. Arguments and other information will
        // be made available to the begin and end methods using properties.
        const env = environments[envName];
        const args = this.parseArguments("\\begin{" + envName + "}", env);
        const context = {
            mode: this.mode,
            envName: envName,
            parser: this,
            // parseArguments appends the positions list as the last element
            positions: args.pop(),
        };
        const result = env.handler(context, args);
        this.expect("\\end", false);
        const endNameToken = this.nextToken;
        const end = this.parseFunction();
        if (end.value.name !== envName) {
            throw new ParseError(
                "Mismatch: \\begin{" + envName + "} matched " +
                "by \\end{" + end.value.name + "}",
                endNameToken);
        }
        result.position = end.position;
        return result;
    } else if (utils.contains(sizeFuncs, func)) {
        // If we see a sizing function, parse out the implicit body
        this.consumeSpaces();
        const body = this.parseExpression(false);
        return new ParseNode("sizing", {
            // Figure out what size to use based on the list of functions above
            size: "size" + (utils.indexOf(sizeFuncs, func) + 1),
            value: body,
        }, this.mode);
    } else if (utils.contains(styleFuncs, func)) {
        // If we see a styling function, parse out the implicit body
        this.consumeSpaces();
        const body = this.parseExpression(true);
        return new ParseNode("styling", {
            // Figure out what style to use by pulling out the style from
            // the function name (drop the leading backslash and the trailing
            // "style", e.g. "\displaystyle" -> "display")
            style: func.slice(1, func.length - 5),
            value: body,
        }, this.mode);
    } else if (func in oldFontFuncs) {
        const style = oldFontFuncs[func];
        // If we see an old font function, parse out the implicit body
        this.consumeSpaces();
        const body = this.parseExpression(true);
        if (style.slice(0, 4) === 'text') {
            return new ParseNode("text", {
                style: style,
                body: new ParseNode("ordgroup", body, this.mode),
            }, this.mode);
        } else {
            return new ParseNode("font", {
                font: style,
                body: new ParseNode("ordgroup", body, this.mode),
            }, this.mode);
        }
    } else {
        // Defer to parseFunction if it's not a function we handle
        return this.parseFunction(start);
    }
};

/**
 * Parses an entire function, including its base and all of its arguments.
 * The base might either have been parsed already, in which case
 * it is provided as an argument, or it's the next group in the input.
 *
 * @param {ParseFuncOrArgument=} baseGroup optional as described above
 * @return {?ParseNode}
 * @throws {ParseError} if the function is not allowed in text mode and the
 *         parser is currently in text mode
 */
Parser.prototype.parseFunction = function(baseGroup) {
    if (!baseGroup) {
        baseGroup = this.parseGroup();
    }

    if (baseGroup) {
        if (baseGroup.isFunction) {
            const func = baseGroup.result;
            const funcData = functions[func];
            if (this.mode === "text" && !funcData.allowedInText) {
                throw new ParseError(
                    "Can't use function '" + func + "' in text mode",
                    baseGroup.token);
            }

            const args = this.parseArguments(func, funcData);
            // parseArguments appends the positions list as the last element;
            // split it off so the handler only sees the real arguments.
            const positions = args.pop();
            const token = baseGroup.token;
            const result = this.callFunction(func, args, positions, token);
            return new ParseNode(result.type, result, this.mode);
        } else {
            return baseGroup.result;
        }
    } else {
        return null;
    }
};

/**
 * Call a function handler with a suitable context and arguments.
 *
 * @param {string} name the function's key in the `functions` table
 * @param {Array} args the parsed arguments
 * @param {?Array} positions the positions list from parseArguments, if any
 * @param {Object=} token the token of the function itself, if known
 * @return {Object} whatever the function's handler returns
 */
Parser.prototype.callFunction = function(name, args, positions, token) {
    const context = {
        funcName: name,
        parser: this,
        positions: positions,
        token: token,
    };
    return functions[name].handler(context, args);
};

/**
 * Parses the arguments of a function or environment
 *
 * @param {string} func  "\name" or "\begin{name}"
 * @param {{numArgs:number,numOptionalArgs:number|undefined}} funcData
 * @return the array of arguments, with the list of positions as last element
 * @throws {ParseError} if a required argument is missing, or if an argument
 *         is a function that is not greedy enough to be used without braces
 */
Parser.prototype.parseArguments = function(func, funcData) {
    // numOptionalArgs may be undefined; treat that as "no optional
    // arguments". Adding undefined would make the total NaN, and then no
    // arguments at all would be parsed.
    const numOptionalArgs = funcData.numOptionalArgs || 0;
    const totalArgs = funcData.numArgs + numOptionalArgs;
    // NOTE(review): `this.pos` is never assigned in this file, so unless it is
    // set elsewhere the recorded positions are all undefined -- confirm before
    // relying on them.
    if (totalArgs === 0) {
        return [[this.pos]];
    }

    const baseGreediness = funcData.greediness;
    const positions = [this.pos];
    const args = [];

    for (let i = 0; i < totalArgs; i++) {
        const nextToken = this.nextToken;
        const argType = funcData.argTypes && funcData.argTypes[i];
        let arg;
        // Optional arguments come first
        if (i < numOptionalArgs) {
            if (argType) {
                arg = this.parseGroupOfType(argType, true);
            } else {
                arg = this.parseGroup(true);
            }
            if (!arg) {
                // An absent optional argument is recorded as null
                args.push(null);
                positions.push(this.pos);
                continue;
            }
        } else {
            if (argType) {
                arg = this.parseGroupOfType(argType);
            } else {
                arg = this.parseGroup();
            }
            if (!arg) {
                if (!this.settings.throwOnError &&
                    this.nextToken.text[0] === "\\") {
                    arg = new ParseFuncOrArgument(
                        this.handleUnsupportedCmd(),
                        false);
                } else {
                    throw new ParseError(
                        "Expected group after '" + func + "'", nextToken);
                }
            }
        }
        let argNode;
        if (arg.isFunction) {
            // A bare function is only accepted as an argument if it binds
            // more tightly than the function it is an argument of.
            const argGreediness =
                functions[arg.result].greediness;
            if (argGreediness > baseGreediness) {
                argNode = this.parseFunction(arg);
            } else {
                throw new ParseError(
                    "Got function '" + arg.result + "' as " +
                    "argument to '" + func + "'", nextToken);
            }
        } else {
            argNode = arg.result;
        }
        args.push(argNode);
        positions.push(this.pos);
    }

    args.push(positions);

    return args;
};


/**
 * Parses a group when the mode is changing.
 *
 * @param {string} innerMode one of "original", "color", "size", "text" or
 *        "math"; "original" means "keep the current mode"
 * @param {boolean=} optional Whether the group is optional or required
 * @return {?ParseFuncOrArgument}
 */
Parser.prototype.parseGroupOfType = function(innerMode, optional) {
    const outerMode = this.mode;
    // Handle `original` argTypes
    if (innerMode === "original") {
        innerMode = outerMode;
    }

    if (innerMode === "color") {
        return this.parseColorGroup(optional);
    }
    if (innerMode === "size") {
        return this.parseSizeGroup(optional);
    }

    this.switchMode(innerMode);
    if (innerMode === "text") {
        // text mode is special because it should ignore the whitespace before
        // it
        this.consumeSpaces();
    }
    // By the time we get here, innerMode is one of "text" or "math".
    // We switch the mode of the parser, recurse, then restore the old mode.
    const res = this.parseGroup(optional);
    this.switchMode(outerMode);
    return res;
};

/**
 * Consumes lookahead tokens for as long as they are single spaces.
 */
Parser.prototype.consumeSpaces = function() {
    while (this.nextToken.text === " ") {
        this.consume();
    }
};

/**
 * Parses a group, essentially returning the string formed by the
 * brace-enclosed tokens plus some position information.
 *
 * @param {string} modeName  Used to describe the mode in error messages
 * @param {boolean=} optional  Whether the group is optional or required
 * @return {?Object} the token range covering the group's contents, or null
 *         if the group is optional and no "[" follows
 * @throws {ParseError} if the opening delimiter is missing or the input ends
 *         before the closing delimiter
 */
Parser.prototype.parseStringGroup = function(modeName, optional) {
    if (optional && this.nextToken.text !== "[") {
        return null;
    }
    const outerMode = this.mode;
    // The contents are fetched in text mode ...
    this.mode = "text";
    this.expect(optional ? "[" :  "{");
    let str = "";
    const firstToken = this.nextToken;
    let lastToken = firstToken;
    while (this.nextToken.text !== (optional ? "]" : "}")) {
        if (this.nextToken.text === "EOF") {
            throw new ParseError(
                "Unexpected end of input in " + modeName,
                firstToken.range(this.nextToken, str));
        }
        lastToken = this.nextToken;
        str += lastToken.text;
        this.consume();
    }
    // ... and the outer mode is restored before consuming the closing
    // delimiter, so that the token after it is fetched in the outer mode.
    this.mode = outerMode;
    this.expect(optional ? "]" : "}");
    return firstToken.range(lastToken, str);
};

/**
 * Parses a regex-delimited group: the largest sequence of tokens
 * whose concatenated strings match `regex`. Returns the string
 * formed by the tokens plus some position information.
 *
 * @param {RegExp} regex
 * @param {string} modeName  Used to describe the mode in error messages
 * @return {Object} the token range covering the matched tokens
 * @throws {ParseError} if not even the first token matches
 */
Parser.prototype.parseRegexGroup = function(regex, modeName) {
    const outerMode = this.mode;
    this.mode = "text";
    const firstToken = this.nextToken;
    let lastToken = firstToken;
    let str = "";
    while (this.nextToken.text !== "EOF"
           && regex.test(str + this.nextToken.text)) {
        lastToken = this.nextToken;
        str += lastToken.text;
        this.consume();
    }
    if (str === "") {
        throw new ParseError(
            "Invalid " + modeName + ": '" + firstToken.text + "'",
            firstToken);
    }
    this.mode = outerMode;
    return firstToken.range(lastToken, str);
};

/**
 * Parses a color description.
 *
 * @param {boolean=} optional Whether the group is optional or required
 * @return {?ParseFuncOrArgument} wraps a "color" node whose value is the
 *         color string, or null if an optional group is absent
 * @throws {ParseError} if the text is neither "#" followed by alphanumerics
 *         nor a purely alphabetic name
 */
Parser.prototype.parseColorGroup = function(optional) {
    const res = this.parseStringGroup("color", optional);
    if (!res) {
        return null;
    }
    const match = (/^(#[a-z0-9]+|[a-z]+)$/i).exec(res.text);
    if (!match) {
        throw new ParseError("Invalid color: '" + res.text + "'", res);
    }
    return new ParseFuncOrArgument(
        new ParseNode("color", match[0], this.mode),
        false);
};

/**
 * Parses a size specification, consisting of magnitude and unit.
 *
 * @param {boolean=} optional Whether the group is optional or required
 * @return {?ParseFuncOrArgument} wraps a "size" node whose value is
 *         `{number, unit}`, or null if an optional group is absent
 * @throws {ParseError} if the text is not a valid size, or the unit is not
 *         one of "em", "ex" or "mu"
 */
Parser.prototype.parseSizeGroup = function(optional) {
    let res;
    if (!optional && this.nextToken.text !== "{") {
        // A required size may be given without braces
        res = this.parseRegexGroup(
            /^[-+]? *(?:$|\d+|\d+\.\d*|\.\d*) *[a-z]{0,2}$/, "size");
    } else {
        res = this.parseStringGroup("size", optional);
    }
    if (!res) {
        return null;
    }
    const match = (/([-+]?) *(\d+(?:\.\d*)?|\.\d+) *([a-z]{2})/).exec(res.text);
    if (!match) {
        throw new ParseError("Invalid size: '" + res.text + "'", res);
    }
    const data = {
        number: +(match[1] + match[2]), // sign + magnitude, cast to number
        unit: match[3],
    };
    if (data.unit !== "em" && data.unit !== "ex" && data.unit !== "mu") {
        throw new ParseError("Invalid unit: '" + data.unit + "'", res);
    }
    // The node is typed "size" (not "color") so that it can be told apart
    // from the result of parseColorGroup.
    return new ParseFuncOrArgument(
        new ParseNode("size", data, this.mode),
        false);
};

/**
 * If the argument is false or absent, this parses an ordinary group,
 * which is either a single nucleus (like "x") or an expression
 * in braces (like "{x+y}").
 * If the argument is true, it parses either a bracket-delimited expression
 * (like "[x+y]") or returns null to indicate the absence of a
 * bracket-enclosed group.
 *
 * @param {boolean=} optional  Whether the group is optional or required
 * @return {?ParseFuncOrArgument}
 */
Parser.prototype.parseGroup = function(optional) {
    const firstToken = this.nextToken;
    // Try to parse an open brace
    if (this.nextToken.text === (optional ? "[" : "{")) {
        // If we get a brace, parse an expression
        this.consume();
        const expression = this.parseExpression(false, optional ? "]" : null);
        const lastToken = this.nextToken;
        // Make sure we get a close brace
        this.expect(optional ? "]" : "}");
        if (this.mode === "text") {
            this.formLigatures(expression);
        }
        return new ParseFuncOrArgument(
            new ParseNode("ordgroup", expression, this.mode,
                          firstToken, lastToken),
            false);
    } else {
        // Otherwise, just return a nucleus, or nothing for an optional group
        return optional ? null : this.parseSymbol();
    }
};

/**
 * Form ligature-like combinations of characters for text mode.
 * This includes inputs like "--", "---", "``" and "''".
 * The result will simply replace multiple textord nodes with a single
 * character in each value by a single textord node having multiple
 * characters in its value.  The representation is still ASCII source.
 *
 * @param {Array.<ParseNode>} group  the nodes of this group,
 *                                   list will be modified in place
 */
Parser.prototype.formLigatures = function(group) {
    // n is the index of the last node; it shrinks as nodes are merged
    let n = group.length - 1;
    for (let i = 0; i < n; ++i) {
        const a = group[i];
        const v = a.value;
        if (v === "-" && group[i + 1].value === "-") {
            if (i + 1 < n && group[i + 2].value === "-") {
                group.splice(i, 3, new ParseNode(
                    "textord", "---", "text", a, group[i + 2]));
                n -= 2;
            } else {
                group.splice(i, 2, new ParseNode(
                    "textord", "--", "text", a, group[i + 1]));
                n -= 1;
            }
        }
        if ((v === "'" || v === "`") && group[i + 1].value === v) {
            group.splice(i, 2, new ParseNode(
                "textord", v + v, "text", a, group[i + 1]));
            n -= 1;
        }
    }
};

/**
 * Parse a single symbol out of the string. Here, we handle both the functions
 * we have defined, as well as the single character symbols
 *
 * @return {?ParseFuncOrArgument} null if the lookahead token is neither a
 *         known function, a symbol of the current mode, nor (in text mode) a
 *         CJK character; in that case the token is not consumed
 */
Parser.prototype.parseSymbol = function() {
    const nucleus = this.nextToken;

    if (functions[nucleus.text]) {
        this.consume();
        // If there exists a function with this name, we return the function and
        // say that it is a function.
        return new ParseFuncOrArgument(
            nucleus.text,
            true, nucleus);
    } else if (symbols[this.mode][nucleus.text]) {
        this.consume();
        // Otherwise if this is a no-argument function, find the type it
        // corresponds to in the symbols map
        return new ParseFuncOrArgument(
            new ParseNode(symbols[this.mode][nucleus.text].group,
                          nucleus.text, this.mode, nucleus),
            false, nucleus);
    } else if (this.mode === "text" && cjkRegex.test(nucleus.text)) {
        this.consume();
        return new ParseFuncOrArgument(
            new ParseNode("textord", nucleus.text, this.mode, nucleus),
            false, nucleus);
    } else {
        return null;
    }
};

Parser.prototype.ParseNode = ParseNode;

module.exports = Parser;