Don't slice in lexer - www - Unnamed repository; edit this file 'description' to name the repository.

commit 0f6530096b31483d7b1e20133c27b417e768dd6c
parent b2fbd08871512eb11a268ebc20fa7604aaf616ab
Author: Ben Alpert <ben@benalpert.com>
Date:   Mon,  6 Apr 2015 10:39:39 -0700

Don't slice in lexer

Summary: Theoretically this allocates way less. In practice it seems to be exactly the same speed.

Test Plan: make test

Reviewers: emily

Reviewed By: emily

Differential Revision: https://phabricator.khanacademy.org/D16621

Diffstat:
M package.json  | 3 +++
M src/Lexer.js  | 72 ++++++++++++++++++++++++++++++++++++------------------------------------

2 files changed, 39 insertions(+), 36 deletions(-)
diff --git a/package.json b/package.json
@@ -25,5 +25,8 @@
   "bin": "cli.js",
   "scripts": {
     "test": "make lint test"
+  },
+  "dependencies": {
+    "match-at": "^0.1.0"
   }
 }
diff --git a/src/Lexer.js b/src/Lexer.js
@@ -11,6 +11,8 @@
  * kinds.
  */
 
+var matchAt = require("match-at");
+
 var ParseError = require("./ParseError");
 
 // The main lexer class
@@ -28,31 +30,31 @@ function Token(text, data, position) {
 // "normal" types of tokens. These are tokens which can be matched by a simple
 // regex
 var mathNormals = [
-    /^[/|@.""`0-9a-zA-Z]/, // ords
-    /^[*+-]/, // bins
-    /^[=<>:]/, // rels
-    /^[,;]/, // punctuation
-    /^['\^_{}]/, // misc
-    /^[(\[]/, // opens
-    /^[)\]?!]/, // closes
-    /^~/ // spacing
+    /[/|@.""`0-9a-zA-Z]/, // ords
+    /[*+-]/, // bins
+    /[=<>:]/, // rels
+    /[,;]/, // punctuation
+    /['\^_{}]/, // misc
+    /[(\[]/, // opens
+    /[)\]?!]/, // closes
+    /~/ // spacing
 ];
 
 // These are "normal" tokens like above, but should instead be parsed in text
 // mode.
 var textNormals = [
-    /^[a-zA-Z0-9`!@*()-=+\[\]'";:?\/.,]/, // ords
-    /^[{}]/, // grouping
-    /^~/ // spacing
+    /[a-zA-Z0-9`!@*()-=+\[\]'";:?\/.,]/, // ords
+    /[{}]/, // grouping
+    /~/ // spacing
 ];
 
 // Regexes for matching whitespace
-var whitespaceRegex = /^\s*/;
-var whitespaceConcatRegex = /^( +|\\  +)/;
+var whitespaceRegex = /\s*/;
+var whitespaceConcatRegex = / +|\\  +/;
 
 // This regex matches any other TeX function, which is a backslash followed by a
 // word or a single symbol
-var anyFunc = /^\\(?:[a-zA-Z]+|.)/;
+var anyFunc = /\\(?:[a-zA-Z]+|.)/;
 
 /**
  * This function lexes a single normal token. It takes a position, a list of
@@ -60,29 +62,28 @@ var anyFunc = /^\\(?:[a-zA-Z]+|.)/;
  * not.
  */
 Lexer.prototype._innerLex = function(pos, normals, ignoreWhitespace) {
-    var input = this._input.slice(pos);
+    var input = this._input;
     var whitespace;
 
     if (ignoreWhitespace) {
         // Get rid of whitespace.
-        whitespace = input.match(whitespaceRegex)[0];
+        whitespace = matchAt(whitespaceRegex, input, pos)[0];
         pos += whitespace.length;
-        input = input.slice(whitespace.length);
     } else {
         // Do the funky concatenation of whitespace that happens in text mode.
-        whitespace = input.match(whitespaceConcatRegex);
+        whitespace = matchAt(whitespaceConcatRegex, input, pos);
         if (whitespace !== null) {
             return new Token(" ", null, pos + whitespace[0].length);
         }
     }
 
     // If there's no more input to parse, return an EOF token
-    if (input.length === 0) {
+    if (pos === input.length) {
         return new Token("EOF", null, pos);
     }
 
     var match;
-    if ((match = input.match(anyFunc))) {
+    if ((match = matchAt(anyFunc, input, pos))) {
         // If we match a function token, return it
         return new Token(match[0], null, pos + match[0].length);
     } else {
@@ -91,7 +92,7 @@ Lexer.prototype._innerLex = function(pos, normals, ignoreWhitespace) {
         for (var i = 0; i < normals.length; i++) {
             var normal = normals[i];
 
-            if ((match = input.match(normal))) {
+            if ((match = matchAt(normal, input, pos))) {
                 // If it is, return it
                 return new Token(
                     match[0], null, pos + match[0].length);
@@ -99,26 +100,26 @@ Lexer.prototype._innerLex = function(pos, normals, ignoreWhitespace) {
         }
     }
 
-    throw new ParseError("Unexpected character: '" + input[0] +
-        "'", this, pos);
+    throw new ParseError(
+            "Unexpected character: '" + input[pos] + "'",
+            this, pos);
 };
 
 // A regex to match a CSS color (like #ffffff or BlueViolet)
-var cssColor = /^(#[a-z0-9]+|[a-z]+)/i;
+var cssColor = /#[a-z0-9]+|[a-z]+/i;
 
 /**
  * This function lexes a CSS color.
  */
 Lexer.prototype._innerLexColor = function(pos) {
-    var input = this._input.slice(pos);
+    var input = this._input;
 
     // Ignore whitespace
-    var whitespace = input.match(whitespaceRegex)[0];
+    var whitespace = matchAt(whitespaceRegex, input, pos)[0];
     pos += whitespace.length;
-    input = input.slice(whitespace.length);
 
     var match;
-    if ((match = input.match(cssColor))) {
+    if ((match = matchAt(cssColor, input, pos))) {
         // If we look like a color, return a color
         return new Token(match[0], null, pos + match[0].length);
     } else {
@@ -128,21 +129,20 @@ Lexer.prototype._innerLexColor = function(pos) {
 
 // A regex to match a dimension. Dimensions look like
 // "1.2em" or ".4pt" or "1 ex"
-var sizeRegex = /^(-?)\s*(\d+(?:\.\d*)?|\.\d+)\s*([a-z]{2})/;
+var sizeRegex = /(-?)\s*(\d+(?:\.\d*)?|\.\d+)\s*([a-z]{2})/;
 
 /**
  * This function lexes a dimension.
  */
 Lexer.prototype._innerLexSize = function(pos) {
-    var input = this._input.slice(pos);
+    var input = this._input;
 
     // Ignore whitespace
-    var whitespace = input.match(whitespaceRegex)[0];
+    var whitespace = matchAt(whitespaceRegex, input, pos)[0];
     pos += whitespace.length;
-    input = input.slice(whitespace.length);
 
     var match;
-    if ((match = input.match(sizeRegex))) {
+    if ((match = matchAt(sizeRegex, input, pos))) {
         var unit = match[3];
         // We only currently handle "em" and "ex" units
         if (unit !== "em" && unit !== "ex") {
@@ -161,12 +161,12 @@ Lexer.prototype._innerLexSize = function(pos) {
  * This function lexes a string of whitespace.
  */
 Lexer.prototype._innerLexWhitespace = function(pos) {
-    var input = this._input.slice(pos);
+    var input = this._input;
 
-    var whitespace = input.match(whitespaceRegex)[0];
+    var whitespace = matchAt(whitespaceRegex, input, pos)[0];
     pos += whitespace.length;
 
-    return new Token(whitespace, null, pos);
+    return new Token(whitespace[0], null, pos);
 };
 
 /**

	www Unnamed repository; edit this file 'description' to name the repository.
	Log \| Files \| Refs \| README

M	package.json	\|	3	+++
M	src/Lexer.js	\|	72	++++++++++++++++++++++++++++++++++++------------------------------------