commit 8f99433c800d76b52deb589491aacaf73100c90b
parent 33625f7b08708dd5d35eceb6664703465ee3ea9d
Author: Emily Eisenberg <xymostech@gmail.com>
Date: Sun, 7 Jul 2013 21:13:43 -0700
Use our own lexer, not jison's
Summary:
Build our own lexer and inject it into jison's parser, because jison's
lexer notation is confusing and annoying, and it doesn't let us do some fun
stuff.
Test Plan: Run stuff, make sure it still works.
Reviewers: spicyj
Reviewed By: spicyj
Differential Revision: http://phabricator.benalpert.com/D40
Diffstat:
| M | MJLite.js | | | 1 | + |
| A | lexer.js | | | 94 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | parser.jison | | | 32 | ++++++++------------------------ |
3 files changed, 103 insertions(+), 24 deletions(-)
diff --git a/MJLite.js b/MJLite.js
@@ -1,4 +1,5 @@
var parser = require("./parser.jison");
+parser.lexer = require("./lexer");
var buildExpression = function(expression) {
return _.map(expression, function(ex, i) {
diff --git a/lexer.js b/lexer.js
@@ -0,0 +1,94 @@
+var DEFAULT_STATE = 0, // lexing ordinary symbols, or the backslash that starts a function name
+ FUNC_STATE = 1; // a backslash was just consumed; lex() now only accepts a name from `funcs`
+
+function Lexer() { // nothing is set up here; setInput() assigns all of the lexer's state
+};
+
+var funcs = [ // names accepted in FUNC_STATE; lex() returns the matched name itself as the token
+ 'cdot', 'frac', 'lvert', 'rvert', 'pm', 'div'
+];
+
+var normals = [ // [pattern, token name] pairs for DEFAULT_STATE; lex() tries them in order, first match wins
+ [/^[/|a-zA-Z0-9.]/, 'ORD'], // a single slash, pipe, letter, digit or dot
+ [/^[*+-]/, 'BIN'],
+ [/^\^/, '^'],
+ [/^_/, '_'],
+ [/^{/, '{'],
+ [/^}/, '}'],
+ [/^[(\[]/, 'OPEN'], // '(' or '['
+ [/^[)\]]/, 'CLOSE'] // ')' or ']'
+];
+
+Lexer.prototype.doMatch = function(match) {
+    // Publish the string `match` as the current token (yytext/yyleng plus its
+    // column span in yylloc) and move the cursor just past it.
+    this.yytext = match;
+    this.yyleng = match.length;
+    this.yylloc.first_column = this._pos;
+    this._pos += match.length;
+    this.yylloc.last_column = this._pos;
+};
+
+Lexer.prototype.lex = function() {
+    // Return the name of the next token, or 'EOF' once only whitespace is
+    // left. Throws a string if nothing matches at the current position.
+    var whitespace = this._input.substr(this._pos).match(/^\s*/)[0];
+    this._pos += whitespace.length;
+    if (this._pos >= this._input.length) {
+        return 'EOF';
+    }
+
+    var toMatch = this._input.substr(this._pos);
+    var i, match, func;
+    if (this.state === DEFAULT_STATE) {
+        if (/^\\/.test(toMatch)) {
+            // A backslash introduces a function name; the name itself is
+            // lexed by the next call, in FUNC_STATE.
+            this.state = FUNC_STATE;
+            this.doMatch('\\');
+            return '\\';
+        }
+        // Rules are tried in order; the first one that matches wins.
+        for (i = 0; i < normals.length; i++) {
+            match = toMatch.match(normals[i][0]);
+            if (match) {
+                this.doMatch(match[0]);
+                return normals[i][1];
+            }
+        }
+    } else if (this.state === FUNC_STATE) {
+        // A name only counts when no letter follows it, so `pm` is not
+        // accepted as a prefix of a longer word such as `\pmatrix`.
+        for (i = 0; i < funcs.length; i++) {
+            func = funcs[i];
+            match = toMatch.match(new RegExp('^' + func + '(?![a-zA-Z])'));
+            if (match) {
+                this.doMatch(match[0]);
+                this.state = DEFAULT_STATE;
+                return func;
+            }
+        }
+    }
+
+    // Reached only when no rule or function name matched.
+    throw "Unexpected character: '" + toMatch[0] + "' at position " + this._pos;
+};
+
+Lexer.prototype.setInput = function(input) {
+    // Begin lexing `input` from its first character. Every yy* field is
+    // reset and the lexer goes back to DEFAULT_STATE; nothing is returned.
+    this._input = input;
+    this._pos = 0;
+    this.yyleng = 0;
+    this.yytext = "";
+    this.yylineno = 0;
+    // Lines are pinned to 1 here; doMatch only ever updates the columns.
+    this.yylloc = {
+        first_line: 1, first_column: 0,
+        last_line: 1, last_column: 0
+    };
+
+    this.state = DEFAULT_STATE;
+};
+
+module.exports = new Lexer(); // one shared instance; MJLite.js assigns it to parser.lexer
diff --git a/parser.jison b/parser.jison
@@ -4,22 +4,6 @@
%lex
%%
-\s+ /* skip whitespace */
-cdot return 'CDOT'
-frac return 'FRAC'
-lvert return 'LVERT'
-rvert return 'RVERT'
-pm return 'PM'
-div return 'DIV'
-[/|a-zA-Z0-9] return 'ORD'
-[*+-] return 'BIN'
-\^ return '^'
-[_] return '_'
-[{] return '{'
-[}] return '}'
-[(] return 'OPEN'
-[)] return 'CLOSE'
-[\\] return '\\'
<<EOF>> return 'EOF'
/lex
@@ -37,7 +21,7 @@ div return 'DIV'
%% /* language grammar */
expression
- : ex EOF
+ : ex 'EOF' /* 'EOF' is the token name lex() in lexer.js returns at end of input */
{return $1;}
;
@@ -61,22 +45,22 @@ group
{$$ = $1;}
| '{' ex '}'
{$$ = $2;}
- | '\\' func
+ | '\' func
{$$ = $2;}
;
func
- : 'CDOT'
+ : 'cdot' /* token names in this rule are the strings lex() returns from the funcs list in lexer.js */
{$$ = [{type: 'bin', value: yytext}];}
- | 'PM'
+ | 'pm'
{$$ = [{type: 'bin', value: yytext}];}
- | 'DIV'
+ | 'div'
{$$ = [{type: 'bin', value: yytext}];}
- | 'FRAC' group group
+ | 'frac' group group
{$$ = [{type: 'frac', value: {numer: $2, denom: $3}}];}
- | 'LVERT'
+ | 'lvert'
{$$ = [{type: 'open', value: yytext}];}
- | 'RVERT'
+ | 'rvert'
{$$ = [{type: 'close', value: yytext}];}
;