Add some more symbols (#502) - www - Unnamed repository; edit this file 'description' to name the repository.

commit 4a9c2acbf7a50a6205cc3f42f374a4b3047c9bf1
parent befe1c1af7a2ce88a1c7099ee260f05d834d76c6
Author: Martin von Gagern <Martin.vGagern@gmx.net>
Date:   Mon, 25 Jul 2016 04:56:31 +0200

Add some more symbols (#502)

This adds support for the following input sequences:

    -- --- ` ' `` '' \degree \pounds \maltese

resulting in the symbols – — ‘ ’ “ ” ° £ ✠, which are already present in our fonts.

As part of this modification, the recognition of multiple dashes was moved
from the lexer to the parser.
This is necessary because in math mode a sequence of hyphens is just a
sequence of minus signs, just as a pair of apostrophes in math mode is a
double prime, not a right double quotation mark.
To make this easier, parseGroup and parseOptionalGroup have been merged.
Diffstat:
M src/Lexer.js  | 6 ++----
M src/Parser.js  | 83 ++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
M src/buildCommon.js  | 8 +++++---
M src/symbols.js  | 14 +++++++++++++-
A test/screenshotter/images/DashesAndQuotes-chrome.png  | 0 
A test/screenshotter/images/DashesAndQuotes-firefox.png  | 0 
A test/screenshotter/images/Symbols1-chrome.png  | 0 
A test/screenshotter/images/Symbols1-firefox.png  | 0 
M test/screenshotter/ss_data.yaml  | 4 ++++
M test/screenshotter/test.tex  | 2 +-

10 files changed, 73 insertions(+), 44 deletions(-)
diff --git a/src/Lexer.js b/src/Lexer.js
@@ -63,7 +63,6 @@ Token.prototype.range = function(endToken, text) {
 
 /* The following tokenRegex
  * - matches typical whitespace (but not NBSP etc.) using its first group
- * - matches symbol combinations which result in a single output character
  * - does not match any control character \x00-\x1f except whitespace
  * - does not match a bare backslash
  * - matches any ASCII character except those just mentioned
@@ -78,9 +77,8 @@ Token.prototype.range = function(endToken, text) {
  * still reject the input.
  */
 var tokenRegex = new RegExp(
-    "([ \r\n\t]+)|(" +                                // whitespace
-    "---?" +                                          // special combinations
-    "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" +  // single codepoint
+    "([ \r\n\t]+)|" +                                 // whitespace
+    "([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" +  // single codepoint
     "|[\uD800-\uDBFF][\uDC00-\uDFFF]" +               // surrogate pair
     "|\\\\(?:[a-zA-Z]+|[^\uD800-\uDFFF])" +           // function name
     ")"
diff --git a/src/Parser.js b/src/Parser.js
@@ -545,7 +545,7 @@ Parser.prototype.parseArguments = function(func, funcData) {
             if (argType) {
                 arg = this.parseGroupOfType(argType, true);
             } else {
-                arg = this.parseOptionalGroup();
+                arg = this.parseGroup(true);
             }
             if (!arg) {
                 args.push(null);
@@ -623,12 +623,7 @@ Parser.prototype.parseGroupOfType = function(innerMode, optional) {
     }
     // By the time we get here, innerMode is one of "text" or "math".
     // We switch the mode of the parser, recurse, then restore the old mode.
-    var res;
-    if (optional) {
-        res = this.parseOptionalGroup();
-    } else {
-        res = this.parseGroup();
-    }
+    var res = this.parseGroup(optional);
     this.switchMode(outerMode);
     return res;
 };
@@ -638,7 +633,7 @@ Parser.prototype.parseGroupOfType = function(innerMode, optional) {
  * brace-enclosed tokens plus some position information.
  *
  * @param {string} modeName  Used to describe the mode in error messages
- * @param {boolean} optional  Whether the group is optional or required
+ * @param {boolean=} optional  Whether the group is optional or required
  */
 Parser.prototype.parseStringGroup = function(modeName, optional) {
     if (optional && this.nextToken.text !== "[") {
@@ -707,53 +702,71 @@ Parser.prototype.parseSizeGroup = function(optional) {
 };
 
 /**
- * Parses a group, which is either a single nucleus (like "x") or an expression
- * in braces (like "{x+y}")
+ * If the argument is false or absent, this parses an ordinary group,
+ * which is either a single nucleus (like "x") or an expression
+ * in braces (like "{x+y}").
+ * If the argument is true, it parses either a bracket-delimited expression
+ * (like "[x+y]") or returns null to indicate the absence of a
+ * bracket-enclosed group.
  *
+ * @param {boolean=} optional  Whether the group is optional or required
  * @return {?ParseFuncOrArgument}
  */
-Parser.prototype.parseGroup = function() {
+Parser.prototype.parseGroup = function(optional) {
     var firstToken = this.nextToken;
     // Try to parse an open brace
-    if (this.nextToken.text === "{") {
+    if (this.nextToken.text === (optional ? "[" : "{")) {
         // If we get a brace, parse an expression
         this.consume();
-        var expression = this.parseExpression(false);
+        var expression = this.parseExpression(false, optional ? "]" : null);
         var lastToken = this.nextToken;
         // Make sure we get a close brace
-        this.expect("}");
+        this.expect(optional ? "]" : "}");
+        if (this.mode === "text") {
+            this.formLigatures(expression);
+        }
         return new ParseFuncOrArgument(
             new ParseNode("ordgroup", expression, this.mode,
                           firstToken, lastToken),
             false);
     } else {
-        // Otherwise, just return a nucleus
-        return this.parseSymbol();
+        // Otherwise, just return a nucleus, or nothing for an optional group
+        return optional ? null : this.parseSymbol();
     }
 };
 
 /**
- * Parses a group, which is an expression in brackets (like "[x+y]")
+ * Form ligature-like combinations of characters for text mode.
+ * This includes inputs like "--", "---", "``" and "''".
+ * The result will simply replace multiple textord nodes with a single
+ * character in each value by a single textord node having multiple
+ * characters in its value.  The representation is still ASCII source.
  *
- * @return {?ParseFuncOrArgument}
+ * @param {Array.<ParseNode>} group  the nodes of this group,
+ *                                   list will be modified in place
  */
-Parser.prototype.parseOptionalGroup = function() {
-    var firstToken = this.nextToken;
-    // Try to parse an open bracket
-    if (this.nextToken.text === "[") {
-        // If we get a brace, parse an expression
-        this.consume();
-        var expression = this.parseExpression(false, "]");
-        var lastToken = this.nextToken;
-        // Make sure we get a close bracket
-        this.expect("]");
-        return new ParseFuncOrArgument(
-            new ParseNode("ordgroup", expression, this.mode,
-                          firstToken, lastToken),
-            false);
-    } else {
-        // Otherwise, return null,
-        return null;
+Parser.prototype.formLigatures = function(group) {
+    var i;
+    var n = group.length - 1;
+    for (i = 0; i < n; ++i) {
+        var a = group[i];
+        var v = a.value;
+        if (v === "-" && group[i + 1].value === "-") {
+            if (i + 1 < n && group[i + 2].value === "-") {
+                group.splice(i, 3, new ParseNode(
+                    "textord", "---", "text", a, group[i + 2]));
+                n -= 2;
+            } else {
+                group.splice(i, 2, new ParseNode(
+                    "textord", "--", "text", a, group[i + 1]));
+                n -= 1;
+            }
+        }
+        if ((v === "'" || v === "`") && group[i + 1].value === v) {
+            group.splice(i, 2, new ParseNode(
+                "textord", v + v, "text", a, group[i + 1]));
+            n -= 1;
+        }
     }
 };
 
diff --git a/src/buildCommon.js b/src/buildCommon.js
@@ -23,9 +23,11 @@ var greekCapitals = [
     "\\Omega",
 ];
 
-var dotlessLetters = [
+// The following have to be loaded from Main-Italic font, using class mainit
+var mainitLetters = [
     "\u0131",   // dotless i, \imath
     "\u0237",   // dotless j, \jmath
+    "\u00a3",   // \pounds
 ];
 
 /**
@@ -101,7 +103,7 @@ var mathit = function(value, mode, color, classes) {
     if (/[0-9]/.test(value.charAt(0)) ||
             // glyphs for \imath and \jmath do not exist in Math-Italic so we
             // need to use Main-Italic instead
-            utils.contains(dotlessLetters, value) ||
+            utils.contains(mainitLetters, value) ||
             utils.contains(greekCapitals, value)) {
         return makeSymbol(
             value, "Main-Italic", mode, color, classes.concat(["mainit"]));
@@ -126,7 +128,7 @@ var makeOrd = function(group, options, type) {
 
     var font = options.font;
     if (font) {
-        if (font === "mathit" || utils.contains(dotlessLetters, value)) {
+        if (font === "mathit" || utils.contains(mainitLetters, value)) {
             return mathit(value, mode, color, classes);
         } else {
             var fontName = fontMap[font].fontName;
diff --git a/src/symbols.js b/src/symbols.js
@@ -589,6 +589,18 @@ defineSymbol(math, main, accent, "\u02d9", "\\dot");
 defineSymbol(math, main, mathord, "\u0131", "\\imath");
 defineSymbol(math, main, mathord, "\u0237", "\\jmath");
 
+defineSymbol(text, main, textord, "\u2013", "--");
+defineSymbol(text, main, textord, "\u2014", "---");
+defineSymbol(text, main, textord, "\u2018", "`");
+defineSymbol(text, main, textord, "\u2019", "'");
+defineSymbol(text, main, textord, "\u201c", "``");
+defineSymbol(text, main, textord, "\u201d", "''");
+defineSymbol(math, main, textord, "\u00b0", "\\degree");
+defineSymbol(text, main, textord, "\u00b0", "\\degree");
+defineSymbol(math, main, mathord, "\u00a3", "\\pounds");
+defineSymbol(math, ams, textord, "\u2720", "\\maltese");
+defineSymbol(text, ams, textord, "\u2720", "\\maltese");
+
 defineSymbol(text, main, spacing, "\u00a0", "\\ ");
 defineSymbol(text, main, spacing, "\u00a0", " ");
 defineSymbol(text, main, spacing, "\u00a0", "~");
@@ -605,7 +617,7 @@ for (i = 0; i < mathTextSymbols.length; i++) {
 }
 
 // All of these are textords in text mode
-var textSymbols = "0123456789`!@*()-=+[]'\";:?/.,";
+var textSymbols = "0123456789!@*()-=+[]\";:?/.,";
 for (i = 0; i < textSymbols.length; i++) {
     ch = textSymbols.charAt(i);
     defineSymbol(text, main, textord, ch, ch);
diff --git a/test/screenshotter/images/DashesAndQuotes-chrome.png b/test/screenshotter/images/DashesAndQuotes-chrome.png
Binary files differ.
diff --git a/test/screenshotter/images/DashesAndQuotes-firefox.png b/test/screenshotter/images/DashesAndQuotes-firefox.png
Binary files differ.
diff --git a/test/screenshotter/images/Symbols1-chrome.png b/test/screenshotter/images/Symbols1-chrome.png
Binary files differ.
diff --git a/test/screenshotter/images/Symbols1-firefox.png b/test/screenshotter/images/Symbols1-firefox.png
Binary files differ.
diff --git a/test/screenshotter/ss_data.yaml b/test/screenshotter/ss_data.yaml
@@ -36,6 +36,7 @@ Cases: |
 Colors:
     tex: \blue{a}\color{#0f0}{b}\color{red}{c}
     nolatex: different syntax and different scope
+DashesAndQuotes: \text{``a'' b---c -- d----`e'-{-}-f}--``x''
 DeepFontSizing:
     tex: |
         a^{\big| x^{\big(}}_{\Big\uparrow} +
@@ -109,6 +110,9 @@ SupSubHorizSpacing: |
 SupSubLeftAlignReset: |
     \omega^8_{888} \quad \frac{1}{\hat{\omega}^{8}_{888}} \quad \displaystyle\sum_{\omega^{8}_{888}}
 SupSubOffsets: \displaystyle \int_{2+3}x f^{2+3}+3\lim_{2+3+4+5}f
+Symbols1: |
+    \maltese\degree\pounds\$
+    \text{\maltese\degree}
 Text: \frac{a}{b}\text{c~ {ab} \ e}+fg
 UnsupportedCmds:
     tex: \err\,\frac\fracerr3\,2^\superr_\suberr\,\sqrt\sqrterr
diff --git a/test/screenshotter/test.tex b/test/screenshotter/test.tex
@@ -1,6 +1,6 @@
 \documentclass[10pt]{article}
 
-\usepackage{amsmath,amssymb}
+\usepackage{amsmath,amssymb,textcomp,gensymb}
 \usepackage[mathscr]{eucal}
 \usepackage{eufrak}
 \usepackage[papersize={133pt,100pt},margin=0.5pt]{geometry}

	www Unnamed repository; edit this file 'description' to name the repository.
	Log \| Files \| Refs \| README

M	src/Lexer.js	\|	6	++----
M	src/Parser.js	\|	83	++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
M	src/buildCommon.js	\|	8	+++++---
M	src/symbols.js	\|	14	+++++++++++++-
A	test/screenshotter/images/DashesAndQuotes-chrome.png	\|	0
A	test/screenshotter/images/DashesAndQuotes-firefox.png	\|	0
A	test/screenshotter/images/Symbols1-chrome.png	\|	0
A	test/screenshotter/images/Symbols1-firefox.png	\|	0
M	test/screenshotter/ss_data.yaml	\|	4	++++
M	test/screenshotter/test.tex	\|	2	+-