commit a06744e9411a4ae7527bce5f523fb592a5fe8fa6
parent 7be056c1f4a5cfe00120f5b20662a876573b348c
Author: Kevin Barabash <kevinb7@gmail.com>
Date: Mon, 6 Jul 2015 21:04:39 -0600
Merge pull request #268 from gagern/texcmp
Generate reference images from LaTeX
Diffstat:
8 files changed, 420 insertions(+), 1 deletion(-)
diff --git a/.gitignore b/.gitignore
@@ -3,3 +3,5 @@ node_modules
npm-debug.log
last.png
diff.png
+/test/screenshotter/tex/
+/test/screenshotter/diff/
diff --git a/Makefile b/Makefile
@@ -16,7 +16,7 @@ export UGLIFYJS = $(realpath ./node_modules/.bin/uglifyjs) \
setup:
npm install
-lint: katex.js server.js cli.js $(wildcard src/*.js) $(wildcard test/*.js) $(wildcard contrib/*/*.js)
+lint: katex.js server.js cli.js $(wildcard src/*.js) $(wildcard test/*.js) $(wildcard contrib/*/*.js) $(wildcard dockers/*/*.js)
./node_modules/.bin/jshint $^
build/katex.js: katex.js $(wildcard src/*.js)
diff --git a/dockers/texcmp/Dockerfile b/dockers/texcmp/Dockerfile
@@ -0,0 +1,11 @@
+# Image in which texcmp.sh runs texcmp.js to typeset and rasterize the
+# test cases.
+#
+# convert from PDF to PNG with -flatten looks really bad on 14.04 LTS
+FROM ubuntu:15.04
+
+MAINTAINER Martin von Gagern <gagern@ma.tum.de>
+
+# Disable regular updates, but keep security updates
+# (the sed call comments out every "deb" line that mentions "updates")
+RUN sed -i 's/^\(deb.*updates\)/#\1/' /etc/apt/sources.list && apt-get update
+
+# Install all required packages, but try not to pull in TOO much
+# - etoolbox is loaded by test/screenshotter/test.tex
+# - imagemagick provides the "convert" tool called by texcmp.js
+# - nodejs runs texcmp.js itself
+# - texlive-latex-base presumably provides pdflatex, and ghostscript is
+#   presumably what convert uses to read PDF input (TODO confirm)
+RUN apt-get -qy --no-install-recommends install \
+ texlive-latex-base etoolbox imagemagick ghostscript nodejs
diff --git a/dockers/texcmp/README.md b/dockers/texcmp/README.md
@@ -0,0 +1,82 @@
+# How to compare against LaTeX
+
+The tools in this directory can be used to create reference images
+using LaTeX, and to compare them against the screenshots taken from a
+browser.
+
+## Execution environment
+
+### Docker environment
+
+If you don't want to install all the required tools yourself, or
+want to make sure that you create reproducible results, simply run
+
+ dockers/texcmp/texcmp.sh
+
+from the root of your KaTeX directory tree.
+This will build a suitable docker image unless such an image already
+exists. It will then use a container based on that image to generate
+all the images described below.
+
+Note that the files and directories created in the source tree from
+within the Docker container will be owned by root, so you might have
+trouble deleting them later on. Be sure you can obtain superuser
+permissions on your computer or know someone who can, just to be safe.
+
+### Native environment
+
+If you want to avoid the overhead of creating a docker container, or
+the even larger overhead of setting up docker and creating the initial
+image, then you may instead execute the commands
+
+ cd dockers/texcmp
+ npm install
+ node texcmp.js
+
+from the root of your KaTeX directory tree. Required tools include the
+`pdflatex` tool of a standard TeX distribution as well as the
+`convert` tool from ImageMagick.
+
+Note that this approach will use `/tmp/texcmp` as a temporary directory.
+The use of a single directory name here can lead to conflicts if
+multiple developers on the same machine try to use that directory.
+
+Also note that different software configurations can lead to different results,
+so if reproducibility is desired, the Docker approach should be chosen.
+
+## Generated files
+
+After running either of the above commands, you will find two
+(possibly new) subdirectories inside `test/screenshotter`,
+called `tex` and `diff`.
+
+### Rasterized documents
+
+`test/screenshotter/tex` will contain images created by `pdflatex` by
+plugging the test case formula in question into the template
+`test/screenshotter/test.tex`. This is essentially our reference of
+how LaTeX renders a given input.
+
+### Difference images
+
+`test/screenshotter/diff` will contain images depicting the difference
+between the LaTeX rendering and the Firefox screenshot. Black areas
+indicate overlapping print. Green areas are black in LaTeX but white
+in Firefox, while it's the other way round for red areas. Colored
+input is first converted to grayscale, before being subject to the
+coloring just described. The pictures will be aligned in such a way
+as to maximize the overlap between the two versions (i.e. the amount
+of black output). The result will then be trimmed so it can easily be
+pasted into bug reports.
+
+## Command line arguments
+
+Both `texcmp.sh` and `texcmp.js` will accept the names of test cases
+on the command line. This can be useful if one particular test case
+is affected by current development, so that the effects on it can be
+seen more quickly.
+
+Examples:
+
+ dockers/texcmp/texcmp.sh Sqrt SqrtRoot
+ node dockers/texcmp/texcmp.js Baseline
diff --git a/dockers/texcmp/package.json b/dockers/texcmp/package.json
@@ -0,0 +1,10 @@
+{
+ "name": "texcmp",
+ "description": "KaTeX helper to compare LaTeX output against screenshots",
+ "license": "MIT",
+ "dependencies": {
+ "ndarray-fft": "1.0.0",
+ "pngparse": "2.0.1",
+ "q": "1.4.1"
+ }
+}
diff --git a/dockers/texcmp/texcmp.js b/dockers/texcmp/texcmp.js
@@ -0,0 +1,265 @@
+"use strict";
+
+var querystring = require("querystring");
+var childProcess = require("child_process");
+var fs = require("fs");
+var path = require("path");
+var Q = require("q"); // To debug, pass Q_DEBUG=1 in the environment
+var pngparse = require("pngparse");
+var fft = require("ndarray-fft");
+var ndarray = require("ndarray-fft/node_modules/ndarray");
+
+var data = require("../../test/screenshotter/ss_data");
+
+// Adapt node functions to Q promises
+var readFile = Q.denodeify(fs.readFile);
+var writeFile = Q.denodeify(fs.writeFile);
+var mkdir = Q.denodeify(fs.mkdir);
+
+// ignore some tests, since they contain commands not supported by LaTeX
+var blacklist = {
+ Colors: "Color handling differs",
+ DeepFontSizing: "\\Huge inside \\dfrac doesn't work for some reason",
+ KaTeX: "Custom command, doesn't exist in LaTeX"
+};
+var todo;
+if (process.argv.length > 2) {
+ todo = process.argv.slice(2);
+} else {
+ todo = Object.keys(data).filter(function(key) {
+ return !blacklist[key];
+ });
+}
+
+// Dimensions used when we do the FFT-based alignment computation
+var alignWidth = 2048; // should be at least twice the width resp. height
+var alignHeight = 2048; // of the screenshots, and a power of two.
+
+// Compute required resolution to match test.html. 16px default font,
+// scaled to 4em in test.html, and to 1.21em in katex.css. Corresponding
+// LaTeX font size is 10pt. There are 72.27pt per inch.
+// The resulting dpi value is what gets passed to "convert -density".
+var pxPerEm = 16 * 4 * 1.21;
+var pxPerPt = pxPerEm / 10;
+var dpi = pxPerPt * 72.27;
+
+// Scratch directory for the generated .tex and .pdf files
+var tmpDir = "/tmp/texcmp";
+// The test/screenshotter directory of the source tree
+var ssDir = path.normalize(
+ path.join(__dirname, "..", "..", "test", "screenshotter"));
+// Existing browser screenshots (<key>-firefox.png) are read from here
+var imagesDir = path.join(ssDir, "images");
+// Rasterized LaTeX output (<key>-pdflatex.png) is written here
+var teximgDir = path.join(ssDir, "tex");
+// Difference images (<key>.png) are written here
+var diffDir = path.join(ssDir, "diff");
+// Content of test.tex; assigned once that file has been read
+var template;
+
+Q.all([
+ readFile(path.join(ssDir, "test.tex"), "utf-8"),
+ ensureDir(tmpDir),
+ ensureDir(teximgDir),
+ ensureDir(diffDir)
+]).spread(function(data) {
+ template = data;
+ // dirs have been created, template has been read, now rasterize.
+ return Q.all(todo.map(processTestCase));
+}).done();
+
+// Process a single test case: rasterize, then create diff
+function processTestCase(key) {
+ if (blacklist[key]) {
+ return;
+ }
+ var url = data[key];
+ var query = url.replace(/^.*?\?/, ""); // extract query string
+ query = query.replace(/\+/g, "%2B"); // plus doesn't mean space here
+ query = querystring.parse(query);
+ var tex = "$" + query.m + "$";
+ if (query.display) {
+ tex = "$$" + query.m + "$$";
+ }
+ if (query.pre) {
+ tex = query.pre.replace("<br>", "\\\\") + tex;
+ }
+ if (query.post) {
+ tex = tex + query.post.replace("<br>", "\\\\");
+ }
+ tex = template.replace(/\$.*\$/, tex.replace(/\$/g, "$$$$"));
+ var texFile = path.join(tmpDir, key + ".tex");
+ var pdfFile = path.join(tmpDir, key + ".pdf");
+ var pngFile = path.join(teximgDir, key + "-pdflatex.png");
+ var browserFile = path.join(imagesDir, key + "-firefox.png");
+ var diffFile = path.join(diffDir, key + ".png");
+
+ // Step 1: write key.tex file
+ var fftLatex = writeFile(texFile, tex).then(function() {
+ // Step 2: call "pdflatex key" to create key.pdf
+ return execFile("pdflatex", [
+ "-interaction", "nonstopmode", key
+ ], {cwd: tmpDir});
+ }).then(function() {
+ console.log("Typeset " + key);
+ // Step 3: call "convert ... key.pdf key.png" to create key.png
+ return execFile("convert", [
+ "-density", dpi, "-units", "PixelsPerInch", "-flatten",
+ pdfFile, pngFile
+ ]);
+ }).then(function() {
+ console.log("Rasterized " + key);
+ // Step 4: apply FFT to that
+ return readPNG(pngFile).then(fftImage);
+ });
+ // Step 5: apply FFT to reference image as well
+ var fftBrowser = readPNG(browserFile).then(fftImage);
+
+ return Q.all([fftBrowser, fftLatex]).spread(function(browser, latex) {
+ // Now we have the FFT result from both
+ // Step 6: find alignment which maximizes overlap.
+ // This uses a FFT-based correlation computation.
+ var x, y;
+ var real = createMatrix();
+ var imag = createMatrix();
+
+ // Step 6a: (real + i*imag) = latex * conjugate(browser)
+ for (y = 0; y < alignHeight; ++y) {
+ for (x = 0; x < alignWidth; ++x) {
+ var br = browser.real.get(y, x);
+ var bi = browser.imag.get(y, x);
+ var lr = latex.real.get(y, x);
+ var li = latex.imag.get(y, x);
+ real.set(y, x, br * lr + bi * li);
+ imag.set(y, x, br * li - bi * lr);
+ }
+ }
+
+ // Step 6b: (real + i*imag) = inverseFFT(real + i*imag)
+ fft(-1, real, imag);
+
+ // Step 6c: find position where the (squared) absolute value is maximal
+ var offsetX = 0;
+ var offsetY = 0;
+ var maxSquaredNorm = -1; // any result is greater than initial value
+ for (y = 0; y < alignHeight; ++y) {
+ for (x = 0; x < alignWidth; ++x) {
+ var or = real.get(y, x);
+ var oi = imag.get(y, x);
+ var squaredNorm = or * or + oi * oi;
+ if (maxSquaredNorm < squaredNorm) {
+ maxSquaredNorm = squaredNorm;
+ offsetX = x;
+ offsetY = y;
+ }
+ }
+ }
+
+ // Step 6d: Treat negative offsets in a non-cyclic way
+ if (offsetY > (alignHeight / 2)) {
+ offsetY -= alignHeight;
+ }
+ if (offsetX > (alignWidth / 2)) {
+ offsetX -= alignWidth;
+ }
+ console.log("Positioned " + key + ": " + offsetX + ", " + offsetY);
+
+ // Step 7: use these offsets to compute difference illustration
+ var bx = Math.max(offsetX, 0); // browser left padding
+ var by = Math.max(offsetY, 0); // browser top padding
+ var lx = Math.max(-offsetX, 0); // latex left padding
+ var ly = Math.max(-offsetY, 0); // latex top padding
+ var uw = Math.max(browser.width + bx, latex.width + lx); // union width
+ var uh = Math.max(browser.height + by, latex.height + ly); // u. height
+ return execFile("convert", [
+ // First image: latex rendering, converted to grayscale and padded
+ "(", pngFile, "-grayscale", "Rec709Luminance",
+ "-extent", uw + "x" + uh + "-" + lx + "-" + ly,
+ ")",
+ // Second image: browser screenshot, to grayscale and padded
+ "(", browserFile, "-grayscale", "Rec709Luminance",
+ "-extent", uw + "x" + uh + "-" + bx + "-" + by,
+ ")",
+ // Third image: the per-pixel minimum of the first two images
+ "(", "-clone", "0-1", "-compose", "darken", "-composite", ")",
+ // First image is red, second green, third blue channel of result
+ "-channel", "RGB", "-combine",
+ "-trim", // remove everything that has the same color as the corners
+ diffFile // output file name
+ ]);
+ }).then(function() {
+ console.log("Compared " + key);
+ });
+}
+
+// Create a directory, but ignore error if the directory already exists.
+function ensureDir(dir) {
+ return mkdir(dir).fail(function(err) {
+ if (err.code !== "EEXIST") {
+ throw err;
+ }
+ });
+}
+
+// Execute a given command, and return a promise to its output.
+// Don't denodeify here, since fail branch needs access to stderr.
+function execFile(cmd, args, opts) {
+ var deferred = Q.defer();
+ childProcess.execFile(cmd, args, opts, function(err, stdout, stderr) {
+ if (err) {
+ console.error("Error executing " + cmd + " " + args.join(" "));
+ console.error(stdout + stderr);
+ err.stdout = stdout;
+ err.stderr = stderr;
+ deferred.reject(err);
+ } else {
+ deferred.resolve(stdout);
+ }
+ });
+ return deferred.promise;
+}
+
+// Read given file and parse it as a PNG file.
+function readPNG(file) {
+ var deferred = Q.defer();
+ var onerror = deferred.reject.bind(deferred);
+ var stream = fs.createReadStream(file);
+ stream.on("error", onerror);
+ pngparse.parseStream(stream, function(err, image) {
+ if (err) {
+ onerror(err);
+ return;
+ }
+ deferred.resolve(image);
+ });
+ return deferred.promise;
+}
+
+// Take a parsed image data structure and apply FFT transformation to it
+function fftImage(image) {
+ var real = createMatrix();
+ var imag = createMatrix();
+ var idx = 0;
+ var nchan = image.channels;
+ var alphachan = 1 - (nchan % 2);
+ var colorchan = nchan - alphachan;
+ for (var y = 0; y < image.height; ++y) {
+ for (var x = 0; x < image.width; ++x) {
+ var c;
+ var v = 0;
+ for (c = 0; c < colorchan; ++c) {
+ v += 255 - image.data[idx++];
+ }
+ for (c = 0; c < alphachan; ++c) {
+ v += image.data[idx++];
+ }
+ real.set(y, x, v);
+ }
+ }
+ fft(1, real, imag);
+ return {
+ real: real,
+ imag: imag,
+ width: image.width,
+ height: image.height
+ };
+}
+
+// Create a new matrix of preconfigured dimensions, initialized to zero
+function createMatrix() {
+ var array = new Float64Array(alignWidth * alignHeight);
+ return new ndarray(array, [alignWidth, alignHeight]);
+}
diff --git a/dockers/texcmp/texcmp.sh b/dockers/texcmp/texcmp.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+# Run texcmp.js inside a docker container, building the required image
+# first unless an image with matching name and tag exists already.
+# All command line arguments are passed on to texcmp.js.
+
+# Print each command as it gets executed
+set -x
+imgname=katex/texcmp
+tag=1.1
+# Look up the image ID (third column of the "docker images" output) for
+# the above name and tag. Slashes and dots get escaped for the awk regex.
+imgid=$(docker images | awk "/${imgname//\//\\/} *${tag//./\\.}/{print \$3}")
+# Work from the directory containing this script, wherever we got called
+cd "$(dirname "$0")" || exit $?
+# Install the node modules on the host; the container will see them
+# through the volume mounted below.
+npm install || exit $?
+# Only build the image if the lookup above did not find one
+if [[ -z ${imgid} ]]; then
+ docker build -t "${imgname}:${tag}" . || exit $?
+fi
+# Root of the KaTeX source tree, two levels above this directory
+base=$(cd ../..; pwd)
+# Mount the source tree at /KaTeX and run texcmp.js from its directory
+# in there. The container is removed again once the command finishes.
+docker run --rm \
+ -v "${base}":/KaTeX \
+ -w /KaTeX/dockers/texcmp \
+ "${imgname}:${tag}" \
+ nodejs texcmp.js "$@"
diff --git a/test/screenshotter/test.tex b/test/screenshotter/test.tex
@@ -0,0 +1,32 @@
+% Template used by dockers/texcmp/texcmp.js to typeset a single test case.
+% NOTE: texcmp.js substitutes the formula for the first pair of dollar
+% signs found on a single line, so never put two dollar signs on one line
+% anywhere above the placeholder, not even inside a comment.
+\documentclass[10pt]{article}
+
+\usepackage{amsmath,amssymb}
+% Small page of 133pt by 100pt with almost no margin
+\usepackage[papersize={133pt,100pt},margin=0.5pt]{geometry}
+\usepackage{color}
+\usepackage{etoolbox}
+\setlength{\parindent}{0pt}
+\pagestyle{empty}
+
+% \blue{...} typesets its argument in blue
+\newcommand{\blue}[1]{\textcolor{blue}{#1}}
+
+% Make the text size commands usable inside formulas.
+% \kasize{name} keeps the original size command available as \kaname and
+% redefines \name: outside math mode it simply calls the original command,
+% inside math mode it hands the original command to \kasizecollect.
+% \kasizecollect appears to gather the following material in the token
+% register \kasizetoks, which \kasizeapply then typesets inside an \mbox
+% (see the linked answer for details).
+% Thanks to http://tex.stackexchange.com/a/26017/16923
+\newtoks\kasizetoks
+\def\kasizecollect#1{\afterassignment\kasizeapply\kasizetoks=\bgroup#1$}
+\def\kasizeapply{\mbox{\the\kasizetoks$\egroup}}
+\def\kasize#1{\csletcs{ka#1}{#1}%
+\csdef{#1}{\ifmmode\expandafter\kasizecollect\csname ka#1\endcsname%
+\else\csname ka#1\endcsname\fi}}
+\kasize{tiny}
+\kasize{scriptsize}
+\kasize{footnotesize}
+\kasize{small}
+\kasize{normalsize}
+\kasize{large}
+\kasize{Large}
+\kasize{LARGE}
+\kasize{huge}
+\kasize{Huge}
+
+\begin{document}
+% The next line is the placeholder which gets replaced by the formula
+$...$
+\end{document}