8000 Add a pass to simplify single-character choices by markw65 · Pull Request #425 · peggyjs/peggy · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Add a pass to simplify single-character choices #425

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Sep 7, 2023
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Released: TBD

### Minor Changes

- [#425](https://github.com/peggyjs/peggy/pull/425) Add a pass to simplify single-character choices
- [#420](https://github.com/peggyjs/peggy/pull/420) Updated dependencies to
avoid audit warnings. From @hildjj.
- [#404](https://github.com/peggyjs/peggy/issues/404) Add support for -w/--watch
Expand Down
2 changes: 1 addition & 1 deletion docs/js/benchmark-bundle.min.js

Large diffs are not rendered by default.

30 changes: 7 additions & 23 deletions docs/js/examples.js
8000
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ function peg$parse(input, options) {
var peg$r0 = /^[a-z]/;
var peg$r1 = /^[^a-z]/i;
var peg$r2 = /^[0-9]/;
var peg$r3 = /^[a-c]/;

var peg$e0 = peg$literalExpectation("foo", false);
var peg$e1 = peg$literalExpectation("foo", true);
Expand All @@ -202,7 +203,8 @@ function peg$parse(input, options) {
var peg$e10 = peg$literalExpectation("bar", true);
var peg$e11 = peg$literalExpectation(" ", false);
var peg$e12 = peg$literalExpectation("c", false);
var peg$e13 = peg$otherExpectation("The rest of the input");
var peg$e13 = peg$classExpectation([["a", "c"]], false, false);
var peg$e14 = peg$otherExpectation("The rest of the input");

var peg$f0 = function(match, rest) { return {match, rest}; };
var peg$f1 = function(match, rest) { return {match, rest}; };
Expand Down Expand Up @@ -1440,30 +1442,12 @@ function peg$parse(input, options) {
var s0, s1, s2;

s0 = peg$currPos;
if (input.charCodeAt(peg$currPos) === 97) {
s1 = peg$c3;
if (peg$r3.test(input.charAt(peg$currPos))) {
s1 = input.charAt(peg$currPos);
peg$currPos++;
} else {
s1 = peg$FAILED;
if (peg$silentFails === 0) { peg$fail(peg$e8); }
}
if (s1 === peg$FAILED) {
if (input.charCodeAt(peg$currPos) === 98) {
s1 = peg$c4;
peg$currPos++;
} else {
s1 = peg$FAILED;
if (peg$silentFails === 0) { peg$fail(peg$e9); }
}
if (s1 === peg$FAILED) {
if (input.charCodeAt(peg$currPos) === 99) {
s1 = peg$c7;
peg$currPos++;
} else {
s1 = peg$FAILED;
if (peg$silentFails === 0) { peg$fail(peg$e12); }
}
}
if (peg$silentFails === 0) { peg$fail(peg$e13); }
}
if (s1 !== peg$FAILED) {
s2 = peg$parserest();
Expand Down Expand Up @@ -1503,7 +1487,7 @@ function peg$parse(input, options) {
s0 = input.substring(s0, peg$currPos);
peg$silentFails--;
s1 = peg$FAILED;
if (peg$silentFails === 0) { peg$fail(peg$e13); }
if (peg$silentFails === 0) { peg$fail(peg$e14); }

return s0;
}
Expand Down
2 changes: 1 addition & 1 deletion docs/js/test-bundle.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/vendor/peggy/peggy.min.js

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions lib/compiler/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const generateBytecode = require("./passes/generate-bytecode");
const generateJS = require("./passes/generate-js");
const inferenceMatchResult = require("./passes/inference-match-result");
const removeProxyRules = require("./passes/remove-proxy-rules");
const mergeCharacterClasses = require("./passes/merge-character-classes");
const reportDuplicateLabels = require("./passes/report-duplicate-labels");
const reportDuplicateRules = require("./passes/report-duplicate-rules");
const reportInfiniteRecursion = require("./passes/report-infinite-recursion");
Expand Down Expand Up @@ -58,6 +59,7 @@ const compiler = {
],
transform: [
removeProxyRules,
mergeCharacterClasses,
inferenceMatchResult,
],
generate: [
Expand Down
191 changes: 191 additions & 0 deletions lib/compiler/passes/merge-character-classes.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
// @ts-check
"use strict";

/**
* @typedef {import("../../peg")} PEG
*/

/** @type {PEG.compiler.visitor} */
const visitor = require("../visitor");

/**
* @param {unknown} target
* @param {unknown} source
*/
function cloneOver(target, source) {
const t = /** @type {Record<string,unknown>} */ (target);
const s = /** @type {Record<string,unknown>} */ (source);
Object.keys(t).forEach(key => delete t[key]);
Object.keys(s).forEach(key => { t[key] = s[key]; });
}

/**
* Clean up the parts array of a `class` node, by sorting,
* then removing "contained" ranges, and merging overlapping
* or adjacent ranges.
*
* @param {PEG.ast.CharacterClass["parts"]} parts
*/
function cleanParts(parts) {
// Sort parts on increasing start, and then decreasing end.
parts.sort((a, b) => {
const [aStart, aEnd] = Array.isArray(a) ? a : [a, a];
const [bStart, bEnd] = Array.isArray(b) ? b : [b, b];
if (aStart !== bStart) {
return aStart < bStart ? -1 : 1;
}
if (aEnd !== bEnd) {
return aEnd > bEnd ? -1 : 1;
}
return 0;
});

let prevStart = "";
let prevEnd = "";
for (let i = 0; i < parts.length; i++) {
const part = parts[i];
const [curStart, curEnd] = Array.isArray(part) ? part : [part, part];
if (curEnd <= prevEnd) {
// Current range is contained in previous range,
// so drop it.
parts.splice(i--, 1);
continue;
}
if (prevEnd.charCodeAt(0) + 1 >= curStart.charCodeAt(0)) {
// Current and previous ranges overlap, or are adjacent.
// Drop the current, and extend the previous range.
parts.splice(i--, 1);
parts[i] = [prevStart, prevEnd = curEnd];
continue;
}
prevStart = curStart;
prevEnd = curEnd;
}
return parts;
}

/**
* Merges a choice character classes into a character class
* @param {PEG.ast.Grammar} ast
*/
function mergeCharacterClasses(ast) {
// Build a map from rule names to rules for quick lookup of
// ref_rules.
const rules = Object.create(null);
ast.rules.forEach(rule => (rules[rule.name] = rule.expression));
// Keep a map of which rules have been processed, so that when
// we find a ref_rule, we can make sure its processed, before we
// try to use it.
const processedRules = Object.create(null);
const [asClass, merge] = [
/**
* Determine whether a node can be represented as a simple character class,
* and return that class if so.
*
* @param {PEG.ast.Expression} node - the node to inspect
* @param {boolean} [clone] - if true, always return a new node that
* can be modified by the caller
* @returns {PEG.ast.CharacterClass | null}
*/
(node, clone) => {
if (node.type === "class" && !node.inverted) {
if (clone) {
node = { ...node };
node.parts = [...node.parts];
}
return node;
}
if (node.type === "literal" && node.value.length === 1) {
return {
type: "class",
parts: [node.value],
inverted: false,
ignoreCase: node.ignoreCase,
location: node.location,
};
}
if (node.type === "rule_ref") {
const ref = rules[node.name];
if (ref) {
if (!processedRules[node.name]) {
processedRules[node.name] = true;
merge(ref);
}
const cls = asClass(ref, true);
if (cls) {
cls.location = node.location;
}
return cls;
}
}
return null;
},
visitor.build({
choice(node) {
/** @type {PEG.ast.CharacterClass | null} */
let prev = null;
let changed = false;
node.alternatives.forEach((alt, i) => {
merge(alt);
const cls = asClass(alt);
if (!cls) {
prev = null;
return;
}
if (prev && prev.ignoreCase === cls.ignoreCase) {
prev.parts.push(...cls.parts);
node.alternatives[i - 1] = prev;
node.alternatives[i] = prev;
prev.location = {
source: prev.location.source,
start: prev.location.start,
end: cls.location.end,
};
changed = true;
} else {
prev = cls;
}
});
if (changed) {
node.alternatives = node.alternatives.filter(
(alt, i, arr) => !i || alt !== arr[i - 1]
);
node.alternatives.forEach((alt, i) => {
if (alt.type === "class") {
alt.parts = cleanParts(alt.parts);
if (alt.parts.length === 1
&& !Array.isArray(alt.parts[0])
&& !alt.inverted) {
node.alternatives[i] = {
type: "literal",
value: alt.parts[0],
ignoreCase: alt.ignoreCase,
location: alt.location,
};
}
}
});
if (node.alternatives.length === 1) {
cloneOver(node, node.alternatives[0]);
}
}
},
text(node) {
merge(node.expression);
if (node.expression.type === "class"
|| node.expression.type === "literal") {
const location = node.location;
cloneOver(node, node.expression);
node.location = location;
}
},
}),
];

ast.rules.forEach(rule => {
processedRules[rule.name] = true;
merge(rule.expression);
});
}

module.exports = mergeCharacterClasses;
Loading
0