Building a fast SCSS-like rule expander for CSS using fuzzy parsing
The primary advantage of a CSS preprocessor is the shorthand of nested rule expansion. For example in LESS or SCSS you can do:
.container {
h1 {
font-size: 15px;
}
a, a:visited {
color: blue;
}
}
And after compiling it the nested rules will be flattened and expanded correctly with the right selector:
.container h1 {
font-size: 15px;
}
.container a, .container a:visited {
color: blue;
}
But bringing in LESS or SCSS was historically somewhat complicated, the projects themselves large, and I love a good parsing project.
After a few attempts at writing a full-fledged parser, I gave up and
decided to try something new: fuzzy parsing. Rather than trying to
understand every type of CSS selector and every CSS property-value
syntax, I'd just look for key markers like {
,
:
, ;
and }
and assume
everything around them was valid CSS.
The code for this project is on Github.
Parsing
The core logic for parsing selectors and values is a
parseToken
function that reads characters one at a time
until it encounters some ending marker. It needs to be aware of
special bracket and string pairs like in
input[type="button"]
and background:
url('../picture.jpg');
. So if it sees one of those pair
characters it reads all characters (even ending marker characters)
until it finds the ending character for the pair.
function parseToken(
input: string,
i: number,
endMarker: Array
): [string, number] {
let token = '';
i = eatWhitespace(input, i);
while (!endMarker.includes(input[i])) {
guard(input, i, 'Waiting for ' + JSON.stringify(endMarker));
if (input[i] === "'") {
token += input[i];
i++;
while (input[i] !== "'") {
guard(input, i, 'Waiting for closing single quote');
token += input[i];
i++;
}
} else if (input[i] === '"') {
token += input[i];
i++;
while (input[i] !== '"') {
guard(input, i, 'Waiting for closing double quote');
token += input[i];
i++;
}
} else if (input[i] === '[') {
token += input[i];
i++;
while (input[i] !== ']') {
guard(input, i, 'Waiting for closing bracket');
token += input[i];
i++;
}
}
token += input[i];
i++;
}
return [token.trim(), i];
}
This in turn is called by a fuzzy parseRule
function
that looks for selectors, property-value pairs, and nested
selectors.
export interface Declaration {
type: 'declaration';
property: string;
value: string;
}
export interface Rule {
type: 'rule';
selectors: Array;
declarations: Array;
}
function parseRule(input: string, i: number): [Rule, number] {
let token = '';
let rule: Rule = { selectors: [], declarations: [], type: 'rule' };
guard(input, i, 'Waiting for EOL');
i = eatWhitespace(input, i);
let prev = ',';
while (true) {
guard(input, i, 'Waiting for comma');
[token, i] = parseToken(input, i, ['{', ',']);
rule.selectors.push(token);
i = eatWhitespace(input, i);
prev = input[i];
if (prev === '{') {
break;
}
i++; // Skip past ,
}
i++; // Skip past {
while (input[i] !== '}') {
guard(input, i, 'Waiting for closing brace');
const declaration: Declaration = {
type: 'declaration',
property: '',
value: '',
};
i = eatWhitespace(input, i);
const possibleInnerDeclarationStartingPoint = i;
token = '';
let foundInner = false;
while (input[i] !== ':') {
guard(
input,
i,
'Waiting for colon ' +
(rule.declarations.length > 0
? 'after ' +
JSON.stringify(
rule.declarations[rule.declarations.length - 1],
null,
2
)
: 'after first declaration')
);
if (input[i] === '{') {
const [nested, newI] = parseRule(
input,
possibleInnerDeclarationStartingPoint
);
rule.declarations.push(nested);
i = newI;
foundInner = true;
break;
} else {
token += input[i];
i++;
}
}
if (foundInner) {
i = eatWhitespace(input, i);
continue;
}
i++; // Skip past :
declaration.property = token.trim();
i = eatWhitespace(input, i);
[token, i] = parseToken(input, i, [';']);
i++; // Skip past ;
declaration.value = token.trim();
rule.declarations.push(declaration);
debug('Found declaration', declaration);
i = eatWhitespace(input, i);
}
i++; // Skip past }
debug('Found rule', rule);
return [rule, i];
}
This function in turn is called by parse()
until we
find all rules.
function parse(input: string, i = 0) {
const rules: Rule[] = [];
while (i < input.length) {
i = eatWhitespace(input, i);
const [rule, newI] = parseRule(input, i);
rules.push(rule);
i = eatWhitespace(input, newI);
}
return rules;
}
Expanding nested rules
Next we need to flatten all nested rules. When doing this we'll take the cartesian product of the selectors so that we produce every combination of comma-separated selectors (order matters, the outer selectors must come first).
If a selector starts with @
then we will not flatten it
since it is a CSS entity like media queries or keyframes that has
valid nested CSS rules.
function cartesian(...a: string[][]): string[][] {
return a.reduce(
(a, b) =>
a.map((x) => b.map((y) => x.concat(y))).reduce((c, d) => c.concat(d), []),
[[]] as string[][]
);
}
function flatten(rules: Rule[]) {
for (let i = 0; i < rules.length; i++) {
const rule = rules[i];
rule.declarations.forEach(function flattenDecl(decl, di) {
if (decl.type === 'rule' && !rule.selectors[0].startsWith('@')) {
// Insert into global rules after this one with correct selector
rules.splice(i + 1, 0, {
...decl,
selectors: cartesian(rule.selectors, decl.selectors).map((inner) =>
inner.join(' ')
),
});
i++; // Skip past added rule
// Remove from here
rule.declarations.splice(di, 1);
}
});
}
}
At the moment this only supports a single level of nesting rules but that will be fixed in a future release.
Code generation
After flattening, we can write out all rules.
function write(rules: Rule[], indent = '') {
const out: string[] = [];
rules.forEach(function writeRule(rule) {
const declarations = [indent + rule.selectors.join(',\n') + ' {'];
rule.declarations.forEach(function writeDecl(decl) {
if (decl.type === 'rule') {
const rules = write([decl], indent + ' ');
declarations.push(rules);
} else {
declarations.push(
indent + ' ' + decl.property + ': ' + decl.value + ';'
);
}
});
declarations.push(indent + '}');
out.push(declarations.join('\n'));
});
return out.join('\n\n');
}
And finally we can wrap this in a nice interface and include the other helper functions we used:
export const SETTINGS = {
DEBUG: false,
};
function eatWhitespace(input: string, start: number) {
while (/\s/.test(input[start])) {
start++;
}
// Eat comments
if (input[start] === '/' && input[start + 1] === '*') {
// Skip past /*
start += 2;
while (!(input[start] === '*' && input[start + 1] === '/')) {
start++;
}
// Skip past */
start += 2;
}
while (/\s/.test(input[start])) {
start++;
}
return start;
}
function debug(msg: string, ...rest: Array) {
if (SETTINGS.DEBUG) {
console.log('[Debug] ' + msg, ...rest);
}
}
function guard(input: string, i: number, msg: string) {
debug(msg);
if (input[i] === undefined) {
throw new Error(msg + ' failed');
}
}
export function transform(cssp: string) {
const rules = parse(cssp);
flatten(rules);
return write(rules);
}
Now we've got a basic CSS rule expander in <300 LoC using fuzzy parsing! Let's benchmark it.
Benchmark
This is an extremely simple benchmark to test out how long it takes this library to generate a CSS file from a SCSS file compared to the sass project.
We'll create a large file by repeating the following string a few 10,000 times:
$ ls -lah test.scss
-rw-r--r-- 1 phil phil 372K Oct 31 17:27 test.scss
$ head -n 7 test.scss
div.outer {
color: black;
div.inner {
color: white;
}
}
We'll create three simple transform scripts; using this library, using sass (written in JavaScript), and node-sass (written in C++).
$ yarn add yarn add https://github.com/multiprocessio/cssplus sass node-sass
$ cat cssplus.js
const { transform } = require('cssplus');
const fs = require('fs');
const f = fs.readFileSync(process.argv[2]).toString();
console.log(transform(f));
$ cat js-scss.js
const sass = require('sass');
console.log(sass.renderSync({file: process.argv[2]}).css.toString());
$ cat native-scss.js
const sass = require('node-sass');
console.log(sass.renderSync({file: process.argv[2]}).css.toString());
Now we run and time them.
$ time node js-scss.js test.scss > js-scssout
node js-scss.js test.scss > js-scssout 1.22s user 0.05s system 156% cpu 0.814 total
$ time node native-scss.js test.scss > native-scssout
node native-scss.js test.scss > native-scssout 0.31s user 0.03s system 101% cpu 0.333 total
$ time node cssplus.js test.scss > cssout
node cssplus.js test.scss > cssout 0.29s user 0.02s system 134% cpu 0.234 total
And diff the output files for proof of correctness (ignoring whitespace since they differ here):
$ diff -B cssout js-scssout
Nice! This library certainly does significantly less than SCSS or LESS do but at least it was extremely simple to code it up and we actually see a decent performance gain compared to the JavaScript implementation of SCSS, probably because of how stripped down the parsing is we're doing.
Share
Using fuzzy parsing you can build a CSS expander a la SCSS in under 300 lines of JavaScript code with performance on par with the native implementation of SCSS (albeit doing significantly less).
— DataStation (@multiprocessio) October 31, 2021
Check out the library & blog post!https://t.co/OLMpWpuXcXhttps://t.co/PhUG4SLpw1 pic.twitter.com/Jfu5qX2uiF
With questions, criticism or ideas, email or Tweet me.
Also, check out DataStation and dsq.