From b6db78d8c996d46c842f25199521f36ffc7c104b Mon Sep 17 00:00:00 2001 From: Luke Karrys Date: Wed, 14 Jun 2023 19:00:40 -0700 Subject: [PATCH] fix: better handling of whitespace --- classes/comparator.js | 3 +- classes/range.js | 64 ++++++++++++++++------------ classes/semver.js | 2 +- functions/coerce.js | 2 +- internal/re.js | 11 +++++ package.json | 2 +- test/integration/whitespace.js | 39 +++++++++++++++++ test/internal/re.js | 8 +++- test/map.js | 77 ++++++++++++++++------------------ 9 files changed, 136 insertions(+), 72 deletions(-) create mode 100644 test/integration/whitespace.js diff --git a/classes/comparator.js b/classes/comparator.js index 2146c884..3d39c0ee 100644 --- a/classes/comparator.js +++ b/classes/comparator.js @@ -16,6 +16,7 @@ class Comparator { } } + comp = comp.trim().split(/\s+/).join(' ') debug('comparator', comp, options) this.options = options this.loose = !!options.loose @@ -133,7 +134,7 @@ class Comparator { module.exports = Comparator const parseOptions = require('../internal/parse-options') -const { re, t } = require('../internal/re') +const { safeRe: re, t } = require('../internal/re') const cmp = require('../functions/cmp') const debug = require('../internal/debug') const SemVer = require('./semver') diff --git a/classes/range.js b/classes/range.js index d9e866de..53c2540f 100644 --- a/classes/range.js +++ b/classes/range.js @@ -26,19 +26,26 @@ class Range { this.loose = !!options.loose this.includePrerelease = !!options.includePrerelease - // First, split based on boolean or || + // First reduce all whitespace as much as possible so we do not have to rely + // on potentially slow regexes like \s*. This is then stored and used for + // future error messages as well. this.raw = range - this.set = range + .trim() + .split(/\s+/) + .join(' ') + + // First, split on || + this.set = this.raw .split('||') // map the range to a 2d array of comparators - .map(r => this.parseRange(r.trim())) + .map(r => this.parseRange(r)) // throw out any comparator lists that are empty // this generally means that it was not a valid range, which is allowed // in loose mode, but will still throw if the WHOLE range is invalid. .filter(c => c.length) if (!this.set.length) { - throw new TypeError(`Invalid SemVer Range: ${range}`) + throw new TypeError(`Invalid SemVer Range: ${this.raw}`) } // if we have any that are not the null set, throw out null sets. @@ -64,9 +71,7 @@ class Range { format () { this.range = this.set - .map((comps) => { - return comps.join(' ').trim() - }) + .map((comps) => comps.join(' ').trim()) .join('||') .trim() return this.range @@ -77,8 +82,6 @@ class Range { } parseRange (range) { - range = range.trim() - // memoize range parsing for performance. // this is a very hot path, and fully deterministic. const memoOpts = @@ -105,9 +108,6 @@ class Range { // `^ 1.2.3` => `^1.2.3` range = range.replace(re[t.CARETTRIM], caretTrimReplace) - // normalize spaces - range = range.split(/\s+/).join(' ') - // At this point, the range is completely trimmed and // ready to be split into comparators. @@ -203,7 +203,7 @@ const Comparator = require('./comparator') const debug = require('../internal/debug') const SemVer = require('./semver') const { - re, + safeRe: re, t, comparatorTrimReplace, tildeTrimReplace, @@ -257,10 +257,13 @@ const isX = id => !id || id.toLowerCase() === 'x' || id === '*' // ~1.2.3, ~>1.2.3 --> >=1.2.3 <1.3.0-0 // ~1.2.0, ~>1.2.0 --> >=1.2.0 <1.3.0-0 // ~0.0.1 --> >=0.0.1 <0.1.0-0 -const replaceTildes = (comp, options) => - comp.trim().split(/\s+/).map((c) => { - return replaceTilde(c, options) - }).join(' ') +const replaceTildes = (comp, options) => { + return comp + .trim() + .split(/\s+/) + .map((c) => replaceTilde(c, options)) + .join(' ') +} const replaceTilde = (comp, options) => { const r = options.loose ? re[t.TILDELOOSE] : re[t.TILDE] @@ -298,10 +301,13 @@ const replaceTilde = (comp, options) => { // ^1.2.0 --> >=1.2.0 <2.0.0-0 // ^0.0.1 --> >=0.0.1 <0.0.2-0 // ^0.1.0 --> >=0.1.0 <0.2.0-0 -const replaceCarets = (comp, options) => - comp.trim().split(/\s+/).map((c) => { - return replaceCaret(c, options) - }).join(' ') +const replaceCarets = (comp, options) => { + return comp + .trim() + .split(/\s+/) + .map((c) => replaceCaret(c, options)) + .join(' ') +} const replaceCaret = (comp, options) => { debug('caret', comp, options) @@ -358,9 +364,10 @@ const replaceCaret = (comp, options) => { const replaceXRanges = (comp, options) => { debug('replaceXRanges', comp, options) - return comp.split(/\s+/).map((c) => { - return replaceXRange(c, options) - }).join(' ') + return comp + .split(/\s+/) + .map((c) => replaceXRange(c, options)) + .join(' ') } const replaceXRange = (comp, options) => { @@ -443,12 +450,15 @@ const replaceXRange = (comp, options) => { const replaceStars = (comp, options) => { debug('replaceStars', comp, options) // Looseness is ignored here. star is always as loose as it gets! - return comp.trim().replace(re[t.STAR], '') + return comp + .trim() + .replace(re[t.STAR], '') } const replaceGTE0 = (comp, options) => { debug('replaceGTE0', comp, options) - return comp.trim() + return comp + .trim() .replace(re[options.includePrerelease ? t.GTE0PRE : t.GTE0], '') } @@ -486,7 +496,7 @@ const hyphenReplace = incPr => ($0, to = `<=${to}` } - return (`${from} ${to}`).trim() + return `${from} ${to}`.trim() } const testSet = (set, version, options) => { diff --git a/classes/semver.js b/classes/semver.js index 99dbe82d..e1208fe6 100644 --- a/classes/semver.js +++ b/classes/semver.js @@ -1,6 +1,6 @@ const debug = require('../internal/debug') const { MAX_LENGTH, MAX_SAFE_INTEGER } = require('../internal/constants') -const { re, t } = require('../internal/re') +const { safeRe: re, t } = require('../internal/re') const parseOptions = require('../internal/parse-options') const { compareIdentifiers } = require('../internal/identifiers') diff --git a/functions/coerce.js b/functions/coerce.js index 2e01452f..febbff9c 100644 --- a/functions/coerce.js +++ b/functions/coerce.js @@ -1,6 +1,6 @@ const SemVer = require('../classes/semver') const parse = require('./parse') -const { re, t } = require('../internal/re') +const { safeRe: re, t } = require('../internal/re') const coerce = (version, options) => { if (version instanceof SemVer) { diff --git a/internal/re.js b/internal/re.js index ed88398a..f73ef1aa 100644 --- a/internal/re.js +++ b/internal/re.js @@ -4,16 +4,27 @@ exports = module.exports = {} // The actual regexps go on exports.re const re = exports.re = [] +const safeRe = exports.safeRe = [] const src = exports.src = [] const t = exports.t = {} let R = 0 const createToken = (name, value, isGlobal) => { + // Replace all greedy whitespace to prevent regex dos issues. These regex are + // used internally via the safeRe object since all inputs in this library get + // normalized first to trim and collapse all extra whitespace. The original + // regexes are exported for userland consumption and lower level usage. A + // future breaking change could export the safer regex only with a note that + // all input should have extra whitespace removed. + const safe = value + .split('\\s*').join('\\s{0,1}') + .split('\\s+').join('\\s') const index = R++ debug(name, index, value) t[name] = index src[index] = value re[index] = new RegExp(value, isGlobal ? 'g' : undefined) + safeRe[index] = new RegExp(safe, isGlobal ? 'g' : undefined) } // The following Regular Expressions can be used for tokenizing, diff --git a/package.json b/package.json index 204e0086..72077036 100644 --- a/package.json +++ b/package.json @@ -37,7 +37,7 @@ "range.bnf" ], "tap": { - "check-coverage": true, + "timeout": 30, "coverage-map": "map.js", "nyc-arg": [ "--exclude", diff --git a/test/integration/whitespace.js b/test/integration/whitespace.js new file mode 100644 index 00000000..eb9744d9 --- /dev/null +++ b/test/integration/whitespace.js @@ -0,0 +1,39 @@ +const { test } = require('tap') +const Range = require('../../classes/range') +const SemVer = require('../../classes/semver') +const Comparator = require('../../classes/comparator') +const validRange = require('../../ranges/valid') +const minVersion = require('../../ranges/min-version') +const minSatisfying = require('../../ranges/min-satisfying') +const maxSatisfying = require('../../ranges/max-satisfying') + +const s = (n = 500000) => ' '.repeat(n) + +test('regex dos via range whitespace', (t) => { + // a range with this much whitespace would take a few minutes to process if + // any redos susceptible regexes were used. there is a global tap timeout per + // file set in the package.json that will error if this test takes too long. + const r = `1.2.3 ${s()} <1.3.0` + + t.equal(new Range(r).range, '1.2.3 <1.3.0') + t.equal(validRange(r), '1.2.3 <1.3.0') + t.equal(minVersion(r).version, '1.2.3') + t.equal(minSatisfying(['1.2.3'], r), '1.2.3') + t.equal(maxSatisfying(['1.2.3'], r), '1.2.3') + + t.end() +}) + +test('semver version', (t) => { + const v = `${s(125)}1.2.3${s(125)}` + const tooLong = `${s()}1.2.3${s()}` + t.equal(new SemVer(v).version, '1.2.3') + t.throws(() => new SemVer(tooLong)) + t.end() +}) + +test('comparator', (t) => { + const c = `${s()}<${s()}1.2.3${s()}` + t.equal(new Comparator(c).value, '<1.2.3') + t.end() +}) diff --git a/test/internal/re.js b/test/internal/re.js index 1aad22ba..2851b325 100644 --- a/test/internal/re.js +++ b/test/internal/re.js @@ -1,5 +1,5 @@ const { test } = require('tap') -const { src, re } = require('../../internal/re') +const { src, re, safeRe } = require('../../internal/re') const semver = require('../../') test('has a list of src, re, and tokens', (t) => { @@ -13,5 +13,11 @@ test('has a list of src, re, and tokens', (t) => { for (const i in semver.tokens) { t.match(semver.tokens[i], Number, 'tokens are numbers') } + + safeRe.forEach(r => { + t.notMatch(r.source, '\\s+', 'safe regex do not contain greedy whitespace') + t.notMatch(r.source, '\\s*', 'safe regex do not contain greedy whitespace') + }) + t.end() }) diff --git a/test/map.js b/test/map.js index 8179e71e..5c36eb7d 100644 --- a/test/map.js +++ b/test/map.js @@ -1,49 +1,46 @@ const t = require('tap') +const { resolve, join, relative, extname, dirname, basename } = require('path') +const { statSync, readdirSync } = require('fs') +const map = require('../map.js') +const pkg = require('../package.json') -// ensure that the coverage map maps all coverage -const ignore = [ - '.git', - '.github', - '.commitlintrc.js', - '.eslintrc.js', - '.eslintrc.local.js', - 'node_modules', - 'coverage', - 'tap-snapshots', - 'test', - 'fixtures', -] +const ROOT = resolve(__dirname, '..') +const TEST = join(ROOT, 'test') +const IGNORE_DIRS = ['fixtures', 'integration'] -const { statSync, readdirSync } = require('fs') -const find = (folder, set = [], root = true) => { - const ent = readdirSync(folder) - set.push(...ent.filter(f => !ignore.includes(f) && /\.m?js$/.test(f)).map(f => folder + '/' + f)) - for (const e of ent.filter(f => !ignore.includes(f) && !/\.m?js$/.test(f))) { - if (statSync(folder + '/' + e).isDirectory()) { - find(folder + '/' + e, set, false) +const getFile = (f) => { + try { + if (statSync(f).isFile()) { + return extname(f) === '.js' ? [f] : [] } + } catch { + return [] } - if (!root) { - return - } - return set.map(f => f.slice(folder.length + 1) - .replace(/\\/g, '/')) - .sort((a, b) => a.localeCompare(b)) } -const { resolve } = require('path') -const root = resolve(__dirname, '..') +const walk = (item, res = []) => getFile(item) || readdirSync(item) + .map(f => join(item, f)) + .reduce((acc, f) => acc.concat(statSync(f).isDirectory() ? walk(f, res) : getFile(f)), []) + .filter(Boolean) -const sut = find(root) -const tests = find(root + '/test') -t.strictSame(sut, tests, 'test files should match system files') -const map = require('../map.js') +const walkAll = (items, relativeTo) => items + .reduce((acc, f) => acc.concat(walk(join(ROOT, f))), []) + .map((f) => relative(relativeTo, f)) + .sort() -for (const testFile of tests) { - t.test(testFile, t => { - t.plan(1) - // cast to an array, since map() can return a string or array - const systemFiles = [].concat(map(testFile)) - t.ok(systemFiles.some(sys => sut.includes(sys)), 'test covers a file') - }) -} +t.test('tests match system', t => { + const sut = walkAll([pkg.tap['coverage-map'], ...pkg.files], ROOT) + const tests = walkAll([basename(TEST)], TEST) + .filter(f => !IGNORE_DIRS.includes(dirname(f))) + + t.strictSame(sut, tests, 'test files should match system files') + + for (const f of tests) { + t.test(f, t => { + t.plan(1) + t.ok(sut.includes(map(f)), 'test covers a file') + }) + } + + t.end() +})