Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mknichel/dedupe strings in module layers #18340

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
48 changes: 48 additions & 0 deletions lib/ModuleLayerCache.js
@@ -0,0 +1,48 @@
/*
MIT License http://www.opensource.org/licenses/mit-license.php
Author @mknichel
*/

"use strict";

/** @type {WeakMap<Object, Map<string, string | Buffer>>} */
const weaklyHeldCaches = new WeakMap();

/**
* Fetches a cached value if it exists or adds it to the cache and returns the
* value. This is a weak cache, meaning that it will be collected as soon as
* `associatedObjectForCache` is no longer reachable. This cache is useful
* for storing information that could be shared by modules across different
* layers. This can be useful for memory management since the same file that
* appears in multiple layers may duplicate large strings, such as the contents
* of the source file.
*
* @param {Object} associatedObjectForCache The object to associate the cached data with so that the data is collected when this object is no longer reachable.
* @param {string} key The cache key for the value.
* @param {string | Buffer} value The target value. This is used to see if the cached value is exactly the same or to add the value to the cache.
* @returns {string | Buffer} The cached value if it exists and matches or the original value.
*/
function maybeUseOrSetCachedValue(associatedObjectForCache, key, value) {
if (typeof associatedObjectForCache !== "object") {
throw new Error("`associatedObjectForCache` must be an object");
}
if (!weaklyHeldCaches.has(associatedObjectForCache)) {
weaklyHeldCaches.set(associatedObjectForCache, new Map());
}
const map = weaklyHeldCaches.get(associatedObjectForCache);
if (map.has(key)) {
const cachedValue = map.get(key);
if (
(Buffer.isBuffer(value) &&
Buffer.isBuffer(cachedValue) &&
value.equals(cachedValue)) ||
value === cachedValue
) {
return cachedValue;
}
} else {
map.set(key, value);
}
return value;
}
module.exports = maybeUseOrSetCachedValue;
11 changes: 11 additions & 0 deletions lib/NormalModule.js
Expand Up @@ -21,6 +21,7 @@ const Module = require("./Module");
const ModuleBuildError = require("./ModuleBuildError");
const ModuleError = require("./ModuleError");
const ModuleGraphConnection = require("./ModuleGraphConnection");
const maybeUseOrSetCachedValue = require("./ModuleLayerCache");
const ModuleParseError = require("./ModuleParseError");
const { JAVASCRIPT_MODULE_TYPE_AUTO } = require("./ModuleTypeConstants");
const ModuleWarning = require("./ModuleWarning");
Expand Down Expand Up @@ -780,6 +781,16 @@ class NormalModule extends Module {
* @returns {Source} the created source
*/
createSource(context, content, sourceMap, associatedObjectForCache) {
// If this module exists in a layer, try to reuse the value for the
// source string so it is not duplicated in multiple modules.
if (this.layer && this.resource) {
content = maybeUseOrSetCachedValue(
associatedObjectForCache,
this.resource,
content
);
}

if (Buffer.isBuffer(content)) {
return new RawSource(content);
}
Expand Down
2 changes: 1 addition & 1 deletion lib/json/JsonModulesPlugin.js
Expand Up @@ -44,7 +44,7 @@ class JsonModulesPlugin {
.tap(PLUGIN_NAME, parserOptions => {
validate(parserOptions);

return new JsonParser(parserOptions);
return new JsonParser(parserOptions, compilation);
});
normalModuleFactory.hooks.createGenerator
.for(JSON_MODULE_TYPE)
Expand Down
19 changes: 18 additions & 1 deletion lib/json/JsonParser.js
Expand Up @@ -5,6 +5,7 @@

"use strict";

const maybeUseOrSetCachedValue = require("../ModuleLayerCache");
const Parser = require("../Parser");
const JsonExportsDependency = require("../dependencies/JsonExportsDependency");
const memoize = require("../util/memoize");
Expand All @@ -22,10 +23,12 @@ const getParseJson = memoize(() => require("json-parse-even-better-errors"));
class JsonParser extends Parser {
/**
* @param {JsonModulesPluginParserOptions} options parser options
* @param {Object} associatedObjectForCache An object to associate cached data with.
*/
constructor(options) {
constructor(options, associatedObjectForCache) {
super();
this.options = options || {};
this.associatedObjectForCache = associatedObjectForCache;
}

/**
Expand Down Expand Up @@ -53,6 +56,20 @@ class JsonParser extends Parser {
} catch (e) {
throw new Error(`Cannot parse JSON: ${/** @type {Error} */ (e).message}`);
}

// If the module is associated with a layer, try to reuse cached data instead
// of duplicating the data multiple times.
const module = state.module;
if (module && module.resource && module.layer && Buffer.isBuffer(data)) {
data = /** @type {Buffer} */ (
maybeUseOrSetCachedValue(
this.associatedObjectForCache,
module.resource,
data
)
);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we move it to method, to avoid duplicate logic (for example to the compilation class)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Sorry for the delay. Let me know if this is what you had in mind.


const jsonData = new JsonData(/** @type {Buffer | RawJsonData} */ (data));
const buildInfo = /** @type {BuildInfo} */ (state.module.buildInfo);
buildInfo.jsonData = jsonData;
Expand Down