Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also . Learn more about diff comparisons here.
base repository: microsoft/lage
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 3e65d7f50cc95412a79bb8a689c65b8cb39dd9b9
Choose a base ref
...
head repository: microsoft/lage
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 10cda62c08de4bd604d80b7d0ebb9aefbb865d70
Choose a head ref
  • 1 commit
  • 9 files changed
  • 1 contributor

Commits on Mar 8, 2025

  1. Adding global input hashes in lage server worker (#861)

    * cheat on optimization by leverage the fact that 'info' command is called before anything else ALWAYS in BXL
    
    * Change files
    
    * Change files
    
    * cleaning up
    
    * make it relative
    
    * speeding things up by not using the glob() call at all in the lageservice init step
    kenotron authored Mar 8, 2025
    Copy the full SHA
    10cda62 View commit details
25 changes: 25 additions & 0 deletions change/change-1ac49201-a1a8-4f31-a6a4-b5d05e1d76a0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"changes": [
{
"type": "minor",
"comment": "cheat on optimization by leverage the fact that 'info' command is called before anything else ALWAYS in BXL",
"packageName": "@lage-run/cli",
"email": "kchau@microsoft.com",
"dependentChangeType": "patch"
},
{
"type": "minor",
"comment": "cheat on optimization by leverage the fact that 'info' command is called before anything else ALWAYS in BXL",
"packageName": "@lage-run/hasher",
"email": "kchau@microsoft.com",
"dependentChangeType": "patch"
},
{
"type": "minor",
"comment": "cheat on optimization by leverage the fact that 'info' command is called before anything else ALWAYS in BXL",
"packageName": "@lage-run/rpc",
"email": "kchau@microsoft.com",
"dependentChangeType": "patch"
}
]
}
11 changes: 11 additions & 0 deletions change/change-be167829-f523-4d7e-8fe1-e50eac6ba277.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"changes": [
{
"type": "minor",
"comment": "cheat on optimization by leverage the fact that 'info' command is called before anything else ALWAYS in BXL",
"packageName": "lage",
"email": "kchau@microsoft.com",
"dependentChangeType": "patch"
}
]
}
4 changes: 3 additions & 1 deletion packages/cli/src/commands/exec/executeRemotely.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import path from "path";
import type { Logger } from "@lage-run/logger";
import createLogger from "@lage-run/logger";
import { initializeReporters } from "../initializeReporters.js";
@@ -140,7 +141,8 @@ export async function executeRemotely(options: ExecRemotelyOptions, command: Com
process.exitCode = response.exitCode;

// we will simulate file access even if exit code may be non-zero
await simulateFileAccess(logger, [...response.inputs, ...response.globalInputs], response.outputs);
const relativeGlobalInputsForTarget = path.relative(root, path.join(response.cwd, response.globalInputHashFile));
await simulateFileAccess(logger, [...response.inputs, relativeGlobalInputsForTarget], response.outputs);
} else {
process.exitCode = 1;
}
68 changes: 63 additions & 5 deletions packages/cli/src/commands/info/action.ts
Original file line number Diff line number Diff line change
@@ -7,16 +7,21 @@ import { type PackageInfos, getPackageInfos, getWorkspaceRoot } from "workspace-
import { getFilteredPackages } from "../../filter/getFilteredPackages.js";
import createLogger from "@lage-run/logger";
import path from "path";
import fs from "fs";
import { parse } from "shell-quote";

import type { ReporterInitOptions } from "../../types/ReporterInitOptions.js";
import type { Target } from "@lage-run/target-graph";
import { type Target, getStartTargetId } from "@lage-run/target-graph";
import { initializeReporters } from "../initializeReporters.js";
import { TargetRunnerPicker } from "@lage-run/runners";
import { getBinPaths } from "../../getBinPaths.js";
import { runnerPickerOptions } from "../../runnerPickerOptions.js";
import { parseServerOption } from "../parseServerOption.js";
import { optimizeTargetGraph } from "../../optimizeTargetGraph.js";
import { glob } from "@lage-run/globby";
import { FileHasher } from "@lage-run/hasher/lib/FileHasher.js";
import { hashStrings } from "@lage-run/hasher";
import { getGlobalInputHashFilePath } from "../targetHashFilePath.js";

interface InfoActionOptions extends ReporterInitOptions {
dependencies: boolean;
@@ -147,6 +152,58 @@ export async function infoAction(options: InfoActionOptions, command: Command) {
generatePackageTask(target, taskArgs, config, options, binPaths, packageInfos, tasks)
);

// In worker server mode, we need to actually speed up the BuildXL runs with presupplied global input hashes so that it doesn't try to read it over and over again
// This is an important optimization for BuildXL for large amount of env glob matches in non-well-behaved monorepos
// (e.g. repos that have files listed in env glob to avoid circular dependencies in package graph)
if (shouldRunWorkersAsService(options)) {
// For each target in the target graph, we need to create a global input hash file in this kind of location:
// ${target.cwd}/.lage/global_inputs_hash
// We will use glob for these files and use the FileHasher to generate these hashes.
const fileHasher = new FileHasher({
root,
});

const globHashCache = new Map<string, string>();
const globHashWithCache = (patterns: string[], options: { cwd: string }) => {
const key = patterns.join("###");
if (globHashCache.has(key)) {
return globHashCache.get(key)!;
}

const files = glob(patterns, options);
const hash = hashStrings(Object.values(fileHasher.hash(files.map((file) => path.join(root, file)))));

globHashCache.set(key, hash);

return hash;
};

const globalInputs = config.cacheOptions?.environmentGlob
? glob(config.cacheOptions?.environmentGlob, { cwd: root })
: ["lage.config.js"];

for (const target of optimizedTargets) {
if (target.id === getStartTargetId()) {
continue;
}

const targetGlobalInputsHash = target.environmentGlob
? globHashWithCache(target.environmentGlob, { cwd: root })
: globHashWithCache(globalInputs, { cwd: root });

const targetGlobalInputsHashFile = path.join(target.cwd, getGlobalInputHashFilePath(target));
const targetGlobalInputsHashFileDir = path.dirname(targetGlobalInputsHashFile);

// Make sure the directory exists
if (!fs.existsSync(targetGlobalInputsHashFileDir)) {
fs.mkdirSync(targetGlobalInputsHashFileDir, { recursive: true });
}

// Write the hash to the file
fs.writeFileSync(targetGlobalInputsHashFile, targetGlobalInputsHash);
}
}

logger.info("info", {
command: command.args,
scope,
@@ -188,6 +245,10 @@ function generatePackageTask(
return packageTask;
}

function shouldRunWorkersAsService(options: InfoActionOptions) {
return (typeof process.env.LAGE_WORKER_SERVER === "string" && process.env.LAGE_WORKER_SERVER !== "false") || !!options.server;
}

function generateCommand(
target: Target,
taskArgs: string[],
@@ -197,9 +258,6 @@ function generateCommand(
packageInfos: PackageInfos,
tasks: string[]
) {
const shouldRunWorkersAsService =
(typeof process.env.LAGE_WORKER_SERVER === "string" && process.env.LAGE_WORKER_SERVER !== "false") || !!options.server;

if (target.type === "npmScript") {
const script = target.packageName !== undefined ? packageInfos[target.packageName]?.scripts?.[target.task] : undefined;

@@ -215,7 +273,7 @@ function generateCommand(
const npmClient = config.npmClient ?? "npm";
const command = [npmClient, ...getNpmArgs(target.task, taskArgs)];
return command;
} else if (target.type === "worker" && shouldRunWorkersAsService) {
} else if (target.type === "worker" && shouldRunWorkersAsService(options)) {
const { host, port } = parseServerOption(options.server);
const command = [binPaths["lage"], "exec", "--tasks", ...tasks, "--server", `${host}:${port}`];
if (options.concurrency) {
38 changes: 15 additions & 23 deletions packages/cli/src/commands/server/lageService.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
import { type ConfigOptions, getConfig, getConcurrency, getMaxWorkersPerTask } from "@lage-run/config";
import type { Logger } from "@lage-run/logger";
import { ConnectError, Code, type ILageService } from "@lage-run/rpc";
import { getStartTargetId, getTargetId, type Target, type TargetGraph } from "@lage-run/target-graph";
import { getStartTargetId, getTargetId, type TargetGraph } from "@lage-run/target-graph";
import { type DependencyMap, getPackageInfos, getWorkspaceRoot } from "workspace-tools";
import { createTargetGraph } from "../run/createTargetGraph.js";
import { type Readable } from "stream";
import { type Pool, AggregatedPool } from "@lage-run/worker-threads-pool";
import { getInputFiles, type PackageTree, TargetHasher } from "@lage-run/hasher";
import { getOutputFiles } from "./getOutputFiles.js";
import { glob } from "@lage-run/globby";
import { MemoryStream } from "./MemoryStream.js";
import { runnerPickerOptions } from "../../runnerPickerOptions.js";
import { filterPipelineDefinitions } from "../run/filterPipelineDefinitions.js";
import type { TargetRun } from "@lage-run/scheduler-types";
import { formatDuration, hrToSeconds, hrtimeDiff } from "@lage-run/format-hrtime";
import path from "path";
import fs from "fs";
import { getGlobalInputHashFilePath, getHashFilePath } from "../targetHashFilePath.js";

interface LageServiceContext {
config: ConfigOptions;
@@ -24,7 +24,6 @@ interface LageServiceContext {
dependencyMap: DependencyMap;
root: string;
pool: Pool;
globalInputs: string[];
targetHasher: TargetHasher;
}

@@ -133,14 +132,8 @@ async function createInitializedPromise({ cwd, logger, serverControls, nodeArg,
serverControls.countdownToShutdown();
});

const globalInputs = config.cacheOptions?.environmentGlob
? glob(config.cacheOptions?.environmentGlob, { cwd: root })
: ["lage.config.js"];

logger.info(`Environment glob inputs: \n${JSON.stringify(globalInputs)}\n-------`);

logger.info("done initializing");
return { config, targetGraph, packageTree, dependencyMap, root, pool, globalInputs, targetHasher };
return { config, targetGraph, packageTree, dependencyMap, root, pool, targetHasher };
}

/**
@@ -162,10 +155,6 @@ interface CreateLageServiceOptions {
tasks: string[];
}

function getHashFilePath(target: Target) {
return path.join(`node_modules/.lage/hash_${target.task}`);
}

export async function createLageService({
cwd,
serverControls,
@@ -188,7 +177,7 @@ export async function createLageService({
// THIS IS A BIG ASSUMPTION; TODO: memoize based on the parameters of the initialize() call
// The first request sets up the nodeArg and taskArgs - we are assuming that all requests to run this target are coming from the same
// `lage info` call
const { config, targetGraph, dependencyMap, packageTree, root, pool, globalInputs, targetHasher } = await initialize({
const { config, targetGraph, dependencyMap, packageTree, root, pool, targetHasher } = await initialize({
cwd,
logger,
nodeArg: request.nodeOptions,
@@ -233,18 +222,17 @@ export async function createLageService({
threadId: 0,
};

const targetGlobalInputs = target.environmentGlob ? glob(target.environmentGlob, { cwd: root }) : globalInputs;

let results: {
packageName?: string;
task: string;
cwd: string;
exitCode: number;
inputs: string[];
outputs: string[];
stdout: string;
stderr: string;
id: string;
globalInputs: string[];
globalInputHashFile: string;
};

const inputs = getInputFiles(target, dependencyMap, packageTree);
@@ -258,6 +246,7 @@ export async function createLageService({
inputs.push(path.join(path.relative(root, depTarget.cwd), getHashFilePath(depTarget)).replace(/\\/g, "/"));
}

// Write the target hash to a file for its dependants to use
const targetHashFile = getHashFilePath(target);
const targetHashFullPath = path.join(target.cwd, targetHashFile);

@@ -271,6 +260,8 @@ export async function createLageService({
throw new ConnectError(`Error writing target hash file: ${targetHashFullPath}`, Code.Internal);
}

const targetGlobalInputHashRelativePath = getGlobalInputHashFilePath(target);

try {
await pool.exec(
task,
@@ -317,13 +308,14 @@ export async function createLageService({
results = {
packageName: request.packageName,
task: request.task,
cwd: target.cwd,
exitCode: 0,
inputs,
outputs,
stdout: writableStdout.toString(),
stderr: writableStderr.toString(),
id,
globalInputs: targetGlobalInputs,
globalInputHashFile: targetGlobalInputHashRelativePath,
};
} catch (e) {
const outputs = getOutputFiles(root, target, config.cacheOptions?.outputGlob, packageTree);
@@ -336,13 +328,14 @@ export async function createLageService({
results = {
packageName: request.packageName,
task: request.task,
cwd: target.cwd,
exitCode: 1,
inputs,
outputs,
stdout: "",
stderr: e instanceof Error ? e.toString() : "",
id,
globalInputs: targetGlobalInputs,
globalInputHashFile: targetGlobalInputHashRelativePath,
};
}

@@ -351,13 +344,12 @@ export async function createLageService({
{
packageName: results.packageName,
task: results.task,
cwd: results.cwd,
exitCode: results.exitCode,
inputs: results.inputs,
outputs: results.outputs,
id: results.id,
globalInputs: `(${target.environmentGlob ? "custom target env glob used" : "general global inputs used"}): ${
results.globalInputs.length
} files`,
globalInputHashFile: targetGlobalInputHashRelativePath,
},
null,
2
9 changes: 9 additions & 0 deletions packages/cli/src/commands/targetHashFilePath.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import path from "path";

export function getHashFilePath(target: { task: string }) {
return path.join(`node_modules/.lage/hash_${target.task}`);
}

export function getGlobalInputHashFilePath(target: { task: string }) {
return path.join(`node_modules/.lage/global_inputs_hash_${target.task}`);
}
2 changes: 2 additions & 0 deletions packages/hasher/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
export { TargetHasher } from "./TargetHasher.js";
export { PackageTree } from "./PackageTree.js";
export { getInputFiles } from "./getInputFiles.js";
export { FileHasher } from "./FileHasher.js";
export { hashStrings } from "./hashStrings.js";
15 changes: 8 additions & 7 deletions packages/rpc/proto/lage/v1/lage.proto
Original file line number Diff line number Diff line change
@@ -13,13 +13,14 @@ message RunTargetRequest {
message RunTargetResponse {
string id = 1;
optional string package_name = 2;
string task = 3;
int32 exit_code = 4;
repeated string inputs = 5;
repeated string outputs = 6;
string stdout = 7;
string stderr = 8;
repeated string global_inputs = 9;
string cwd = 3;
string task = 4;
int32 exit_code = 5;
repeated string inputs = 6;
repeated string outputs = 7;
string stdout = 8;
string stderr = 9;
string global_input_hash_file = 10;
}

message PingRequest {}
Loading