Skip to content

Commit 951c8f6

Browse files
committed
feat: move content hashing to a child process
I noticed in profiles that this was actually a bottleneck for Jest tests (in #214) when running as a cluster. I wanted to use a worker thread for it, but it looks like there's an issue in vscode preventing that [1] for the moment. This cuts the time-to-first-breakpoint in half for the jest tests, which is fairly nice. The child process is killed after 30 seconds of inactivity. I may do an algorithmic optimization pass on the hash in the future. In particular, Node/V8 now has native bigint support, which is almost certainly faster than the `long` library. [1] microsoft/vscode#88386
1 parent 35d9a5d commit 951c8f6

File tree

7 files changed

+122
-14
lines changed

7 files changed

+122
-14
lines changed

gulpfile.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ async function runWebpack(packages) {
189189
gulp.task('package:webpack-bundle', async () => {
190190
const packages = [
191191
{ entry: `${buildSrcDir}/extension.js`, library: true },
192+
{ entry: `${buildSrcDir}/common/hash/hash.js`, library: false },
192193
{ entry: `${buildSrcDir}/${nodeTargetsDir}/bootloader.js`, library: false },
193194
{ entry: `${buildSrcDir}/${nodeTargetsDir}/watchdog.js`, library: false },
194195
];

src/common/hash.ts renamed to src/common/hash/hash.ts

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,13 @@
22
* Copyright (C) Microsoft Corporation. All rights reserved.
33
*--------------------------------------------------------*/
44
import Long from 'long';
5+
import { readFileRaw } from '../fsUtils';
56

6-
export function calculateHash(input: Buffer): string {
7+
/**
8+
* An implementation of the Chrome content hashing algorithm used to verify
9+
* whether files on disk are the same as those in the debug session.
10+
*/
11+
function calculateHash(input: Buffer): string {
712
const prime = [
813
new Long(0x3fb75161, 0, true),
914
new Long(0xab1f4e4f, 0, true),
@@ -99,3 +104,33 @@ function normalize(buffer: Buffer): Buffer {
99104
/**
 * Re-encodes UTF-8 bytes as UTF-16 little-endian by decoding them into a
 * string and then encoding that string again.
 */
function utf8ToUtf16(buffer: Buffer) {
  const decoded = buffer.toString('utf8');
  return Buffer.from(decoded, 'utf16le');
}
107+
108+
/**
109+
* Message sent to the hash worker.
110+
*/
111+
export type HashRequest =
  // Hash the file at this path; the worker reads it from disk itself.
  | { id: number; file: string }
  // Hash the given contents directly.
  | { id: number; data: string | Buffer };
112+
113+
/**
114+
* Message received in the hash response.
115+
*/
116+
export type HashResponse = {
  // Echoes the id of the request being answered.
  id: number;
  // Left out when the worker could not produce a hash (e.g. the file read failed).
  hash?: string;
};
117+
118+
/**
 * Runs the hashing loop inside the forked child process. Each HashRequest
 * received over IPC is answered with one HashResponse carrying the same id;
 * `hash` is omitted when the content could not be read or hashed.
 *
 * @param send Delivers a response back to the parent process.
 */
function startWorker(send: (message: HashResponse) => void) {
  // Hashing is synchronous and may throw. Catching here guarantees the parent
  // still receives a reply for this id instead of waiting on it forever (an
  // uncaught throw in the message handler would take the worker down).
  const reply = (id: number, compute: () => string) => {
    let hash: string | undefined;
    try {
      hash = compute();
    } catch {
      // Answered below without a hash, same as a failed file read.
    }

    send(hash === undefined ? { id } : { id, hash });
  };

  process.on('message', (msg: HashRequest) => {
    if ('file' in msg) {
      readFileRaw(msg.file).then(
        data => reply(msg.id, () => calculateHash(data)),
        () => send({ id: msg.id }),
      );
    } else if ('data' in msg) {
      const { data } = msg;
      // NOTE(review): with Node's default IPC serialization a Buffer presumably
      // arrives in its JSON form rather than as a Buffer instance, and
      // Buffer.from() is relied on to rebuild it -- confirm.
      reply(msg.id, () =>
        calculateHash(data instanceof Buffer ? data : Buffer.from(data, 'utf-8')),
      );
    }
  });
}
133+
134+
// `process.send` is only checked for presence here; when it exists, this
// module starts answering hash requests through it.
const sendToParent = process.send?.bind(process);
if (sendToParent) {
  startWorker(sendToParent);
}

src/common/hash/index.ts

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*---------------------------------------------------------
2+
* Copyright (C) Microsoft Corporation. All rights reserved.
3+
*--------------------------------------------------------*/
4+
5+
import { ChildProcess, fork } from 'child_process';
6+
import { join } from 'path';
7+
import { HashRequest, HashResponse } from './hash';
8+
import { debounce } from '../objUtils';
9+
10+
let instance: ChildProcess | undefined;
11+
let messageId = 0;
12+
13+
// Debounced teardown of the hash worker: the callback kills the current
// child (if any) and forgets it so the next request forks a fresh one.
const cleanup = debounce(30 * 1000, () => {
  if (instance) {
    instance.kill();
    instance = undefined;
  }
});
17+
18+
/**
 * Returns the shared hash worker, forking it on first use.
 *
 * The cached reference is dropped as soon as the child exits or reports an
 * error, so a crashed worker is replaced on the next request instead of
 * being handed out again.
 */
const create = () => {
  if (instance) {
    return instance;
  }

  const child = fork(join(__dirname, 'hash.js'), [], { env: {}, silent: true });
  // Every in-flight request attaches its own listeners to the child.
  child.setMaxListeners(Infinity);

  const forget = () => {
    // Only clear the slot if it still points at this child; a newer worker
    // may already have replaced it.
    if (instance === child) {
      instance = undefined;
    }
  };

  child.once('exit', forget);
  // A listener must exist for 'error' (failed spawn/kill/send); otherwise the
  // event would surface as an uncaught exception in the host process.
  child.on('error', forget);

  instance = child;
  return child;
};
27+
28+
/**
 * Sends a request to the hash worker and resolves with the hash from the
 * response whose id matches the request.
 *
 * The promise never rejects. It resolves with `undefined` when the worker
 * replies without a hash, when the message cannot be delivered, or when the
 * worker exits or errors before answering -- so callers cannot hang on a
 * worker that has gone away (for instance after the idle cleanup kills it).
 *
 * @param req Request to forward; `req.id` is used to correlate the response.
 * @returns The computed hash, or `undefined` if none could be obtained.
 */
const send = (req: HashRequest): Promise<string | undefined> => {
  const cp = create();
  cleanup();

  return new Promise(resolve => {
    // Single exit point: detach everything this request attached, then settle.
    const finish = (hash?: string) => {
      cp.removeListener('message', onMessage);
      cp.removeListener('exit', onGone);
      cp.removeListener('error', onGone);
      resolve(hash);
    };

    const onMessage = (res: HashResponse) => {
      if (res.id === req.id) {
        finish(res.hash);
      }
    };

    const onGone = () => finish(undefined);

    cp.addListener('message', onMessage);
    cp.addListener('exit', onGone);
    cp.addListener('error', onGone);

    // With a callback, delivery failures are reported here rather than
    // through an 'error' event on the child.
    cp.send(req, err => {
      if (err) {
        finish(undefined);
      }
    });
  });
};
44+
45+
/**
 * Computes the Chrome content hash of in-memory script contents by handing
 * them to the hash worker under a fresh message id.
 */
export const hashBytes = (data: string | Buffer) => {
  const id = messageId++;
  return send({ data, id });
};
49+
50+
/**
 * Computes the Chrome content hash of a file on disk by handing its path to
 * the hash worker under a fresh message id.
 */
export const hashFile = (file: string) => {
  const id = messageId++;
  return send({ file, id });
};

src/common/objUtils.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ export function debounce(duration: number, fn: () => void): (() => void) & { cle
172172
let timeout: NodeJS.Timer | void;
173173
const debounced = () => {
174174
if (timeout !== undefined) {
175-
return;
175+
clearTimeout(timeout);
176176
}
177177

178178
timeout = setTimeout(() => {

src/common/sourceUtils.ts

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ import * as sourceMap from 'source-map';
77
import * as ts from 'typescript';
88
import * as urlUtils from './urlUtils';
99
import * as fsUtils from './fsUtils';
10-
import { calculateHash } from './hash';
1110
import { SourceMap, ISourceMapMetadata } from './sourceMaps/sourceMap';
1211
import { logger } from './logging/logger';
1312
import { LogTag } from './logging';
13+
import { hashBytes, hashFile } from './hash';
1414

1515
export async function prettyPrintAsSourceMap(
1616
fileName: string,
@@ -310,12 +310,11 @@ export async function checkContentHash(
310310
const exists = await fsUtils.exists(absolutePath);
311311
return exists ? absolutePath : undefined;
312312
}
313-
const content =
313+
const hash =
314314
typeof contentOverride === 'string'
315-
? Buffer.from(contentOverride, 'utf8')
316-
: await fsUtils.readFileRaw(absolutePath);
315+
? await hashBytes(contentOverride)
316+
: await hashFile(absolutePath);
317317

318-
const hash = calculateHash(content);
319318
return hash === contentHash ? absolutePath : undefined;
320319
}
321320

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
1d9f277f134f31935a286ff810acdf571af3498e
2+
1d9f277f134f31935a286ff810acdf571af3498e
3+
1d9f277f134f31935a286ff810acdf571af3498e
4+
1d9f277f134f31935a286ff810acdf571af3498e

src/test/sources/sourcesTest.ts

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22
* Copyright (C) Microsoft Corporation. All rights reserved.
33
*--------------------------------------------------------*/
44

5-
import { TestP } from '../test';
5+
import { TestP, createFileTree, testFixturesDir } from '../test';
66
import Dap from '../../dap/api';
7-
import { calculateHash } from '../../common/hash';
7+
import { hashBytes, hashFile } from '../../common/hash';
88
import { itIntegrates } from '../testIntegrationUtils';
9+
import { join } from 'path';
910

1011
describe('sources', () => {
1112
async function dumpSource(p: TestP, event: Dap.LoadedSourceEventParams, name: string) {
@@ -149,10 +150,25 @@ describe('sources', () => {
149150
0x31, 0x00, 0x31, 0x00, 0x31, 0x00, 0x22, 0x00]);
150151

151152
itIntegrates('hash bom', async ({ r }) => {
152-
r.log(calculateHash(utf8NoBOM));
153-
r.log(calculateHash(utf8BOM));
154-
r.log(calculateHash(utf16BigEndianBOM));
155-
r.log(calculateHash(utf16LittleEndianBOM));
153+
r.log(await hashBytes(utf8NoBOM));
154+
r.log(await hashBytes(utf8BOM));
155+
r.log(await hashBytes(utf16BigEndianBOM));
156+
r.log(await hashBytes(utf16LittleEndianBOM));
157+
r.assertLog();
158+
});
159+
160+
itIntegrates('hash from file', async ({ r }) => {
161+
createFileTree(testFixturesDir, {
162+
utf8NoBOM,
163+
utf8BOM,
164+
utf16BigEndianBOM,
165+
utf16LittleEndianBOM,
166+
});
167+
168+
r.log(await hashFile(join(testFixturesDir, 'utf8NoBOM')));
169+
r.log(await hashFile(join(testFixturesDir, 'utf8BOM')));
170+
r.log(await hashFile(join(testFixturesDir, 'utf16BigEndianBOM')));
171+
r.log(await hashFile(join(testFixturesDir, 'utf16LittleEndianBOM')));
156172
r.assertLog();
157173
});
158174

@@ -173,7 +189,7 @@ describe('sources', () => {
173189
0x75, 0x72, 0x6E, 0x20, 0x32, 0x35, 0x3B, 0x0D, 0x0A, 0x7D]);
174190

175191
itIntegrates('hash code points', async ({ r }) => {
176-
r.log(calculateHash(multiByteCodePoints));
192+
r.log(await hashBytes(multiByteCodePoints.toString('utf-8')));
177193
r.assertLog();
178194
});
179195
});

0 commit comments

Comments
 (0)