From 814380ffd14927dc9b13b3d277fde3061301ee5b Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 28 May 2025 17:22:06 +0200 Subject: [PATCH 01/18] init gearhash package --- packages/gearhash/README.md | 3 ++ packages/gearhash/asconfig.json | 22 ++++++++++++++ packages/gearhash/assembly/index.ts | 5 ++++ packages/gearhash/assembly/tsconfig.json | 6 ++++ packages/gearhash/build/.gitignore | 2 ++ packages/gearhash/index.html | 10 +++++++ packages/gearhash/package.json | 25 ++++++++++++++++ packages/gearhash/pnpm-lock.yaml | 38 ++++++++++++++++++++++++ packages/gearhash/tests/index.js | 4 +++ pnpm-workspace.yaml | 1 + 10 files changed, 116 insertions(+) create mode 100644 packages/gearhash/README.md create mode 100644 packages/gearhash/asconfig.json create mode 100644 packages/gearhash/assembly/index.ts create mode 100644 packages/gearhash/assembly/tsconfig.json create mode 100644 packages/gearhash/build/.gitignore create mode 100644 packages/gearhash/index.html create mode 100644 packages/gearhash/package.json create mode 100644 packages/gearhash/pnpm-lock.yaml create mode 100644 packages/gearhash/tests/index.js diff --git a/packages/gearhash/README.md b/packages/gearhash/README.md new file mode 100644 index 0000000000..e6f17fa01f --- /dev/null +++ b/packages/gearhash/README.md @@ -0,0 +1,3 @@ +JS and WASM implementations of https://github.com/srijs/rust-gearhash + +Using [AssemblyScript](https://www.assemblyscript.org/) to generate a lean WASM. 
\ No newline at end of file diff --git a/packages/gearhash/asconfig.json b/packages/gearhash/asconfig.json new file mode 100644 index 0000000000..8776597856 --- /dev/null +++ b/packages/gearhash/asconfig.json @@ -0,0 +1,22 @@ +{ + "targets": { + "debug": { + "outFile": "build/debug.wasm", + "textFile": "build/debug.wat", + "sourceMap": true, + "debug": true + }, + "release": { + "outFile": "build/release.wasm", + "textFile": "build/release.wat", + "sourceMap": true, + "optimizeLevel": 3, + "shrinkLevel": 0, + "converge": false, + "noAssert": false + } + }, + "options": { + "bindings": "esm" + } +} \ No newline at end of file diff --git a/packages/gearhash/assembly/index.ts b/packages/gearhash/assembly/index.ts new file mode 100644 index 0000000000..66a9aafd88 --- /dev/null +++ b/packages/gearhash/assembly/index.ts @@ -0,0 +1,5 @@ +// The entry file of your WebAssembly module. + +export function add(a: i32, b: i32): i32 { + return a + b; +} diff --git a/packages/gearhash/assembly/tsconfig.json b/packages/gearhash/assembly/tsconfig.json new file mode 100644 index 0000000000..f81c3d55e6 --- /dev/null +++ b/packages/gearhash/assembly/tsconfig.json @@ -0,0 +1,6 @@ +{ + "extends": "../node_modules/.pnpm/assemblyscript@0.27.36/node_modules/assemblyscript/std/assembly.json", + "include": [ + "./**/*.ts" + ] +} \ No newline at end of file diff --git a/packages/gearhash/build/.gitignore b/packages/gearhash/build/.gitignore new file mode 100644 index 0000000000..d6b7ef32c8 --- /dev/null +++ b/packages/gearhash/build/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/packages/gearhash/index.html b/packages/gearhash/index.html new file mode 100644 index 0000000000..c170ddeb9a --- /dev/null +++ b/packages/gearhash/index.html @@ -0,0 +1,10 @@ + + + + + + + diff --git a/packages/gearhash/package.json b/packages/gearhash/package.json new file mode 100644 index 0000000000..c11bf7aeca --- /dev/null +++ b/packages/gearhash/package.json @@ -0,0 +1,25 @@ +{ + "name": 
"@huggingface/gearhash", + "version": "0.0.1", + "scripts": { + "build": "tsc", + "asbuild:debug": "asc assembly/index.ts --target debug", + "asbuild:release": "asc assembly/index.ts --target release", + "asbuild": "npm run asbuild:debug && npm run asbuild:release", + "test": "node tests", + "start": "npx serve ." + }, + "dependencies": { + "assemblyscript": "^0.27.36" + }, + "type": "module", + "exports": { + ".": { + "import": "./build/release.js", + "types": "./build/release.d.ts" + } + }, + "devDependencies": { + "assemblyscript": "^0.27.36" + } +} \ No newline at end of file diff --git a/packages/gearhash/pnpm-lock.yaml b/packages/gearhash/pnpm-lock.yaml new file mode 100644 index 0000000000..f0edbf2226 --- /dev/null +++ b/packages/gearhash/pnpm-lock.yaml @@ -0,0 +1,38 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + dependencies: + assemblyscript: + specifier: ^0.27.36 + version: 0.27.36 + +packages: + + assemblyscript@0.27.36: + resolution: {integrity: sha512-1qX2zf6p7l/mNYv8r21jC/Yft7kX7XKR3xUHw41zvV4xad5lyC8w7jZiwZBGoy64VKZLc+bTDJDWi8Kb70YrHA==} + engines: {node: '>=18', npm: '>=10'} + hasBin: true + + binaryen@116.0.0-nightly.20240114: + resolution: {integrity: sha512-0GZrojJnuhoe+hiwji7QFaL3tBlJoA+KFUN7ouYSDGZLSo9CKM8swQX8n/UcbR0d1VuZKU+nhogNzv423JEu5A==} + hasBin: true + + long@5.3.2: + resolution: {integrity: sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==} + +snapshots: + + assemblyscript@0.27.36: + dependencies: + binaryen: 116.0.0-nightly.20240114 + long: 5.3.2 + + binaryen@116.0.0-nightly.20240114: {} + + long@5.3.2: {} diff --git a/packages/gearhash/tests/index.js b/packages/gearhash/tests/index.js new file mode 100644 index 0000000000..769a0b0bff --- /dev/null +++ b/packages/gearhash/tests/index.js @@ -0,0 +1,4 @@ +import assert from "assert"; +import { add } from "../build/debug.js"; +assert.strictEqual(add(1, 2), 3); 
+console.log("ok"); diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 08e651bb73..5d89bd2418 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -14,3 +14,4 @@ packages: - "packages/ollama-utils" - "packages/mcp-client" - "packages/tiny-agents" + - "packages/gearhash" From 68b32be01eba3b8162a5897cdd0a0a2bc66f0f10 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 28 May 2025 17:35:56 +0200 Subject: [PATCH 02/18] fix build command --- packages/gearhash/package.json | 47 +++++++++++++++++----------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/packages/gearhash/package.json b/packages/gearhash/package.json index c11bf7aeca..ad2419b2e8 100644 --- a/packages/gearhash/package.json +++ b/packages/gearhash/package.json @@ -1,25 +1,24 @@ { - "name": "@huggingface/gearhash", - "version": "0.0.1", - "scripts": { - "build": "tsc", - "asbuild:debug": "asc assembly/index.ts --target debug", - "asbuild:release": "asc assembly/index.ts --target release", - "asbuild": "npm run asbuild:debug && npm run asbuild:release", - "test": "node tests", - "start": "npx serve ." - }, - "dependencies": { - "assemblyscript": "^0.27.36" - }, - "type": "module", - "exports": { - ".": { - "import": "./build/release.js", - "types": "./build/release.d.ts" - } - }, - "devDependencies": { - "assemblyscript": "^0.27.36" - } -} \ No newline at end of file + "name": "@huggingface/gearhash", + "version": "0.0.1", + "scripts": { + "build:debug": "asc assembly/index.ts --target debug", + "build:release": "asc assembly/index.ts --target release", + "build": "npm run build:debug && npm run build:release", + "test": "node tests", + "start": "npx serve ." 
+ }, + "dependencies": { + "assemblyscript": "^0.27.36" + }, + "type": "module", + "exports": { + ".": { + "import": "./build/release.js", + "types": "./build/release.d.ts" + } + }, + "devDependencies": { + "assemblyscript": "^0.27.36" + } +} From 14a9ef41f0cd21b39aebde716bcea2c3f62fbe82 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 30 May 2025 15:52:35 +0200 Subject: [PATCH 03/18] Create gearhash function from rust source --- packages/gearhash/assembly/index.ts | 18 ++++++++- packages/gearhash/assembly/table.ts | 57 +++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 2 deletions(-) create mode 100644 packages/gearhash/assembly/table.ts diff --git a/packages/gearhash/assembly/index.ts b/packages/gearhash/assembly/index.ts index 66a9aafd88..659e958263 100644 --- a/packages/gearhash/assembly/index.ts +++ b/packages/gearhash/assembly/index.ts @@ -1,5 +1,19 @@ // The entry file of your WebAssembly module. -export function add(a: i32, b: i32): i32 { - return a + b; +import { DEFAULT_TABLE } from "./table"; + +export { DEFAULT_TABLE }; + +// Function to find the next match in the buffer +export function nextMatch(hash: u64, buf: Uint8Array, mask: u64, table: StaticArray = DEFAULT_TABLE): i32 { + for (let i = 0; i < buf.length; i++) { + const b = buf[i]; + hash = (hash << 1) + table[b]; + + if ((hash & mask) == 0) { + return i + 1; + } + } + + return -1; // Return -1 to indicate no match found (equivalent to None in Rust) } diff --git a/packages/gearhash/assembly/table.ts b/packages/gearhash/assembly/table.ts new file mode 100644 index 0000000000..22a9e52df9 --- /dev/null +++ b/packages/gearhash/assembly/table.ts @@ -0,0 +1,57 @@ +/* eslint-disable @typescript-eslint/no-loss-of-precision */ + +// Define the Table type as a static array of u64 values +export const DEFAULT_TABLE: StaticArray = [ + 0xb088d3a9e840f559, 0x5652c7f739ed20d6, 0x45b28969898972ab, 0x6b0a89d5b68ec777, 0x368f573e8b7a31b7, + 0x1dc636dce936d94b, 0x207a4c4e5554d5b6, 
0xa474b34628239acb, 0x3b06a83e1ca3b912, 0x90e78d6c2f02baf7, + 0xe1c92df7150d9a8a, 0x8e95053a1086d3ad, 0x5a2ef4f1b83a0722, 0xa50fac949f807fae, 0x0e7303eb80d8d681, + 0x99b07edc1570ad0f, 0x689d2fb555fd3076, 0x00005082119ea468, 0xc4b08306a88fcc28, 0x3eb0678af6374afd, + 0xf19f87ab86ad7436, 0xf2129fbfbe6bc736, 0x481149575c98a4ed, 0x0000010695477bc5, 0x1fba37801a9ceacc, + 0x3bf06fd663a49b6d, 0x99687e9782e3874b, 0x79a10673aa50d8e3, 0xe4accf9e6211f420, 0x2520e71f87579071, + 0x2bd5d3fd781a8a9b, 0x00de4dcddd11c873, 0xeaa9311c5a87392f, 0xdb748eb617bc40ff, 0xaf579a8df620bf6f, + 0x86a6e5da1b09c2b1, 0xcc2fc30ac322a12e, 0x355e2afec1f74267, 0x2d99c8f4c021a47b, 0xbade4b4a9404cfc3, + 0xf7b518721d707d69, 0x3286b6587bf32c20, 0x0000b68886af270c, 0xa115d6e4db8a9079, 0x484f7e9c97b2e199, + 0xccca7bb75713e301, 0xbf2584a62bb0f160, 0xade7e813625dbcc8, 0x000070940d87955a, 0x8ae69108139e626f, + 0xbd776ad72fde38a2, 0xfb6b001fc2fcc0cf, 0xc7a474b8e67bc427, 0xbaf6f11610eb5d58, 0x09cb1f5b6de770d1, + 0xb0b219e6977d4c47, 0x00ccbc386ea7ad4a, 0xcc849d0adf973f01, 0x73a3ef7d016af770, 0xc807d2d386bdbdfe, + 0x7f2ac9966c791730, 0xd037a86bc6c504da, 0xf3f17c661eaa609d, 0xaca626b04daae687, 0x755a99374f4a5b07, + 0x90837ee65b2caede, 0x6ee8ad93fd560785, 0x0000d9e11053edd8, 0x9e063bb2d21cdbd7, 0x07ab77f12a01d2b2, + 0xec550255e6641b44, 0x78fb94a8449c14c6, 0xc7510e1bc6c0f5f5, 0x0000320b36e4cae3, 0x827c33262c8b1a2d, + 0x14675f0b48ea4144, 0x267bd3a6498deceb, 0xf1916ff982f5035e, 0x86221b7ff434fb88, 0x9dbecee7386f49d8, + 0xea58f8cac80f8f4a, 0x008d198692fc64d8, 0x6d38704fbabf9a36, 0xe032cb07d1e7be4c, 0x228d21f6ad450890, + 0x635cb1bfc02589a5, 0x4620a1739ca2ce71, 0xa7e7dfe3aae5fb58, 0x0c10ca932b3c0deb, 0x2727fee884afed7b, + 0xa2df1c6df9e2ab1f, 0x4dcdd1ac0774f523, 0x000070ffad33e24e, 0xa2ace87bc5977816, 0x9892275ab4286049, + 0xc2861181ddf18959, 0xbb9972a042483e19, 0xef70cd3766513078, 0x00000513abfc9864, 0xc058b61858c94083, + 0x09e850859725e0de, 0x9197fb3bf83e7d94, 0x7e1e626d12b64bce, 0x520c54507f7b57d1, 0xbee1797174e22416, + 
0x6fd9ac3222e95587, 0x0023957c9adfbf3e, 0xa01c7d7e234bbe15, 0xaba2c758b8a38cbb, 0x0d1fa0ceec3e2b30, + 0x0bb6a58b7e60b991, 0x4333dd5b9fa26635, 0xc2fd3b7d4001c1a3, 0xfb41802454731127, 0x65a56185a50d18cb, + 0xf67a02bd8784b54f, 0x696f11dd67e65063, 0x00002022fca814ab, 0x8cd6be912db9d852, 0x695189b6e9ae8a57, + 0xee9453b50ada0c28, 0xd8fc5ea91a78845e, 0xab86bf191a4aa767, 0x0000c6b5c86415e5, 0x267310178e08a22e, + 0xed2d101b078bca25, 0x3b41ed84b226a8fb, 0x13e622120f28dc06, 0xa315f5ebfb706d26, 0x8816c34e3301bace, + 0xe9395b9cbb71fdae, 0x002ce9202e721648, 0x4283db1d2bb3c91c, 0xd77d461ad2b1a6a5, 0xe2ec17e46eeb866b, + 0xb8e0be4039fbc47c, 0xdea160c4d5299d04, 0x7eec86c8d28c3634, 0x2119ad129f98a399, 0xa6ccf46b61a283ef, + 0x2c52cedef658c617, 0x2db4871169acdd83, 0x0000f0d6f39ecbe9, 0x3dd5d8c98d2f9489, 0x8a1872a22b01f584, + 0xf282a4c40e7b3cf2, 0x8020ec2ccb1ba196, 0x6693b6e09e59e313, 0x0000ce19cc7c83eb, 0x20cb5735f6479c3b, + 0x762ebf3759d75a5b, 0x207bfe823d693975, 0xd77dc112339cd9d5, 0x9ba7834284627d03, 0x217dc513e95f51e9, + 0xb27b1a29fc5e7816, 0x00d5cd9831bb662d, 0x71e39b806d75734c, 0x7e572af006fb1a23, 0xa2734f2f6ae91f85, + 0xbf82c6b5022cddf2, 0x5c3beac60761a0de, 0xcdc893bb47416998, 0x6d1085615c187e01, 0x77f8ae30ac277c5d, + 0x917c6b81122a2c91, 0x5b75b699add16967, 0x0000cf6ae79a069b, 0xf3c40afa60de1104, 0x2063127aa59167c3, + 0x621de62269d1894d, 0xd188ac1de62b4726, 0x107036e2154b673c, 0x0000b85f28553a1d, 0xf2ef4e4c18236f3d, + 0xd9d6de6611b9f602, 0xa1fc7955fb47911c, 0xeb85fd032f298dbd, 0xbe27502fb3befae1, 0xe3034251c4cd661e, + 0x441364d354071836, 0x0082b36c75f2983e, 0xb145910316fa66f0, 0x021c069c9847caf7, 0x2910dfc75a4b5221, + 0x735b353e1c57a8b5, 0xce44312ce98ed96c, 0xbc942e4506bdfa65, 0xf05086a71257941b, 0xfec3b215d351cead, + 0x00ae1055e0144202, 0xf54b40846f42e454, 0x00007fd9c8bcbcc8, 0xbfbd9ef317de9bfe, 0xa804302ff2854e12, + 0x39ce4957a5e5d8d4, 0xffb9e2a45637ba84, 0x55b9ad1d9ea0818b, 0x00008acbf319178a, 0x48e2bfc8d0fbfb38, + 0x8be39841e848b5e8, 0x0e2712160696a08b, 0xd51096e84b44242a, 
0x1101ba176792e13a, 0xc22e770f4531689d, + 0x1689eff272bbc56c, 0x00a92a197f5650ec, 0xbc765990bda1784e, 0xc61441e392fcb8ae, 0x07e13a2ced31e4a0, + 0x92cbe984234e9d4d, 0x8f4ff572bb7d8ac5, 0x0b9670c00b963bd0, 0x62955a581a03eb01, 0x645f83e5ea000254, + 0x41fce516cd88f299, 0xbbda9748da7a98cf, 0x0000aab2fe4845fa, 0x19761b069bf56555, 0x8b8f5e8343b6ad56, + 0x3e5d1cfd144821d9, 0xec5c1e2ca2b0cd8f, 0xfaf7e0fea7fbb57f, 0x000000d3ba12961b, 0xda3f90178401b18e, + 0x70ff906de33a5feb, 0x0527d5a7c06970e7, 0x22d8e773607c13e9, 0xc9ab70df643c3bac, 0xeda4c6dc8abe12e3, + 0xecef1f410033e78a, 0x0024c2b274ac72cb, 0x06740d954fa900b4, 0x1d7a299b323d6304, 0xb3c37cb298cbead5, + 0xc986e3c76178739b, 0x9fabea364b46f58a, 0x6da214c5af85cc56, 0x17a43ed8b7a38f84, 0x6eccec511d9adbeb, + 0xf9cab30913335afb, 0x4a5e60c5f415eed2, 0x00006967503672b4, 0x9da51d121454bb87, 0x84321e13b9bbc816, + 0xfb3d6fb6ab2fdd8d, 0x60305eed8e160a8d, 0xcbbf4b14e9946ce8, 0x00004f63381b10c3, 0x07d5b7816fcc4e10, + 0xe5a536726a6a8155, 0x57afb23447a07fdd, 0x18f346f7abc9d394, 0x636dc655d61ad33d, 0xcc8bab4939f7f3f6, + 0x63c7a906c1dd187b, +]; From a2cb917cc88c13d403707f79a47695ae4c2ac24c Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 30 May 2025 16:37:30 +0200 Subject: [PATCH 04/18] switch to gearhash-wasm package --- .../{gearhash => gearhash-wasm}/README.md | 0 .../{gearhash => gearhash-wasm}/asconfig.json | 0 packages/gearhash-wasm/assembly/index.ts | 2 + packages/gearhash-wasm/assembly/next-match.ts | 17 ++++++ .../assembly/table.ts | 0 .../assembly/tsconfig.json | 0 .../build/.gitignore | 0 .../{gearhash => gearhash-wasm}/index.html | 0 .../{gearhash => gearhash-wasm}/package.json | 2 +- .../pnpm-lock.yaml | 0 packages/gearhash-wasm/tests/index.js | 4 ++ packages/gearhash/assembly/index.ts | 55 ++++++++++++++++++- packages/gearhash/tests/index.js | 4 -- pnpm-workspace.yaml | 2 +- 14 files changed, 78 insertions(+), 8 deletions(-) rename packages/{gearhash => gearhash-wasm}/README.md (100%) rename packages/{gearhash => 
gearhash-wasm}/asconfig.json (100%) create mode 100644 packages/gearhash-wasm/assembly/index.ts create mode 100644 packages/gearhash-wasm/assembly/next-match.ts rename packages/{gearhash => gearhash-wasm}/assembly/table.ts (100%) rename packages/{gearhash => gearhash-wasm}/assembly/tsconfig.json (100%) rename packages/{gearhash => gearhash-wasm}/build/.gitignore (100%) rename packages/{gearhash => gearhash-wasm}/index.html (100%) rename packages/{gearhash => gearhash-wasm}/package.json (92%) rename packages/{gearhash => gearhash-wasm}/pnpm-lock.yaml (100%) create mode 100644 packages/gearhash-wasm/tests/index.js delete mode 100644 packages/gearhash/tests/index.js diff --git a/packages/gearhash/README.md b/packages/gearhash-wasm/README.md similarity index 100% rename from packages/gearhash/README.md rename to packages/gearhash-wasm/README.md diff --git a/packages/gearhash/asconfig.json b/packages/gearhash-wasm/asconfig.json similarity index 100% rename from packages/gearhash/asconfig.json rename to packages/gearhash-wasm/asconfig.json diff --git a/packages/gearhash-wasm/assembly/index.ts b/packages/gearhash-wasm/assembly/index.ts new file mode 100644 index 0000000000..3b224ae616 --- /dev/null +++ b/packages/gearhash-wasm/assembly/index.ts @@ -0,0 +1,2 @@ +export { DEFAULT_TABLE } from "./table"; +export { nextMatch } from "./next-match"; diff --git a/packages/gearhash-wasm/assembly/next-match.ts b/packages/gearhash-wasm/assembly/next-match.ts new file mode 100644 index 0000000000..17c65b887c --- /dev/null +++ b/packages/gearhash-wasm/assembly/next-match.ts @@ -0,0 +1,17 @@ +// The entry file of your WebAssembly module. 
+ +import { DEFAULT_TABLE } from "./table"; + +// Function to find the next match in the buffer +export function nextMatch(buf: Uint8Array, mask: u64, hash: u64 = 0, table: StaticArray = DEFAULT_TABLE): i32 { + for (let i = 0; i < buf.length; i++) { + const b = buf[i]; + hash = (hash << 1) + table[b]; + + if ((hash & mask) == 0) { + return i + 1; + } + } + + return -1; // Return -1 to indicate no match found (equivalent to None in Rust) +} diff --git a/packages/gearhash/assembly/table.ts b/packages/gearhash-wasm/assembly/table.ts similarity index 100% rename from packages/gearhash/assembly/table.ts rename to packages/gearhash-wasm/assembly/table.ts diff --git a/packages/gearhash/assembly/tsconfig.json b/packages/gearhash-wasm/assembly/tsconfig.json similarity index 100% rename from packages/gearhash/assembly/tsconfig.json rename to packages/gearhash-wasm/assembly/tsconfig.json diff --git a/packages/gearhash/build/.gitignore b/packages/gearhash-wasm/build/.gitignore similarity index 100% rename from packages/gearhash/build/.gitignore rename to packages/gearhash-wasm/build/.gitignore diff --git a/packages/gearhash/index.html b/packages/gearhash-wasm/index.html similarity index 100% rename from packages/gearhash/index.html rename to packages/gearhash-wasm/index.html diff --git a/packages/gearhash/package.json b/packages/gearhash-wasm/package.json similarity index 92% rename from packages/gearhash/package.json rename to packages/gearhash-wasm/package.json index ad2419b2e8..3417394eca 100644 --- a/packages/gearhash/package.json +++ b/packages/gearhash-wasm/package.json @@ -1,5 +1,5 @@ { - "name": "@huggingface/gearhash", + "name": "@huggingface/gearhash-wasms", "version": "0.0.1", "scripts": { "build:debug": "asc assembly/index.ts --target debug", diff --git a/packages/gearhash/pnpm-lock.yaml b/packages/gearhash-wasm/pnpm-lock.yaml similarity index 100% rename from packages/gearhash/pnpm-lock.yaml rename to packages/gearhash-wasm/pnpm-lock.yaml diff --git 
a/packages/gearhash-wasm/tests/index.js b/packages/gearhash-wasm/tests/index.js new file mode 100644 index 0000000000..c7f06f4c03 --- /dev/null +++ b/packages/gearhash-wasm/tests/index.js @@ -0,0 +1,4 @@ +import assert from "assert"; +import { nextMatch } from "../build/debug.js"; +assert.strictEqual(nextMatch(new Uint8Array([1, 2, 3]), 0xaf2900n), 3); +console.log("ok"); diff --git a/packages/gearhash/assembly/index.ts b/packages/gearhash/assembly/index.ts index 659e958263..1635f22c39 100644 --- a/packages/gearhash/assembly/index.ts +++ b/packages/gearhash/assembly/index.ts @@ -1,8 +1,12 @@ // The entry file of your WebAssembly module. -import { DEFAULT_TABLE } from "./table"; +import type { StaticArray } from "@assemblyscript/runtime"; -export { DEFAULT_TABLE }; +// Define the Table type as a static array of u64 values +export const DEFAULT_TABLE: StaticArray = [ + 0xb088d3a9e840f559, 0x5652c7f739ed20d6, 0x45b28969898972ab, 0x6b0a89d5b68ec777, 0x368f573e8b7a31b7, + // ... existing code ... 
+]; // Function to find the next match in the buffer export function nextMatch(hash: u64, buf: Uint8Array, mask: u64, table: StaticArray = DEFAULT_TABLE): i32 { @@ -17,3 +21,50 @@ export function nextMatch(hash: u64, buf: Uint8Array, mask: u64, table: StaticAr return -1; // Return -1 to indicate no match found (equivalent to None in Rust) } + +// Hasher class that maintains hash state +export class Hasher { + private hash: u64; + private table: StaticArray; + + constructor(table: StaticArray = DEFAULT_TABLE) { + this.table = table; + this.hash = 0; + } + + // Update the hash state by processing all the bytes in the given slice + update(buf: Uint8Array): void { + for (let i = 0; i < buf.length; i++) { + const b = buf[i]; + this.hash = (this.hash << 1) + this.table[b]; + } + } + + // Match the current hash state against the given mask + isMatch(mask: u64): boolean { + return (this.hash & mask) == 0; + } + + // Process the given byte slice until a match is found for the given mask + nextMatch(buf: Uint8Array, mask: u64): i32 { + for (let i = 0; i < buf.length; i++) { + const b = buf[i]; + this.hash = (this.hash << 1) + this.table[b]; + + if ((this.hash & mask) == 0) { + return i + 1; + } + } + return -1; + } + + // Get the current hash value + getHash(): u64 { + return this.hash; + } + + // Set the hash value to the given integer + setHash(hash: u64): void { + this.hash = hash; + } +} diff --git a/packages/gearhash/tests/index.js b/packages/gearhash/tests/index.js deleted file mode 100644 index 769a0b0bff..0000000000 --- a/packages/gearhash/tests/index.js +++ /dev/null @@ -1,4 +0,0 @@ -import assert from "assert"; -import { add } from "../build/debug.js"; -assert.strictEqual(add(1, 2), 3); -console.log("ok"); diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 5d89bd2418..e6016bff8c 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -14,4 +14,4 @@ packages: - "packages/ollama-utils" - "packages/mcp-client" - "packages/tiny-agents" - - 
"packages/gearhash" + - "packages/gearhash-wasm" From 97f3e0bf047d364380551cc353e87a4ddfd9d17f Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 30 May 2025 17:17:40 +0200 Subject: [PATCH 05/18] v 1.0 --- packages/gearhash-wasm/README.md | 56 ++++++++++++++++++- packages/gearhash-wasm/assembly/next-match.ts | 14 +++-- packages/gearhash-wasm/tests/index.js | 18 +++++- 3 files changed, 82 insertions(+), 6 deletions(-) diff --git a/packages/gearhash-wasm/README.md b/packages/gearhash-wasm/README.md index e6f17fa01f..14942afc2e 100644 --- a/packages/gearhash-wasm/README.md +++ b/packages/gearhash-wasm/README.md @@ -1,3 +1,57 @@ JS and WASM implementations of https://github.com/srijs/rust-gearhash -Using [AssemblyScript](https://www.assemblyscript.org/) to generate a lean WASM. \ No newline at end of file +Using [AssemblyScript](https://www.assemblyscript.org/) to generate a lean WASM. + +## Usage + +```javascript +import { nextMatch } from '@huggingface/gearhash-wasm'; + +// Create a Uint8Array of data to search through +const data = new Uint8Array(1000000); // Example: 1MB of data +// ... fill data with your content ... 
+ +// Search for a pattern with a specific mask +const mask = 0x0000d90003530000n; // Example mask as a BigInt +const matchResult = nextMatch(data, mask); + +// matchIndex will be the position where the pattern was found +// or -1 if no match was found +``` + +The `nextMatch` function takes two parameters: +- `data`: A Uint8Array containing the data to search through +- `mask`: A BigInt representing the pattern mask to search for + +The function returns an object with the `position` (i32) and `hash` (u64) properties + +You can continuously feed data like this: + +```javascript +let hash = 0n; +const mask = 0x0000d90003530000n; + +let position = 0; +for await (const chunk of dataSource) { + let index = 0; + while (1) { + let match = nextMatch(chunk.subArray(index), mask, hash); + + if (match.position !== -1) { + console.log({ + position: match.position + position, + hash: match.hash + }) + + index += match.position; + position = 0; + hash = 0n; + } else { + position += chunk.length - index; + break; + } + } +} + +console.log(position, "bytes without a match, ending hash: ", hash); +``` \ No newline at end of file diff --git a/packages/gearhash-wasm/assembly/next-match.ts b/packages/gearhash-wasm/assembly/next-match.ts index 17c65b887c..180d11a6a4 100644 --- a/packages/gearhash-wasm/assembly/next-match.ts +++ b/packages/gearhash-wasm/assembly/next-match.ts @@ -2,16 +2,22 @@ import { DEFAULT_TABLE } from "./table"; +// Interface for the match result +export class MatchResult { + position: i32 = -1; + hash: u64 = 0; +} + // Function to find the next match in the buffer -export function nextMatch(buf: Uint8Array, mask: u64, hash: u64 = 0, table: StaticArray = DEFAULT_TABLE): i32 { +export function nextMatch(buf: Uint8Array, mask: u64, hash: u64 = 0): MatchResult { for (let i = 0; i < buf.length; i++) { const b = buf[i]; - hash = (hash << 1) + table[b]; + hash = (hash << 1) + DEFAULT_TABLE[b]; if ((hash & mask) == 0) { - return i + 1; + return { position: i + 1, hash }; 
} } - return -1; // Return -1 to indicate no match found (equivalent to None in Rust) + return { position: -1, hash }; // Return -1 position to indicate no match found, along with the final hash } diff --git a/packages/gearhash-wasm/tests/index.js b/packages/gearhash-wasm/tests/index.js index c7f06f4c03..1b962c543c 100644 --- a/packages/gearhash-wasm/tests/index.js +++ b/packages/gearhash-wasm/tests/index.js @@ -1,4 +1,20 @@ import assert from "assert"; import { nextMatch } from "../build/debug.js"; -assert.strictEqual(nextMatch(new Uint8Array([1, 2, 3]), 0xaf2900n), 3); + +// Simple seeded random number generator +function seededRandom(seed) { + return function () { + seed = (seed * 16807) % 2147483647; + return (seed - 1) / 2147483646; + }; +} + +// Create seeded random data +const seed = 12345; // Fixed seed for deterministic results +const random = seededRandom(seed); +const randomData = new Uint8Array(1000000).map(() => Math.floor(random() * 256)); + +// Test with a known mask +assert.deepStrictEqual(nextMatch(randomData, 0xaf2900n), { position: 128, hash: 11757411513747408525n }); +assert.deepStrictEqual(nextMatch(randomData.subarray(128), 0xaf2900n), { position: 184, hash: 7438883163016807155n }); console.log("ok"); From 512801f4f33fe95e13d87b67170651a07849a564 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 30 May 2025 17:20:40 +0200 Subject: [PATCH 06/18] remove extra file --- packages/gearhash/assembly/index.ts | 70 ----------------------------- 1 file changed, 70 deletions(-) delete mode 100644 packages/gearhash/assembly/index.ts diff --git a/packages/gearhash/assembly/index.ts b/packages/gearhash/assembly/index.ts deleted file mode 100644 index 1635f22c39..0000000000 --- a/packages/gearhash/assembly/index.ts +++ /dev/null @@ -1,70 +0,0 @@ -// The entry file of your WebAssembly module. 
- -import type { StaticArray } from "@assemblyscript/runtime"; - -// Define the Table type as a static array of u64 values -export const DEFAULT_TABLE: StaticArray = [ - 0xb088d3a9e840f559, 0x5652c7f739ed20d6, 0x45b28969898972ab, 0x6b0a89d5b68ec777, 0x368f573e8b7a31b7, - // ... existing code ... -]; - -// Function to find the next match in the buffer -export function nextMatch(hash: u64, buf: Uint8Array, mask: u64, table: StaticArray = DEFAULT_TABLE): i32 { - for (let i = 0; i < buf.length; i++) { - const b = buf[i]; - hash = (hash << 1) + table[b]; - - if ((hash & mask) == 0) { - return i + 1; - } - } - - return -1; // Return -1 to indicate no match found (equivalent to None in Rust) -} - -// Hasher class that maintains hash state -export class Hasher { - private hash: u64; - private table: StaticArray; - - constructor(table: StaticArray = DEFAULT_TABLE) { - this.table = table; - this.hash = 0; - } - - // Update the hash state by processing all the bytes in the given slice - update(buf: Uint8Array): void { - for (let i = 0; i < buf.length; i++) { - const b = buf[i]; - this.hash = (this.hash << 1) + this.table[b]; - } - } - - // Match the current hash state against the given mask - isMatch(mask: u64): boolean { - return (this.hash & mask) == 0; - } - - // Process the given byte slice until a match is found for the given mask - nextMatch(buf: Uint8Array, mask: u64): i32 { - for (let i = 0; i < buf.length; i++) { - const b = buf[i]; - this.hash = (this.hash << 1) + this.table[b]; - - if ((this.hash & mask) == 0) { - return i + 1; - } - } - return -1; - } - - // Get the current hash value - getHash(): u64 { - return this.hash; - } - - // Set the hash value to the given integer - setHash(hash: u64): void { - this.hash = hash; - } -} From 07a384d278075b5807abea05893ee3ba09533668 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 30 May 2025 17:23:01 +0200 Subject: [PATCH 07/18] bigger mask --- packages/gearhash-wasm/tests/index.js | 7 +++++-- 1 file changed, 5 
insertions(+), 2 deletions(-) diff --git a/packages/gearhash-wasm/tests/index.js b/packages/gearhash-wasm/tests/index.js index 1b962c543c..5f70c7ca7c 100644 --- a/packages/gearhash-wasm/tests/index.js +++ b/packages/gearhash-wasm/tests/index.js @@ -15,6 +15,9 @@ const random = seededRandom(seed); const randomData = new Uint8Array(1000000).map(() => Math.floor(random() * 256)); // Test with a known mask -assert.deepStrictEqual(nextMatch(randomData, 0xaf2900n), { position: 128, hash: 11757411513747408525n }); -assert.deepStrictEqual(nextMatch(randomData.subarray(128), 0xaf2900n), { position: 184, hash: 7438883163016807155n }); +assert.deepStrictEqual(nextMatch(randomData, 0x0000d90003530000n), { position: 459, hash: 9546224108073667431n }); +assert.deepStrictEqual(nextMatch(randomData.subarray(128), 0x0000d90003530000n), { + position: 331, + hash: 9546224108073667431n, +}); console.log("ok"); From 3306f2c22a542313b83acbfb6689e1f1a57e2614 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 30 May 2025 17:49:12 +0200 Subject: [PATCH 08/18] add nextMatches function --- packages/gearhash-wasm/README.md | 47 ++++++-- packages/gearhash-wasm/assembly/index.ts | 2 +- packages/gearhash-wasm/assembly/next-match.ts | 23 ++++ packages/gearhash-wasm/tests/index.js | 104 +++++++++++++++++- 4 files changed, 160 insertions(+), 16 deletions(-) diff --git a/packages/gearhash-wasm/README.md b/packages/gearhash-wasm/README.md index 14942afc2e..cf72dafbd3 100644 --- a/packages/gearhash-wasm/README.md +++ b/packages/gearhash-wasm/README.md @@ -13,10 +13,8 @@ const data = new Uint8Array(1000000); // Example: 1MB of data // Search for a pattern with a specific mask const mask = 0x0000d90003530000n; // Example mask as a BigInt -const matchResult = nextMatch(data, mask); - -// matchIndex will be the position where the pattern was found -// or -1 if no match was found +const match = nextMatch(data, mask); +const allMatches = nextMatches(data, mask).matches; ``` The `nextMatch` function takes 
two parameters: @@ -31,7 +29,7 @@ You can continuously feed data like this: let hash = 0n; const mask = 0x0000d90003530000n; -let position = 0; +let length = 0; // extra length not processed for await (const chunk of dataSource) { let index = 0; while (1) { @@ -39,19 +37,48 @@ for await (const chunk of dataSource) { if (match.position !== -1) { console.log({ - position: match.position + position, + length: match.position + length, hash: match.hash }) index += match.position; - position = 0; + length = 0; hash = 0n; } else { - position += chunk.length - index; + length += chunk.length - index; break; } } } -console.log(position, "bytes without a match, ending hash: ", hash); -``` \ No newline at end of file +console.log(length, "bytes without a match, ending hash: ", hash); +``` + +or, more performant with `nextMatches`: + +```javascript +let hash = 0n; +const mask = 0x0000d90003530000n; + +let length = 0; +for await (const chunk of dataSource) { + const result = nextMatches(chunk, mask, hash); + let lastPosition = 0; + for (const match of result.matches) { + console.log({ + length: match.position - lastPosition + length, + hash: match.hash + }); + + length = 0; + lastPosition = match.position; + } + length = result.remaining; + hash = result.hash; +} + +console.log(length, "bytes without a match, ending hash: ", hash); +``` + +## Possible improvements + diff --git a/packages/gearhash-wasm/assembly/index.ts b/packages/gearhash-wasm/assembly/index.ts index 3b224ae616..447e7776f7 100644 --- a/packages/gearhash-wasm/assembly/index.ts +++ b/packages/gearhash-wasm/assembly/index.ts @@ -1,2 +1,2 @@ export { DEFAULT_TABLE } from "./table"; -export { nextMatch } from "./next-match"; +export { nextMatch, nextMatches } from "./next-match"; diff --git a/packages/gearhash-wasm/assembly/next-match.ts b/packages/gearhash-wasm/assembly/next-match.ts index 180d11a6a4..1093f77a80 100644 --- a/packages/gearhash-wasm/assembly/next-match.ts +++ 
b/packages/gearhash-wasm/assembly/next-match.ts @@ -21,3 +21,26 @@ export function nextMatch(buf: Uint8Array, mask: u64, hash: u64 = 0): MatchResul return { position: -1, hash }; // Return -1 position to indicate no match found, along with the final hash } + +export class NextMatchesResult { + matches: MatchResult[] = []; + hash: u64 = 0; + remaining: i32 = 0; +} + +export function nextMatches(buf: Uint8Array, mask: u64, hash: u64 = 0): NextMatchesResult { + const result = new NextMatchesResult(); + + let match = nextMatch(buf, mask, hash); + let position = 0; + while (match.position !== -1) { + result.matches.push(match); + position += match.position; + match = nextMatch(buf.subarray(position), mask, 0); + } + + result.remaining = buf.length - position; + result.hash = match.hash; + + return result; +} diff --git a/packages/gearhash-wasm/tests/index.js b/packages/gearhash-wasm/tests/index.js index 5f70c7ca7c..d3d220da00 100644 --- a/packages/gearhash-wasm/tests/index.js +++ b/packages/gearhash-wasm/tests/index.js @@ -1,5 +1,5 @@ import assert from "assert"; -import { nextMatch } from "../build/debug.js"; +import { nextMatch, nextMatches } from "../build/debug.js"; // Simple seeded random number generator function seededRandom(seed) { @@ -12,12 +12,106 @@ function seededRandom(seed) { // Create seeded random data const seed = 12345; // Fixed seed for deterministic results const random = seededRandom(seed); -const randomData = new Uint8Array(1000000).map(() => Math.floor(random() * 256)); +const randomData = new Uint8Array(150_000).map(() => Math.floor(random() * 256)); // Test with a known mask assert.deepStrictEqual(nextMatch(randomData, 0x0000d90003530000n), { position: 459, hash: 9546224108073667431n }); -assert.deepStrictEqual(nextMatch(randomData.subarray(128), 0x0000d90003530000n), { - position: 331, - hash: 9546224108073667431n, +assert.deepStrictEqual(nextMatch(randomData.subarray(459), 0x0000d90003530000n), { + position: 3658, + hash: 4043712133052525799n, 
}); + +assert.deepStrictEqual(nextMatches(randomData, 0x0000d90003530000n), { + remaining: 1206, + hash: 18262966296195680063n, + matches: [ + { position: 459, hash: 9546224108073667431n }, + { position: 3658, hash: 4043712133052525799n }, + { position: 2013, hash: 6111702085179831561n }, + { position: 1593, hash: 12901166541873917249n }, + { position: 1566, hash: 7692186462913612151n }, + { position: 211, hash: 16543980755458487441n }, + { position: 1778, hash: 15644384556715661587n }, + { position: 566, hash: 9793366463237592247n }, + { position: 2079, hash: 11221321116171663064n }, + { position: 2940, hash: 1564726223525919786n }, + { position: 809, hash: 15395839328876515337n }, + { position: 946, hash: 10585747199093122759n }, + { position: 854, hash: 4479393852251501569n }, + { position: 436, hash: 15702966577303948694n }, + { position: 2165, hash: 17148900940125069205n }, + { position: 273, hash: 11505890591385615424n }, + { position: 1459, hash: 10774060112464860369n }, + { position: 158, hash: 2233823235057951370n }, + { position: 7, hash: 1983310208686139647n }, + { position: 1926, hash: 4499661659570185271n }, + { position: 1529, hash: 16090517590946392505n }, + { position: 1751, hash: 12536054222087023458n }, + { position: 1222, hash: 334146166487300408n }, + { position: 2230, hash: 6981431015531396608n }, + { position: 826, hash: 11877997991061156988n }, + { position: 33, hash: 8454422284689001989n }, + { position: 1731, hash: 15095819886766624527n }, + { position: 8842, hash: 6362744947164356842n }, + { position: 928, hash: 3627691864743766239n }, + { position: 684, hash: 1137480049753900759n }, + { position: 5301, hash: 10541554813326859395n }, + { position: 2546, hash: 14704288147532701373n }, + { position: 11856, hash: 9653226176528805511n }, + { position: 650, hash: 12714262162290274678n }, + { position: 1346, hash: 2525679969999819421n }, + { position: 353, hash: 2532749299807420736n }, + { position: 1091, hash: 693561665209300041n }, + { 
position: 729, hash: 11014435606385442344n }, + { position: 1204, hash: 10083883741570968570n }, + { position: 1671, hash: 12308901096302322810n }, + { position: 1362, hash: 13399339535394154305n }, + { position: 1858, hash: 792389713896955383n }, + { position: 2248, hash: 15568664728418446816n }, + { position: 1790, hash: 4328805983976714464n }, + { position: 634, hash: 722305044694988273n }, + { position: 741, hash: 17978970776495983968n }, + { position: 901, hash: 5911861036065769110n }, + { position: 302, hash: 1334790489764850513n }, + { position: 1435, hash: 16174119877357924758n }, + { position: 61, hash: 12103430617785210167n }, + { position: 1, hash: 35334639850667n }, + { position: 2074, hash: 7449519750512442798n }, + { position: 2061, hash: 1805950971475184864n }, + { position: 1612, hash: 5837797879339327135n }, + { position: 3281, hash: 6649572008787195357n }, + { position: 39, hash: 16137242368496690753n }, + { position: 263, hash: 8133543763164586431n }, + { position: 2333, hash: 17019949823094703325n }, + { position: 1160, hash: 8949503946391874147n }, + { position: 641, hash: 18344573417262448121n }, + { position: 2588, hash: 13345294745157777411n }, + { position: 3116, hash: 7832639641689314418n }, + { position: 4671, hash: 13762161036402935807n }, + { position: 276, hash: 10924644382434953404n }, + { position: 4430, hash: 9045519457622973922n }, + { position: 32, hash: 4188636638659752674n }, + { position: 2470, hash: 1184167847892138852n }, + { position: 694, hash: 11699508361075635892n }, + { position: 1703, hash: 9012268790677532920n }, + { position: 47, hash: 6528251874505412319n }, + { position: 2672, hash: 8484789019946020371n }, + { position: 202, hash: 1365160724288031760n }, + { position: 467, hash: 10426152000837661087n }, + { position: 496, hash: 3605417399306471847n }, + { position: 3777, hash: 8410473338876477323n }, + { position: 80, hash: 3693273711429567121n }, + { position: 813, hash: 9224216742837123228n }, + { position: 3115, 
hash: 5150752707627454542n }, + { position: 806, hash: 8797260981186887018n }, + { position: 4915, hash: 1483374079741560715n }, + { position: 2118, hash: 1742900153494554703n }, + { position: 1515, hash: 4635371751468227093n }, + { position: 2393, hash: 15282968615371427111n }, + { position: 4331, hash: 4659818917792066036n }, + { position: 1188, hash: 3862441883651577693n }, + { position: 2663, hash: 8524789558855117254n }, + ], +}); + console.log("ok"); From 12f9e97c89ad3bdf261b5c030ceb8ce3a8647254 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 4 Jun 2025 12:31:25 +0200 Subject: [PATCH 09/18] (wip) xet chunk code generated by cursor to fix --- .../gearhash-wasm/assembly/xet-chunker.ts | 172 ++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 packages/gearhash-wasm/assembly/xet-chunker.ts diff --git a/packages/gearhash-wasm/assembly/xet-chunker.ts b/packages/gearhash-wasm/assembly/xet-chunker.ts new file mode 100644 index 0000000000..2e501cc815 --- /dev/null +++ b/packages/gearhash-wasm/assembly/xet-chunker.ts @@ -0,0 +1,172 @@ +import { nextMatch } from "./next-match"; + +// Constants +const TARGET_CHUNK_SIZE: usize = 64 * 1024; // 64KB +const MINIMUM_CHUNK_DIVISOR: usize = 8; +const MAXIMUM_CHUNK_MULTIPLIER: usize = 2; +const HASH_WINDOW_SIZE: usize = 64; + +export class Chunk { + hash: Uint8Array; + data: Uint8Array; + + constructor(hash: Uint8Array, data: Uint8Array) { + this.hash = hash; + this.data = data; + } +} + +// Type for the next() method return value +export class NextResult { + chunk: Chunk | null; + bytesConsumed: usize; + + constructor(chunk: Chunk | null, bytesConsumed: usize) { + this.chunk = chunk; + this.bytesConsumed = bytesConsumed; + } +} + +export class XetChunker { + private minimumChunk: usize; + private maximumChunk: usize; + private mask: u64; + private chunkBuf: Uint8Array; + private curChunkLen: usize; + private hash: u64; + + constructor(targetChunkSize: usize = TARGET_CHUNK_SIZE) { + // Validate target 
chunk size is a power of 2 + assert((targetChunkSize & (targetChunkSize - 1)) == 0, "Target chunk size must be a power of 2"); + assert(targetChunkSize > HASH_WINDOW_SIZE, "Target chunk size must be greater than hash window size"); + assert(targetChunkSize < u32.MAX_VALUE, "Target chunk size must be less than u32.MAX_VALUE"); + + let mask = (targetChunkSize - 1) as u64; + // Shift mask left by leading zeros count + mask = mask << (64 - clz(mask)); + + this.minimumChunk = targetChunkSize / MINIMUM_CHUNK_DIVISOR; + this.maximumChunk = targetChunkSize * MAXIMUM_CHUNK_MULTIPLIER; + this.mask = mask; + this.chunkBuf = new Uint8Array(this.maximumChunk); + this.curChunkLen = 0; + this.hash = 0; + } + + next(data: Uint8Array, isFinal: boolean): NextResult { + const nBytes = data.length; + let createChunk = false; + let consumeLen: usize = 0; + + if (nBytes != 0) { + // Skip minimum chunk size + if (this.curChunkLen + HASH_WINDOW_SIZE < this.minimumChunk) { + const maxAdvance = min(this.minimumChunk - this.curChunkLen - HASH_WINDOW_SIZE - 1, nBytes - consumeLen); + consumeLen += maxAdvance; + this.curChunkLen += maxAdvance; + } + + // Calculate read end + const readEnd = min(nBytes, consumeLen + this.maximumChunk - this.curChunkLen); + + let bytesToNextBoundary: usize; + const matchResult = nextMatch(data.subarray(consumeLen, readEnd), this.mask, this.hash); + + if (matchResult.position != -1) { + bytesToNextBoundary = matchResult.position; + createChunk = true; + this.hash = matchResult.hash; + } else { + bytesToNextBoundary = readEnd - consumeLen; + this.hash = matchResult.hash; + } + + // Check if we hit maximum chunk + if (bytesToNextBoundary + this.curChunkLen >= this.maximumChunk) { + bytesToNextBoundary = this.maximumChunk - this.curChunkLen; + createChunk = true; + } + + this.curChunkLen += bytesToNextBoundary; + consumeLen += bytesToNextBoundary; + + // Copy data to chunk buffer + this.chunkBuf.set(data.subarray(0, consumeLen), this.curChunkLen - consumeLen); + } + 
+ if (createChunk || (isFinal && this.curChunkLen > 0)) { + const chunkData = this.chunkBuf.subarray(0, this.curChunkLen); + const chunk = new Chunk(computeDataHash(chunkData), chunkData); + this.curChunkLen = 0; + this.hash = 0; + return new NextResult(chunk, consumeLen); + } + + return new NextResult(null, consumeLen); + } + + nextBlock(data: Uint8Array, isFinal: boolean): Chunk[] { + const chunks: Chunk[] = []; + let pos: usize = 0; + + while (pos < data.length) { + const result = this.next(data.subarray(pos), isFinal); + if (result.chunk) { + chunks.push(result.chunk); + } + pos += result.bytesConsumed; + } + + return chunks; + } + + finish(): Chunk | null { + return this.next(new Uint8Array(0), true).chunk; + } +} + +// Simple SHA-256 implementation for data hashing +function computeDataHash(data: Uint8Array): Uint8Array { + // TODO: Replace with actual SHA-256 implementation + // For now, using a simple hash function for demonstration + const hash = new Uint8Array(32); + for (let i = 0; i < data.length; i++) { + hash[i % 32] ^= data[i]; + } + return hash; +} + +// Helper function to find minimum of two numbers +function min(a: usize, b: usize): usize { + return a < b ? 
a : b; +} + +// Helper function to count leading zeros +function clz(x: u64): u32 { + let n: u32 = 0; + if (x == 0) return 64; + if ((x & 0xffffffff00000000) == 0) { + n += 32; + x <<= 32; + } + if ((x & 0xffff000000000000) == 0) { + n += 16; + x <<= 16; + } + if ((x & 0xff00000000000000) == 0) { + n += 8; + x <<= 8; + } + if ((x & 0xf000000000000000) == 0) { + n += 4; + x <<= 4; + } + if ((x & 0xc000000000000000) == 0) { + n += 2; + x <<= 2; + } + if ((x & 0x8000000000000000) == 0) { + n += 1; + } + return n; +} From 36348feb5c5d9bdc9974bf6da3bd5ad6bec28919 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 4 Jun 2025 15:11:45 +0200 Subject: [PATCH 10/18] add blake3-wasm and xetchunk-wasm --- packages/blake3-wasm/assembly/blake3.ts | 361 ++++++++++++++++++ packages/blake3-wasm/assembly/index.ts | 2 + packages/blake3-wasm/assembly/tsconfig.json | 4 + packages/blake3-wasm/package.json | 23 ++ packages/blake3-wasm/pnpm-lock.yaml | 38 ++ packages/gearhash-wasm/assembly/blake3.ts | 357 +++++++++++++++++ packages/gearhash-wasm/package.json | 11 +- packages/gearhash-wasm/pnpm-lock.yaml | 6 + packages/xetchunk-wasm/assembly/index.ts | 2 + packages/xetchunk-wasm/assembly/next-match.ts | 28 ++ packages/xetchunk-wasm/assembly/tsconfig.json | 4 + .../assembly/xet-chunker.ts | 10 +- packages/xetchunk-wasm/package.json | 23 ++ packages/xetchunk-wasm/pnpm-lock.yaml | 42 ++ pnpm-workspace.yaml | 2 + tsconfig.json | 13 + 16 files changed, 918 insertions(+), 8 deletions(-) create mode 100644 packages/blake3-wasm/assembly/blake3.ts create mode 100644 packages/blake3-wasm/assembly/index.ts create mode 100644 packages/blake3-wasm/assembly/tsconfig.json create mode 100644 packages/blake3-wasm/package.json create mode 100644 packages/blake3-wasm/pnpm-lock.yaml create mode 100644 packages/gearhash-wasm/assembly/blake3.ts create mode 100644 packages/xetchunk-wasm/assembly/index.ts create mode 100644 packages/xetchunk-wasm/assembly/next-match.ts create mode 100644 
packages/xetchunk-wasm/assembly/tsconfig.json rename packages/{gearhash-wasm => xetchunk-wasm}/assembly/xet-chunker.ts (94%) create mode 100644 packages/xetchunk-wasm/package.json create mode 100644 packages/xetchunk-wasm/pnpm-lock.yaml create mode 100644 tsconfig.json diff --git a/packages/blake3-wasm/assembly/blake3.ts b/packages/blake3-wasm/assembly/blake3.ts new file mode 100644 index 0000000000..46c19b4db3 --- /dev/null +++ b/packages/blake3-wasm/assembly/blake3.ts @@ -0,0 +1,361 @@ +// Import AssemblyScript types +import type { usize, u32, u8, u64 } from "assemblyscript"; +import { StaticArray } from "assemblyscript"; + +// Constants from the reference implementation +const OUT_LEN: usize = 32; +// const KEY_LEN: usize = 32; +const BLOCK_LEN: usize = 64; +const CHUNK_LEN: usize = 1024; + +const CHUNK_START: u32 = 1 << 0; +const CHUNK_END: u32 = 1 << 1; +const PARENT: u32 = 1 << 2; +const ROOT: u32 = 1 << 3; +//const KEYED_HASH: u32 = 1 << 4; +//const DERIVE_KEY_CONTEXT: u32 = 1 << 5; +// const DERIVE_KEY_MATERIAL: u32 = 1 << 6; + +const IV: StaticArray = [ + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, +]; + +const MSG_PERMUTATION: StaticArray = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8]; + +// The mixing function, G, which mixes either a column or a diagonal. +function g(state: StaticArray, a: usize, b: usize, c: usize, d: usize, mx: u32, my: u32): void { + state[a] = state[a] + state[b] + mx; + state[d] = rotl32(state[d] ^ state[a], 16); + state[c] = state[c] + state[d]; + state[b] = rotl32(state[b] ^ state[c], 12); + state[a] = state[a] + state[b] + my; + state[d] = rotl32(state[d] ^ state[a], 8); + state[c] = state[c] + state[d]; + state[b] = rotl32(state[b] ^ state[c], 7); +} + +// Rotate left by n bits +function rotl32(x: u32, n: u32): u32 { + return (x << n) | (x >>> (32 - n)); +} + +function round(state: StaticArray, m: StaticArray): void { + // Mix the columns. 
+ g(state, 0, 4, 8, 12, m[0], m[1]); + g(state, 1, 5, 9, 13, m[2], m[3]); + g(state, 2, 6, 10, 14, m[4], m[5]); + g(state, 3, 7, 11, 15, m[6], m[7]); + // Mix the diagonals. + g(state, 0, 5, 10, 15, m[8], m[9]); + g(state, 1, 6, 11, 12, m[10], m[11]); + g(state, 2, 7, 8, 13, m[12], m[13]); + g(state, 3, 4, 9, 14, m[14], m[15]); +} + +function permute(m: StaticArray): void { + const permuted = new StaticArray(16); + for (let i = 0; i < 16; i++) { + permuted[i] = m[MSG_PERMUTATION[i]]; + } + for (let i = 0; i < 16; i++) { + m[i] = permuted[i]; + } +} + +function compress( + chaining_value: StaticArray, + block_words: StaticArray, + counter: u64, + block_len: u32, + flags: u32 +): StaticArray { + const counter_low = counter as u32; + const counter_high = (counter >> 32) as u32; + const state = new StaticArray(16); + + // Initialize state + for (let i = 0; i < 8; i++) { + state[i] = chaining_value[i]; + state[i + 8] = IV[i]; + } + state[12] = counter_low; + state[13] = counter_high; + state[14] = block_len; + state[15] = flags; + + const block = new StaticArray(16); + for (let i = 0; i < 16; i++) { + block[i] = block_words[i]; + } + + // Apply rounds + round(state, block); + permute(block); + round(state, block); + permute(block); + round(state, block); + permute(block); + round(state, block); + permute(block); + round(state, block); + permute(block); + round(state, block); + permute(block); + round(state, block); + + // Final mixing + for (let i = 0; i < 8; i++) { + state[i] ^= state[i + 8]; + state[i + 8] ^= chaining_value[i]; + } + + return state; +} + +function words_from_little_endian_bytes(bytes: Uint8Array, words: StaticArray): void { + for (let i = 0; i < words.length; i++) { + const offset = i * 4; + words[i] = bytes[offset] | (bytes[offset + 1] << 8) | (bytes[offset + 2] << 16) | (bytes[offset + 3] << 24); + } +} + +export class Blake3Hasher { + private chunk_state: ChunkState; + private key_words: StaticArray; + private cv_stack: StaticArray>; + private 
cv_stack_len: u8; + private flags: u32; + + constructor() { + this.key_words = new StaticArray(8); + for (let i = 0; i < 8; i++) { + this.key_words[i] = IV[i]; + } + this.chunk_state = new ChunkState(this.key_words, 0, 0); + this.cv_stack = new StaticArray>(54); + for (let i = 0; i < 54; i++) { + this.cv_stack[i] = new StaticArray(8); + } + this.cv_stack_len = 0; + this.flags = 0; + } + + update(input: Uint8Array): void { + let inputPos = 0; + while (inputPos < input.length) { + if (this.chunk_state.len() == CHUNK_LEN) { + const chunk_cv = this.chunk_state.output().chaining_value(); + const total_chunks = this.chunk_state.chunk_counter + 1; + this.add_chunk_chaining_value(chunk_cv, total_chunks); + this.chunk_state = new ChunkState(this.key_words, total_chunks, this.flags); + } + + const want = CHUNK_LEN - this.chunk_state.len(); + const take = min(want, input.length - inputPos); + this.chunk_state.update(input.subarray(inputPos, inputPos + take)); + inputPos += take; + } + } + + finalize(out: Uint8Array): void { + let output = this.chunk_state.output(); + let parent_nodes_remaining = this.cv_stack_len; + + while (parent_nodes_remaining > 0) { + parent_nodes_remaining--; + output = parent_output( + this.cv_stack[parent_nodes_remaining], + output.chaining_value(), + this.key_words, + this.flags + ); + } + + output.root_output_bytes(out); + } + + private add_chunk_chaining_value(new_cv: StaticArray, total_chunks: u64): void { + let mut_new_cv = new_cv; + let mut_total_chunks = total_chunks; + + while ((mut_total_chunks & 1) == 0) { + mut_new_cv = parent_cv(this.pop_stack(), mut_new_cv, this.key_words, this.flags); + mut_total_chunks >>= 1; + } + + this.push_stack(mut_new_cv); + } + + private push_stack(cv: StaticArray): void { + for (let i = 0; i < 8; i++) { + this.cv_stack[this.cv_stack_len][i] = cv[i]; + } + this.cv_stack_len++; + } + + private pop_stack(): StaticArray { + this.cv_stack_len--; + return this.cv_stack[this.cv_stack_len]; + } +} + +class ChunkState { 
+ chaining_value: StaticArray; + chunk_counter: u64; + block: Uint8Array; + block_len: u8; + blocks_compressed: u8; + flags: u32; + + constructor(key_words: StaticArray, chunk_counter: u64, flags: u32) { + this.chaining_value = new StaticArray(8); + for (let i = 0; i < 8; i++) { + this.chaining_value[i] = key_words[i]; + } + this.chunk_counter = chunk_counter; + this.block = new Uint8Array(BLOCK_LEN); + this.block_len = 0; + this.blocks_compressed = 0; + this.flags = flags; + } + + len(): usize { + return BLOCK_LEN * this.blocks_compressed + this.block_len; + } + + start_flag(): u32 { + return this.blocks_compressed == 0 ? CHUNK_START : 0; + } + + update(input: Uint8Array): void { + let inputPos = 0; + while (inputPos < input.length) { + if (this.block_len == BLOCK_LEN) { + const block_words = new StaticArray(16); + words_from_little_endian_bytes(this.block, block_words); + const compressed = compress( + this.chaining_value, + block_words, + this.chunk_counter, + BLOCK_LEN, + this.flags | this.start_flag() + ); + for (let i = 0; i < 8; i++) { + this.chaining_value[i] = compressed[i]; + } + this.blocks_compressed++; + this.block = new Uint8Array(BLOCK_LEN); + this.block_len = 0; + } + + const want = BLOCK_LEN - this.block_len; + const take = min(want, input.length - inputPos); + for (let i = 0; i < take; i++) { + this.block[this.block_len + i] = input[inputPos + i]; + } + this.block_len += take; + inputPos += take; + } + } + + output(): Output { + const block_words = new StaticArray(16); + words_from_little_endian_bytes(this.block, block_words); + return new Output( + this.chaining_value, + block_words, + this.chunk_counter, + this.block_len, + this.flags | this.start_flag() | CHUNK_END + ); + } +} + +class Output { + input_chaining_value: StaticArray; + block_words: StaticArray; + counter: u64; + block_len: u32; + flags: u32; + + constructor( + input_chaining_value: StaticArray, + block_words: StaticArray, + counter: u64, + block_len: u32, + flags: u32 + ) { + 
this.input_chaining_value = input_chaining_value; + this.block_words = block_words; + this.counter = counter; + this.block_len = block_len; + this.flags = flags; + } + + chaining_value(): StaticArray { + const compressed = compress(this.input_chaining_value, this.block_words, this.counter, this.block_len, this.flags); + const result = new StaticArray(8); + for (let i = 0; i < 8; i++) { + result[i] = compressed[i]; + } + return result; + } + + root_output_bytes(out: Uint8Array): void { + let output_block_counter: u64 = 0; + for (let i = 0; i < out.length; i += 2 * OUT_LEN) { + const words = compress( + this.input_chaining_value, + this.block_words, + output_block_counter, + this.block_len, + this.flags | ROOT + ); + const out_block = out.subarray(i, i + 2 * OUT_LEN); + for (let j = 0; j < words.length; j++) { + const word = words[j]; + const offset = j * 4; + if (offset < out_block.length) { + out_block[offset] = word & 0xff; + if (offset + 1 < out_block.length) { + out_block[offset + 1] = (word >> 8) & 0xff; + if (offset + 2 < out_block.length) { + out_block[offset + 2] = (word >> 16) & 0xff; + if (offset + 3 < out_block.length) { + out_block[offset + 3] = (word >> 24) & 0xff; + } + } + } + } + } + output_block_counter++; + } + } +} + +function parent_output( + left_child_cv: StaticArray, + right_child_cv: StaticArray, + key_words: StaticArray, + flags: u32 +): Output { + const block_words = new StaticArray(16); + for (let i = 0; i < 8; i++) { + block_words[i] = left_child_cv[i]; + block_words[i + 8] = right_child_cv[i]; + } + return new Output(key_words, block_words, 0, BLOCK_LEN, PARENT | flags); +} + +function parent_cv( + left_child_cv: StaticArray, + right_child_cv: StaticArray, + key_words: StaticArray, + flags: u32 +): StaticArray { + return parent_output(left_child_cv, right_child_cv, key_words, flags).chaining_value(); +} + +function min(a: usize, b: usize): usize { + return a < b ? 
a : b; +} diff --git a/packages/blake3-wasm/assembly/index.ts b/packages/blake3-wasm/assembly/index.ts new file mode 100644 index 0000000000..8183303929 --- /dev/null +++ b/packages/blake3-wasm/assembly/index.ts @@ -0,0 +1,2 @@ +// Re-export everything from blake3.ts +export * from "./blake3"; diff --git a/packages/blake3-wasm/assembly/tsconfig.json b/packages/blake3-wasm/assembly/tsconfig.json new file mode 100644 index 0000000000..8131d68a0a --- /dev/null +++ b/packages/blake3-wasm/assembly/tsconfig.json @@ -0,0 +1,4 @@ +{ + "extends": "../node_modules/.pnpm/assemblyscript@0.27.36/node_modules/assemblyscript/std/assembly.json", + "include": ["./**/*.ts"] +} diff --git a/packages/blake3-wasm/package.json b/packages/blake3-wasm/package.json new file mode 100644 index 0000000000..7d258a5372 --- /dev/null +++ b/packages/blake3-wasm/package.json @@ -0,0 +1,23 @@ +{ + "name": "@huggingface/blake3-wasm", + "version": "0.0.1", + "scripts": { + "build:debug": "asc assembly/index.ts --target debug", + "build:release": "asc assembly/index.ts --target release", + "build": "npm run build:debug && npm run build:release", + "test": "node tests" + }, + "dependencies": { + "assemblyscript": "^0.27.36" + }, + "type": "module", + "exports": { + ".": { + "import": "./build/release.js", + "types": "./build/release.d.ts" + } + }, + "devDependencies": { + "assemblyscript": "^0.27.36" + } +} diff --git a/packages/blake3-wasm/pnpm-lock.yaml b/packages/blake3-wasm/pnpm-lock.yaml new file mode 100644 index 0000000000..d18c304a0e --- /dev/null +++ b/packages/blake3-wasm/pnpm-lock.yaml @@ -0,0 +1,38 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + dependencies: + assemblyscript: + specifier: ^0.27.36 + version: 0.27.37 + +packages: + + assemblyscript@0.27.37: + resolution: {integrity: sha512-YtY5k3PiV3SyUQ6gRlR2OCn8dcVRwkpiG/k2T5buoL2ymH/Z/YbaYWbk/f9mO2HTgEtGWjPiAQrIuvA7G/63Gg==} + engines: {node: '>=18', npm: '>=10'} + 
hasBin: true + + binaryen@116.0.0-nightly.20240114: + resolution: {integrity: sha512-0GZrojJnuhoe+hiwji7QFaL3tBlJoA+KFUN7ouYSDGZLSo9CKM8swQX8n/UcbR0d1VuZKU+nhogNzv423JEu5A==} + hasBin: true + + long@5.3.2: + resolution: {integrity: sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==} + +snapshots: + + assemblyscript@0.27.37: + dependencies: + binaryen: 116.0.0-nightly.20240114 + long: 5.3.2 + + binaryen@116.0.0-nightly.20240114: {} + + long@5.3.2: {} diff --git a/packages/gearhash-wasm/assembly/blake3.ts b/packages/gearhash-wasm/assembly/blake3.ts new file mode 100644 index 0000000000..44239e5390 --- /dev/null +++ b/packages/gearhash-wasm/assembly/blake3.ts @@ -0,0 +1,357 @@ +// Constants from the reference implementation +const OUT_LEN: usize = 32; +const KEY_LEN: usize = 32; +const BLOCK_LEN: usize = 64; +const CHUNK_LEN: usize = 1024; + +const CHUNK_START: u32 = 1 << 0; +const CHUNK_END: u32 = 1 << 1; +const PARENT: u32 = 1 << 2; +const ROOT: u32 = 1 << 3; +const KEYED_HASH: u32 = 1 << 4; +const DERIVE_KEY_CONTEXT: u32 = 1 << 5; +const DERIVE_KEY_MATERIAL: u32 = 1 << 6; + +const IV: StaticArray = [ + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, +]; + +const MSG_PERMUTATION: StaticArray = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8]; + +// The mixing function, G, which mixes either a column or a diagonal. 
+function g(state: StaticArray, a: usize, b: usize, c: usize, d: usize, mx: u32, my: u32): void { + state[a] = state[a] + state[b] + mx; + state[d] = rotl32(state[d] ^ state[a], 16); + state[c] = state[c] + state[d]; + state[b] = rotl32(state[b] ^ state[c], 12); + state[a] = state[a] + state[b] + my; + state[d] = rotl32(state[d] ^ state[a], 8); + state[c] = state[c] + state[d]; + state[b] = rotl32(state[b] ^ state[c], 7); +} + +// Rotate left by n bits +function rotl32(x: u32, n: u32): u32 { + return (x << n) | (x >>> (32 - n)); +} + +function round(state: StaticArray, m: StaticArray): void { + // Mix the columns. + g(state, 0, 4, 8, 12, m[0], m[1]); + g(state, 1, 5, 9, 13, m[2], m[3]); + g(state, 2, 6, 10, 14, m[4], m[5]); + g(state, 3, 7, 11, 15, m[6], m[7]); + // Mix the diagonals. + g(state, 0, 5, 10, 15, m[8], m[9]); + g(state, 1, 6, 11, 12, m[10], m[11]); + g(state, 2, 7, 8, 13, m[12], m[13]); + g(state, 3, 4, 9, 14, m[14], m[15]); +} + +function permute(m: StaticArray): void { + const permuted = new StaticArray(16); + for (let i = 0; i < 16; i++) { + permuted[i] = m[MSG_PERMUTATION[i]]; + } + for (let i = 0; i < 16; i++) { + m[i] = permuted[i]; + } +} + +function compress( + chaining_value: StaticArray, + block_words: StaticArray, + counter: u64, + block_len: u32, + flags: u32 +): StaticArray { + const counter_low = counter as u32; + const counter_high = (counter >> 32) as u32; + const state = new StaticArray(16); + + // Initialize state + for (let i = 0; i < 8; i++) { + state[i] = chaining_value[i]; + state[i + 8] = IV[i]; + } + state[12] = counter_low; + state[13] = counter_high; + state[14] = block_len; + state[15] = flags; + + const block = new StaticArray(16); + for (let i = 0; i < 16; i++) { + block[i] = block_words[i]; + } + + // Apply rounds + round(state, block); + permute(block); + round(state, block); + permute(block); + round(state, block); + permute(block); + round(state, block); + permute(block); + round(state, block); + permute(block); + 
round(state, block); + permute(block); + round(state, block); + + // Final mixing + for (let i = 0; i < 8; i++) { + state[i] ^= state[i + 8]; + state[i + 8] ^= chaining_value[i]; + } + + return state; +} + +function words_from_little_endian_bytes(bytes: Uint8Array, words: StaticArray): void { + for (let i = 0; i < words.length; i++) { + const offset = i * 4; + words[i] = bytes[offset] | (bytes[offset + 1] << 8) | (bytes[offset + 2] << 16) | (bytes[offset + 3] << 24); + } +} + +export class Blake3Hasher { + private chunk_state: ChunkState; + private key_words: StaticArray; + private cv_stack: StaticArray>; + private cv_stack_len: u8; + private flags: u32; + + constructor() { + this.key_words = new StaticArray(8); + for (let i = 0; i < 8; i++) { + this.key_words[i] = IV[i]; + } + this.chunk_state = new ChunkState(this.key_words, 0, 0); + this.cv_stack = new StaticArray>(54); + for (let i = 0; i < 54; i++) { + this.cv_stack[i] = new StaticArray(8); + } + this.cv_stack_len = 0; + this.flags = 0; + } + + update(input: Uint8Array): void { + let inputPos = 0; + while (inputPos < input.length) { + if (this.chunk_state.len() == CHUNK_LEN) { + const chunk_cv = this.chunk_state.output().chaining_value(); + const total_chunks = this.chunk_state.chunk_counter + 1; + this.add_chunk_chaining_value(chunk_cv, total_chunks); + this.chunk_state = new ChunkState(this.key_words, total_chunks, this.flags); + } + + const want = CHUNK_LEN - this.chunk_state.len(); + const take = min(want, input.length - inputPos); + this.chunk_state.update(input.subarray(inputPos, inputPos + take)); + inputPos += take; + } + } + + finalize(out: Uint8Array): void { + let output = this.chunk_state.output(); + let parent_nodes_remaining = this.cv_stack_len; + + while (parent_nodes_remaining > 0) { + parent_nodes_remaining--; + output = parent_output( + this.cv_stack[parent_nodes_remaining], + output.chaining_value(), + this.key_words, + this.flags + ); + } + + output.root_output_bytes(out); + } + + private 
add_chunk_chaining_value(new_cv: StaticArray, total_chunks: u64): void { + let mut_new_cv = new_cv; + let mut_total_chunks = total_chunks; + + while ((mut_total_chunks & 1) == 0) { + mut_new_cv = parent_cv(this.pop_stack(), mut_new_cv, this.key_words, this.flags); + mut_total_chunks >>= 1; + } + + this.push_stack(mut_new_cv); + } + + private push_stack(cv: StaticArray): void { + for (let i = 0; i < 8; i++) { + this.cv_stack[this.cv_stack_len][i] = cv[i]; + } + this.cv_stack_len++; + } + + private pop_stack(): StaticArray { + this.cv_stack_len--; + return this.cv_stack[this.cv_stack_len]; + } +} + +class ChunkState { + chaining_value: StaticArray; + chunk_counter: u64; + block: Uint8Array; + block_len: u8; + blocks_compressed: u8; + flags: u32; + + constructor(key_words: StaticArray, chunk_counter: u64, flags: u32) { + this.chaining_value = new StaticArray(8); + for (let i = 0; i < 8; i++) { + this.chaining_value[i] = key_words[i]; + } + this.chunk_counter = chunk_counter; + this.block = new Uint8Array(BLOCK_LEN); + this.block_len = 0; + this.blocks_compressed = 0; + this.flags = flags; + } + + len(): usize { + return BLOCK_LEN * this.blocks_compressed + this.block_len; + } + + start_flag(): u32 { + return this.blocks_compressed == 0 ? 
CHUNK_START : 0; + } + + update(input: Uint8Array): void { + let inputPos = 0; + while (inputPos < input.length) { + if (this.block_len == BLOCK_LEN) { + const block_words = new StaticArray(16); + words_from_little_endian_bytes(this.block, block_words); + const compressed = compress( + this.chaining_value, + block_words, + this.chunk_counter, + BLOCK_LEN, + this.flags | this.start_flag() + ); + for (let i = 0; i < 8; i++) { + this.chaining_value[i] = compressed[i]; + } + this.blocks_compressed++; + this.block = new Uint8Array(BLOCK_LEN); + this.block_len = 0; + } + + const want = BLOCK_LEN - this.block_len; + const take = min(want, input.length - inputPos); + for (let i = 0; i < take; i++) { + this.block[this.block_len + i] = input[inputPos + i]; + } + this.block_len += take; + inputPos += take; + } + } + + output(): Output { + const block_words = new StaticArray(16); + words_from_little_endian_bytes(this.block, block_words); + return new Output( + this.chaining_value, + block_words, + this.chunk_counter, + this.block_len, + this.flags | this.start_flag() | CHUNK_END + ); + } +} + +class Output { + input_chaining_value: StaticArray; + block_words: StaticArray; + counter: u64; + block_len: u32; + flags: u32; + + constructor( + input_chaining_value: StaticArray, + block_words: StaticArray, + counter: u64, + block_len: u32, + flags: u32 + ) { + this.input_chaining_value = input_chaining_value; + this.block_words = block_words; + this.counter = counter; + this.block_len = block_len; + this.flags = flags; + } + + chaining_value(): StaticArray { + const compressed = compress(this.input_chaining_value, this.block_words, this.counter, this.block_len, this.flags); + const result = new StaticArray(8); + for (let i = 0; i < 8; i++) { + result[i] = compressed[i]; + } + return result; + } + + root_output_bytes(out: Uint8Array): void { + let output_block_counter: u64 = 0; + for (let i = 0; i < out.length; i += 2 * OUT_LEN) { + const words = compress( + this.input_chaining_value, 
+ this.block_words, + output_block_counter, + this.block_len, + this.flags | ROOT + ); + const out_block = out.subarray(i, i + 2 * OUT_LEN); + for (let j = 0; j < words.length; j++) { + const word = words[j]; + const offset = j * 4; + if (offset < out_block.length) { + out_block[offset] = word & 0xff; + if (offset + 1 < out_block.length) { + out_block[offset + 1] = (word >> 8) & 0xff; + if (offset + 2 < out_block.length) { + out_block[offset + 2] = (word >> 16) & 0xff; + if (offset + 3 < out_block.length) { + out_block[offset + 3] = (word >> 24) & 0xff; + } + } + } + } + } + output_block_counter++; + } + } +} + +function parent_output( + left_child_cv: StaticArray, + right_child_cv: StaticArray, + key_words: StaticArray, + flags: u32 +): Output { + const block_words = new StaticArray(16); + for (let i = 0; i < 8; i++) { + block_words[i] = left_child_cv[i]; + block_words[i + 8] = right_child_cv[i]; + } + return new Output(key_words, block_words, 0, BLOCK_LEN, PARENT | flags); +} + +function parent_cv( + left_child_cv: StaticArray, + right_child_cv: StaticArray, + key_words: StaticArray, + flags: u32 +): StaticArray { + return parent_output(left_child_cv, right_child_cv, key_words, flags).chaining_value(); +} + +function min(a: usize, b: usize): usize { + return a < b ? a : b; +} diff --git a/packages/gearhash-wasm/package.json b/packages/gearhash-wasm/package.json index 3417394eca..766b1f7b8c 100644 --- a/packages/gearhash-wasm/package.json +++ b/packages/gearhash-wasm/package.json @@ -1,5 +1,5 @@ { - "name": "@huggingface/gearhash-wasms", + "name": "@huggingface/gearhash-wasm", "version": "0.0.1", "scripts": { "build:debug": "asc assembly/index.ts --target debug", @@ -9,8 +9,15 @@ "start": "npx serve ." 
}, "dependencies": { - "assemblyscript": "^0.27.36" + "assemblyscript": "^0.27.36", + "@huggingface/blake3-wasm": "workspace:*", + "@huggingface/xetchunk-wasm": "workspace:*" }, + "keywords": [ + "gearhash", + "assemblyscript", + "wasm" + ], "type": "module", "exports": { ".": { diff --git a/packages/gearhash-wasm/pnpm-lock.yaml b/packages/gearhash-wasm/pnpm-lock.yaml index f0edbf2226..c86f2350d3 100644 --- a/packages/gearhash-wasm/pnpm-lock.yaml +++ b/packages/gearhash-wasm/pnpm-lock.yaml @@ -8,6 +8,12 @@ importers: .: dependencies: + '@huggingface/blake3-wasm': + specifier: workspace:* + version: link:../blake3-wasm + '@huggingface/xetchunk-wasm': + specifier: workspace:* + version: link:../xetchunk-wasm assemblyscript: specifier: ^0.27.36 version: 0.27.36 diff --git a/packages/xetchunk-wasm/assembly/index.ts b/packages/xetchunk-wasm/assembly/index.ts new file mode 100644 index 0000000000..e4bd372b33 --- /dev/null +++ b/packages/xetchunk-wasm/assembly/index.ts @@ -0,0 +1,2 @@ +export * from "./xet-chunker"; +export * from "./next-match"; diff --git a/packages/xetchunk-wasm/assembly/next-match.ts b/packages/xetchunk-wasm/assembly/next-match.ts new file mode 100644 index 0000000000..5cf17752c1 --- /dev/null +++ b/packages/xetchunk-wasm/assembly/next-match.ts @@ -0,0 +1,28 @@ +export class MatchResult { + position: i32; + hash: u64; + + constructor(position: i32, hash: u64) { + this.position = position; + this.hash = hash; + } +} + +export function nextMatch(data: Uint8Array, mask: u64, hash: u64): MatchResult { + const nBytes = data.length; + let pos: usize = 0; + + while (pos < nBytes) { + // Update hash with next byte + hash = ((hash << 1) | data[pos]) & mask; + + // Check if we found a match + if (hash == 0) { + return new MatchResult(pos, hash); + } + + pos++; + } + + return new MatchResult(-1, hash); +} diff --git a/packages/xetchunk-wasm/assembly/tsconfig.json b/packages/xetchunk-wasm/assembly/tsconfig.json new file mode 100644 index 0000000000..8131d68a0a 
--- /dev/null +++ b/packages/xetchunk-wasm/assembly/tsconfig.json @@ -0,0 +1,4 @@ +{ + "extends": "../node_modules/.pnpm/assemblyscript@0.27.36/node_modules/assemblyscript/std/assembly.json", + "include": ["./**/*.ts"] +} diff --git a/packages/gearhash-wasm/assembly/xet-chunker.ts b/packages/xetchunk-wasm/assembly/xet-chunker.ts similarity index 94% rename from packages/gearhash-wasm/assembly/xet-chunker.ts rename to packages/xetchunk-wasm/assembly/xet-chunker.ts index 2e501cc815..2054a20694 100644 --- a/packages/gearhash-wasm/assembly/xet-chunker.ts +++ b/packages/xetchunk-wasm/assembly/xet-chunker.ts @@ -1,4 +1,5 @@ import { nextMatch } from "./next-match"; +import { Blake3Hasher } from "@huggingface/blake3-wasm"; // Constants const TARGET_CHUNK_SIZE: usize = 64 * 1024; // 64KB @@ -125,14 +126,11 @@ export class XetChunker { } } -// Simple SHA-256 implementation for data hashing function computeDataHash(data: Uint8Array): Uint8Array { - // TODO: Replace with actual SHA-256 implementation - // For now, using a simple hash function for demonstration + const hasher = new Blake3Hasher(); + hasher.update(data); const hash = new Uint8Array(32); - for (let i = 0; i < data.length; i++) { - hash[i % 32] ^= data[i]; - } + hasher.finalize(hash); return hash; } diff --git a/packages/xetchunk-wasm/package.json b/packages/xetchunk-wasm/package.json new file mode 100644 index 0000000000..ee20e8a094 --- /dev/null +++ b/packages/xetchunk-wasm/package.json @@ -0,0 +1,23 @@ +{ + "name": "@huggingface/xetchunk-wasm", + "version": "0.0.1", + "scripts": { + "build:debug": "asc assembly/index.ts --target debug", + "build:release": "asc assembly/index.ts --target release", + "build": "npm run build:debug && npm run build:release", + "test": "node tests" + }, + "dependencies": { + "@huggingface/blake3-wasm": "workspace:*" + }, + "type": "module", + "exports": { + ".": { + "import": "./build/release.js", + "types": "./build/release.d.ts" + } + }, + "devDependencies": { + "assemblyscript": 
"^0.27.36" + } +} diff --git a/packages/xetchunk-wasm/pnpm-lock.yaml b/packages/xetchunk-wasm/pnpm-lock.yaml new file mode 100644 index 0000000000..23a82a8008 --- /dev/null +++ b/packages/xetchunk-wasm/pnpm-lock.yaml @@ -0,0 +1,42 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + dependencies: + '@huggingface/blake3-wasm': + specifier: workspace:* + version: link:../blake3-wasm + devDependencies: + assemblyscript: + specifier: ^0.27.36 + version: 0.27.37 + +packages: + + assemblyscript@0.27.37: + resolution: {integrity: sha512-YtY5k3PiV3SyUQ6gRlR2OCn8dcVRwkpiG/k2T5buoL2ymH/Z/YbaYWbk/f9mO2HTgEtGWjPiAQrIuvA7G/63Gg==} + engines: {node: '>=18', npm: '>=10'} + hasBin: true + + binaryen@116.0.0-nightly.20240114: + resolution: {integrity: sha512-0GZrojJnuhoe+hiwji7QFaL3tBlJoA+KFUN7ouYSDGZLSo9CKM8swQX8n/UcbR0d1VuZKU+nhogNzv423JEu5A==} + hasBin: true + + long@5.3.2: + resolution: {integrity: sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==} + +snapshots: + + assemblyscript@0.27.37: + dependencies: + binaryen: 116.0.0-nightly.20240114 + long: 5.3.2 + + binaryen@116.0.0-nightly.20240114: {} + + long@5.3.2: {} diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index e6016bff8c..bc118ffdff 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -15,3 +15,5 @@ packages: - "packages/mcp-client" - "packages/tiny-agents" - "packages/gearhash-wasm" + - "packages/blake3-wasm" + - "packages/xetchunk-wasm" diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000000..fbe8ff6fda --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,13 @@ +{ + "compilerOptions": { + "target": "ESNext", + "module": "ESNext", + "moduleResolution": "node", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "lib": ["ESNext"], + "types": ["assemblyscript"] + } +} From 07b115dc5a0427f2d9a32fdbc2c334af986ff44c 
Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 4 Jun 2025 16:22:41 +0200 Subject: [PATCH 11/18] fix TS refs --- packages/README.md | 3 + packages/blake3-wasm/assembly/blake3.ts | 4 - packages/blake3-wasm/assembly/tsconfig.json | 2 +- packages/blake3-wasm/package.json | 5 +- packages/gearhash-wasm/assembly/blake3.ts | 357 ------------------ packages/gearhash-wasm/package.json | 4 +- packages/xetchunk-wasm/assembly/tsconfig.json | 2 +- packages/xetchunk-wasm/package.json | 5 +- 8 files changed, 13 insertions(+), 369 deletions(-) create mode 100644 packages/README.md delete mode 100644 packages/gearhash-wasm/assembly/blake3.ts diff --git a/packages/README.md b/packages/README.md new file mode 100644 index 0000000000..020157a990 --- /dev/null +++ b/packages/README.md @@ -0,0 +1,3 @@ +A WASM implementation of blake3 using assemblyscript. + +Implementation based on https://github.com/BLAKE3-team/BLAKE3/blob/master/reference_impl/reference_impl.rs \ No newline at end of file diff --git a/packages/blake3-wasm/assembly/blake3.ts b/packages/blake3-wasm/assembly/blake3.ts index 46c19b4db3..8f1ddb537f 100644 --- a/packages/blake3-wasm/assembly/blake3.ts +++ b/packages/blake3-wasm/assembly/blake3.ts @@ -1,7 +1,3 @@ -// Import AssemblyScript types -import type { usize, u32, u8, u64 } from "assemblyscript"; -import { StaticArray } from "assemblyscript"; - // Constants from the reference implementation const OUT_LEN: usize = 32; // const KEY_LEN: usize = 32; diff --git a/packages/blake3-wasm/assembly/tsconfig.json b/packages/blake3-wasm/assembly/tsconfig.json index 8131d68a0a..33daff5dac 100644 --- a/packages/blake3-wasm/assembly/tsconfig.json +++ b/packages/blake3-wasm/assembly/tsconfig.json @@ -1,4 +1,4 @@ { - "extends": "../node_modules/.pnpm/assemblyscript@0.27.36/node_modules/assemblyscript/std/assembly.json", + "extends": "../node_modules/.pnpm/assemblyscript@0.27.37/node_modules/assemblyscript/std/assembly.json", "include": ["./**/*.ts"] } diff --git 
a/packages/blake3-wasm/package.json b/packages/blake3-wasm/package.json index 7d258a5372..cdb861f379 100644 --- a/packages/blake3-wasm/package.json +++ b/packages/blake3-wasm/package.json @@ -4,8 +4,9 @@ "scripts": { "build:debug": "asc assembly/index.ts --target debug", "build:release": "asc assembly/index.ts --target release", - "build": "npm run build:debug && npm run build:release", - "test": "node tests" + "build": "pnpm run build:debug && npm run build:release", + "test": "node tests", + "prepare": "pnpm run build" }, "dependencies": { "assemblyscript": "^0.27.36" diff --git a/packages/gearhash-wasm/assembly/blake3.ts b/packages/gearhash-wasm/assembly/blake3.ts deleted file mode 100644 index 44239e5390..0000000000 --- a/packages/gearhash-wasm/assembly/blake3.ts +++ /dev/null @@ -1,357 +0,0 @@ -// Constants from the reference implementation -const OUT_LEN: usize = 32; -const KEY_LEN: usize = 32; -const BLOCK_LEN: usize = 64; -const CHUNK_LEN: usize = 1024; - -const CHUNK_START: u32 = 1 << 0; -const CHUNK_END: u32 = 1 << 1; -const PARENT: u32 = 1 << 2; -const ROOT: u32 = 1 << 3; -const KEYED_HASH: u32 = 1 << 4; -const DERIVE_KEY_CONTEXT: u32 = 1 << 5; -const DERIVE_KEY_MATERIAL: u32 = 1 << 6; - -const IV: StaticArray = [ - 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, -]; - -const MSG_PERMUTATION: StaticArray = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8]; - -// The mixing function, G, which mixes either a column or a diagonal. 
-function g(state: StaticArray, a: usize, b: usize, c: usize, d: usize, mx: u32, my: u32): void { - state[a] = state[a] + state[b] + mx; - state[d] = rotl32(state[d] ^ state[a], 16); - state[c] = state[c] + state[d]; - state[b] = rotl32(state[b] ^ state[c], 12); - state[a] = state[a] + state[b] + my; - state[d] = rotl32(state[d] ^ state[a], 8); - state[c] = state[c] + state[d]; - state[b] = rotl32(state[b] ^ state[c], 7); -} - -// Rotate left by n bits -function rotl32(x: u32, n: u32): u32 { - return (x << n) | (x >>> (32 - n)); -} - -function round(state: StaticArray, m: StaticArray): void { - // Mix the columns. - g(state, 0, 4, 8, 12, m[0], m[1]); - g(state, 1, 5, 9, 13, m[2], m[3]); - g(state, 2, 6, 10, 14, m[4], m[5]); - g(state, 3, 7, 11, 15, m[6], m[7]); - // Mix the diagonals. - g(state, 0, 5, 10, 15, m[8], m[9]); - g(state, 1, 6, 11, 12, m[10], m[11]); - g(state, 2, 7, 8, 13, m[12], m[13]); - g(state, 3, 4, 9, 14, m[14], m[15]); -} - -function permute(m: StaticArray): void { - const permuted = new StaticArray(16); - for (let i = 0; i < 16; i++) { - permuted[i] = m[MSG_PERMUTATION[i]]; - } - for (let i = 0; i < 16; i++) { - m[i] = permuted[i]; - } -} - -function compress( - chaining_value: StaticArray, - block_words: StaticArray, - counter: u64, - block_len: u32, - flags: u32 -): StaticArray { - const counter_low = counter as u32; - const counter_high = (counter >> 32) as u32; - const state = new StaticArray(16); - - // Initialize state - for (let i = 0; i < 8; i++) { - state[i] = chaining_value[i]; - state[i + 8] = IV[i]; - } - state[12] = counter_low; - state[13] = counter_high; - state[14] = block_len; - state[15] = flags; - - const block = new StaticArray(16); - for (let i = 0; i < 16; i++) { - block[i] = block_words[i]; - } - - // Apply rounds - round(state, block); - permute(block); - round(state, block); - permute(block); - round(state, block); - permute(block); - round(state, block); - permute(block); - round(state, block); - permute(block); - 
round(state, block); - permute(block); - round(state, block); - - // Final mixing - for (let i = 0; i < 8; i++) { - state[i] ^= state[i + 8]; - state[i + 8] ^= chaining_value[i]; - } - - return state; -} - -function words_from_little_endian_bytes(bytes: Uint8Array, words: StaticArray): void { - for (let i = 0; i < words.length; i++) { - const offset = i * 4; - words[i] = bytes[offset] | (bytes[offset + 1] << 8) | (bytes[offset + 2] << 16) | (bytes[offset + 3] << 24); - } -} - -export class Blake3Hasher { - private chunk_state: ChunkState; - private key_words: StaticArray; - private cv_stack: StaticArray>; - private cv_stack_len: u8; - private flags: u32; - - constructor() { - this.key_words = new StaticArray(8); - for (let i = 0; i < 8; i++) { - this.key_words[i] = IV[i]; - } - this.chunk_state = new ChunkState(this.key_words, 0, 0); - this.cv_stack = new StaticArray>(54); - for (let i = 0; i < 54; i++) { - this.cv_stack[i] = new StaticArray(8); - } - this.cv_stack_len = 0; - this.flags = 0; - } - - update(input: Uint8Array): void { - let inputPos = 0; - while (inputPos < input.length) { - if (this.chunk_state.len() == CHUNK_LEN) { - const chunk_cv = this.chunk_state.output().chaining_value(); - const total_chunks = this.chunk_state.chunk_counter + 1; - this.add_chunk_chaining_value(chunk_cv, total_chunks); - this.chunk_state = new ChunkState(this.key_words, total_chunks, this.flags); - } - - const want = CHUNK_LEN - this.chunk_state.len(); - const take = min(want, input.length - inputPos); - this.chunk_state.update(input.subarray(inputPos, inputPos + take)); - inputPos += take; - } - } - - finalize(out: Uint8Array): void { - let output = this.chunk_state.output(); - let parent_nodes_remaining = this.cv_stack_len; - - while (parent_nodes_remaining > 0) { - parent_nodes_remaining--; - output = parent_output( - this.cv_stack[parent_nodes_remaining], - output.chaining_value(), - this.key_words, - this.flags - ); - } - - output.root_output_bytes(out); - } - - private 
add_chunk_chaining_value(new_cv: StaticArray, total_chunks: u64): void { - let mut_new_cv = new_cv; - let mut_total_chunks = total_chunks; - - while ((mut_total_chunks & 1) == 0) { - mut_new_cv = parent_cv(this.pop_stack(), mut_new_cv, this.key_words, this.flags); - mut_total_chunks >>= 1; - } - - this.push_stack(mut_new_cv); - } - - private push_stack(cv: StaticArray): void { - for (let i = 0; i < 8; i++) { - this.cv_stack[this.cv_stack_len][i] = cv[i]; - } - this.cv_stack_len++; - } - - private pop_stack(): StaticArray { - this.cv_stack_len--; - return this.cv_stack[this.cv_stack_len]; - } -} - -class ChunkState { - chaining_value: StaticArray; - chunk_counter: u64; - block: Uint8Array; - block_len: u8; - blocks_compressed: u8; - flags: u32; - - constructor(key_words: StaticArray, chunk_counter: u64, flags: u32) { - this.chaining_value = new StaticArray(8); - for (let i = 0; i < 8; i++) { - this.chaining_value[i] = key_words[i]; - } - this.chunk_counter = chunk_counter; - this.block = new Uint8Array(BLOCK_LEN); - this.block_len = 0; - this.blocks_compressed = 0; - this.flags = flags; - } - - len(): usize { - return BLOCK_LEN * this.blocks_compressed + this.block_len; - } - - start_flag(): u32 { - return this.blocks_compressed == 0 ? 
CHUNK_START : 0; - } - - update(input: Uint8Array): void { - let inputPos = 0; - while (inputPos < input.length) { - if (this.block_len == BLOCK_LEN) { - const block_words = new StaticArray(16); - words_from_little_endian_bytes(this.block, block_words); - const compressed = compress( - this.chaining_value, - block_words, - this.chunk_counter, - BLOCK_LEN, - this.flags | this.start_flag() - ); - for (let i = 0; i < 8; i++) { - this.chaining_value[i] = compressed[i]; - } - this.blocks_compressed++; - this.block = new Uint8Array(BLOCK_LEN); - this.block_len = 0; - } - - const want = BLOCK_LEN - this.block_len; - const take = min(want, input.length - inputPos); - for (let i = 0; i < take; i++) { - this.block[this.block_len + i] = input[inputPos + i]; - } - this.block_len += take; - inputPos += take; - } - } - - output(): Output { - const block_words = new StaticArray(16); - words_from_little_endian_bytes(this.block, block_words); - return new Output( - this.chaining_value, - block_words, - this.chunk_counter, - this.block_len, - this.flags | this.start_flag() | CHUNK_END - ); - } -} - -class Output { - input_chaining_value: StaticArray; - block_words: StaticArray; - counter: u64; - block_len: u32; - flags: u32; - - constructor( - input_chaining_value: StaticArray, - block_words: StaticArray, - counter: u64, - block_len: u32, - flags: u32 - ) { - this.input_chaining_value = input_chaining_value; - this.block_words = block_words; - this.counter = counter; - this.block_len = block_len; - this.flags = flags; - } - - chaining_value(): StaticArray { - const compressed = compress(this.input_chaining_value, this.block_words, this.counter, this.block_len, this.flags); - const result = new StaticArray(8); - for (let i = 0; i < 8; i++) { - result[i] = compressed[i]; - } - return result; - } - - root_output_bytes(out: Uint8Array): void { - let output_block_counter: u64 = 0; - for (let i = 0; i < out.length; i += 2 * OUT_LEN) { - const words = compress( - this.input_chaining_value, 
- this.block_words, - output_block_counter, - this.block_len, - this.flags | ROOT - ); - const out_block = out.subarray(i, i + 2 * OUT_LEN); - for (let j = 0; j < words.length; j++) { - const word = words[j]; - const offset = j * 4; - if (offset < out_block.length) { - out_block[offset] = word & 0xff; - if (offset + 1 < out_block.length) { - out_block[offset + 1] = (word >> 8) & 0xff; - if (offset + 2 < out_block.length) { - out_block[offset + 2] = (word >> 16) & 0xff; - if (offset + 3 < out_block.length) { - out_block[offset + 3] = (word >> 24) & 0xff; - } - } - } - } - } - output_block_counter++; - } - } -} - -function parent_output( - left_child_cv: StaticArray, - right_child_cv: StaticArray, - key_words: StaticArray, - flags: u32 -): Output { - const block_words = new StaticArray(16); - for (let i = 0; i < 8; i++) { - block_words[i] = left_child_cv[i]; - block_words[i + 8] = right_child_cv[i]; - } - return new Output(key_words, block_words, 0, BLOCK_LEN, PARENT | flags); -} - -function parent_cv( - left_child_cv: StaticArray, - right_child_cv: StaticArray, - key_words: StaticArray, - flags: u32 -): StaticArray { - return parent_output(left_child_cv, right_child_cv, key_words, flags).chaining_value(); -} - -function min(a: usize, b: usize): usize { - return a < b ? a : b; -} diff --git a/packages/gearhash-wasm/package.json b/packages/gearhash-wasm/package.json index 766b1f7b8c..be57a01a40 100644 --- a/packages/gearhash-wasm/package.json +++ b/packages/gearhash-wasm/package.json @@ -4,9 +4,9 @@ "scripts": { "build:debug": "asc assembly/index.ts --target debug", "build:release": "asc assembly/index.ts --target release", - "build": "npm run build:debug && npm run build:release", + "build": "pnpm run build:debug && npm run build:release", "test": "node tests", - "start": "npx serve ." 
+ "prepare": "pnpm run build" }, "dependencies": { "assemblyscript": "^0.27.36", diff --git a/packages/xetchunk-wasm/assembly/tsconfig.json b/packages/xetchunk-wasm/assembly/tsconfig.json index 8131d68a0a..33daff5dac 100644 --- a/packages/xetchunk-wasm/assembly/tsconfig.json +++ b/packages/xetchunk-wasm/assembly/tsconfig.json @@ -1,4 +1,4 @@ { - "extends": "../node_modules/.pnpm/assemblyscript@0.27.36/node_modules/assemblyscript/std/assembly.json", + "extends": "../node_modules/.pnpm/assemblyscript@0.27.37/node_modules/assemblyscript/std/assembly.json", "include": ["./**/*.ts"] } diff --git a/packages/xetchunk-wasm/package.json b/packages/xetchunk-wasm/package.json index ee20e8a094..3929f2ef8f 100644 --- a/packages/xetchunk-wasm/package.json +++ b/packages/xetchunk-wasm/package.json @@ -4,8 +4,9 @@ "scripts": { "build:debug": "asc assembly/index.ts --target debug", "build:release": "asc assembly/index.ts --target release", - "build": "npm run build:debug && npm run build:release", - "test": "node tests" + "build": "pnpm run build:debug && npm run build:release", + "test": "node tests", + "prepare": "pnpm run build" }, "dependencies": { "@huggingface/blake3-wasm": "workspace:*" From 2664cba2be3ed620884dd57a5b00e8d2820f0569 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 4 Jun 2025 16:24:39 +0200 Subject: [PATCH 12/18] use builtins --- packages/blake3-wasm/assembly/blake3.ts | 4 --- .../xetchunk-wasm/assembly/xet-chunker.ts | 35 ------------------- 2 files changed, 39 deletions(-) diff --git a/packages/blake3-wasm/assembly/blake3.ts b/packages/blake3-wasm/assembly/blake3.ts index 8f1ddb537f..eadddaa2de 100644 --- a/packages/blake3-wasm/assembly/blake3.ts +++ b/packages/blake3-wasm/assembly/blake3.ts @@ -351,7 +351,3 @@ function parent_cv( ): StaticArray { return parent_output(left_child_cv, right_child_cv, key_words, flags).chaining_value(); } - -function min(a: usize, b: usize): usize { - return a < b ? 
a : b; -} diff --git a/packages/xetchunk-wasm/assembly/xet-chunker.ts b/packages/xetchunk-wasm/assembly/xet-chunker.ts index 2054a20694..6816064efd 100644 --- a/packages/xetchunk-wasm/assembly/xet-chunker.ts +++ b/packages/xetchunk-wasm/assembly/xet-chunker.ts @@ -133,38 +133,3 @@ function computeDataHash(data: Uint8Array): Uint8Array { hasher.finalize(hash); return hash; } - -// Helper function to find minimum of two numbers -function min(a: usize, b: usize): usize { - return a < b ? a : b; -} - -// Helper function to count leading zeros -function clz(x: u64): u32 { - let n: u32 = 0; - if (x == 0) return 64; - if ((x & 0xffffffff00000000) == 0) { - n += 32; - x <<= 32; - } - if ((x & 0xffff000000000000) == 0) { - n += 16; - x <<= 16; - } - if ((x & 0xff00000000000000) == 0) { - n += 8; - x <<= 8; - } - if ((x & 0xf000000000000000) == 0) { - n += 4; - x <<= 4; - } - if ((x & 0xc000000000000000) == 0) { - n += 2; - x <<= 2; - } - if ((x & 0x8000000000000000) == 0) { - n += 1; - } - return n; -} From 031dc0d6ee7c4f4046af76d1779f4b7963e9e2cf Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 4 Jun 2025 16:33:40 +0200 Subject: [PATCH 13/18] package org --- packages/blake3-wasm/package.json | 12 ++++++-- packages/blake3-wasm/pnpm-lock.yaml | 2 +- packages/gearhash-wasm/package.json | 9 +++--- packages/gearhash-wasm/pnpm-lock.yaml | 8 +----- packages/xetchunk-wasm/assembly/index.ts | 1 - packages/xetchunk-wasm/assembly/next-match.ts | 28 ------------------- .../xetchunk-wasm/assembly/xet-chunker.ts | 4 +-- packages/xetchunk-wasm/package.json | 14 +++++++++- packages/xetchunk-wasm/pnpm-lock.yaml | 3 ++ 9 files changed, 33 insertions(+), 48 deletions(-) delete mode 100644 packages/xetchunk-wasm/assembly/next-match.ts diff --git a/packages/blake3-wasm/package.json b/packages/blake3-wasm/package.json index cdb861f379..32b92cc077 100644 --- a/packages/blake3-wasm/package.json +++ b/packages/blake3-wasm/package.json @@ -8,14 +8,20 @@ "test": "node tests", "prepare": "pnpm 
run build" }, - "dependencies": { - "assemblyscript": "^0.27.36" - }, + "keywords": [ + "blake3", + "assemblyscript", + "assembly", + "wasm" + ], "type": "module", "exports": { ".": { "import": "./build/release.js", "types": "./build/release.d.ts" + }, + "./assembly": { + "import": "./assembly/index.ts" } }, "devDependencies": { diff --git a/packages/blake3-wasm/pnpm-lock.yaml b/packages/blake3-wasm/pnpm-lock.yaml index d18c304a0e..f96d25e4f8 100644 --- a/packages/blake3-wasm/pnpm-lock.yaml +++ b/packages/blake3-wasm/pnpm-lock.yaml @@ -7,7 +7,7 @@ settings: importers: .: - dependencies: + devDependencies: assemblyscript: specifier: ^0.27.36 version: 0.27.37 diff --git a/packages/gearhash-wasm/package.json b/packages/gearhash-wasm/package.json index be57a01a40..54b4f3b690 100644 --- a/packages/gearhash-wasm/package.json +++ b/packages/gearhash-wasm/package.json @@ -8,14 +8,10 @@ "test": "node tests", "prepare": "pnpm run build" }, - "dependencies": { - "assemblyscript": "^0.27.36", - "@huggingface/blake3-wasm": "workspace:*", - "@huggingface/xetchunk-wasm": "workspace:*" - }, "keywords": [ "gearhash", "assemblyscript", + "assembly", "wasm" ], "type": "module", @@ -23,6 +19,9 @@ ".": { "import": "./build/release.js", "types": "./build/release.d.ts" + }, + "./assembly": { + "import": "./assembly/index.ts" } }, "devDependencies": { diff --git a/packages/gearhash-wasm/pnpm-lock.yaml b/packages/gearhash-wasm/pnpm-lock.yaml index c86f2350d3..cf8533a3b9 100644 --- a/packages/gearhash-wasm/pnpm-lock.yaml +++ b/packages/gearhash-wasm/pnpm-lock.yaml @@ -7,13 +7,7 @@ settings: importers: .: - dependencies: - '@huggingface/blake3-wasm': - specifier: workspace:* - version: link:../blake3-wasm - '@huggingface/xetchunk-wasm': - specifier: workspace:* - version: link:../xetchunk-wasm + devDependencies: assemblyscript: specifier: ^0.27.36 version: 0.27.36 diff --git a/packages/xetchunk-wasm/assembly/index.ts b/packages/xetchunk-wasm/assembly/index.ts index e4bd372b33..d889a1ba0a 
100644 --- a/packages/xetchunk-wasm/assembly/index.ts +++ b/packages/xetchunk-wasm/assembly/index.ts @@ -1,2 +1 @@ export * from "./xet-chunker"; -export * from "./next-match"; diff --git a/packages/xetchunk-wasm/assembly/next-match.ts b/packages/xetchunk-wasm/assembly/next-match.ts deleted file mode 100644 index 5cf17752c1..0000000000 --- a/packages/xetchunk-wasm/assembly/next-match.ts +++ /dev/null @@ -1,28 +0,0 @@ -export class MatchResult { - position: i32; - hash: u64; - - constructor(position: i32, hash: u64) { - this.position = position; - this.hash = hash; - } -} - -export function nextMatch(data: Uint8Array, mask: u64, hash: u64): MatchResult { - const nBytes = data.length; - let pos: usize = 0; - - while (pos < nBytes) { - // Update hash with next byte - hash = ((hash << 1) | data[pos]) & mask; - - // Check if we found a match - if (hash == 0) { - return new MatchResult(pos, hash); - } - - pos++; - } - - return new MatchResult(-1, hash); -} diff --git a/packages/xetchunk-wasm/assembly/xet-chunker.ts b/packages/xetchunk-wasm/assembly/xet-chunker.ts index 6816064efd..fac83a1f8e 100644 --- a/packages/xetchunk-wasm/assembly/xet-chunker.ts +++ b/packages/xetchunk-wasm/assembly/xet-chunker.ts @@ -1,5 +1,5 @@ -import { nextMatch } from "./next-match"; -import { Blake3Hasher } from "@huggingface/blake3-wasm"; +import { nextMatch } from "@huggingface/gearhash-wasm/assembly"; +import { Blake3Hasher } from "@huggingface/blake3-wasm/assembly"; // Constants const TARGET_CHUNK_SIZE: usize = 64 * 1024; // 64KB diff --git a/packages/xetchunk-wasm/package.json b/packages/xetchunk-wasm/package.json index 3929f2ef8f..668e3da2c7 100644 --- a/packages/xetchunk-wasm/package.json +++ b/packages/xetchunk-wasm/package.json @@ -8,14 +8,26 @@ "test": "node tests", "prepare": "pnpm run build" }, + "keywords": [ + "xet", + "chunk", + "chunking", + "assemblyscript", + "assembly", + "wasm" + ], "dependencies": { - "@huggingface/blake3-wasm": "workspace:*" + "@huggingface/blake3-wasm": 
"workspace:*", + "@huggingface/gearhash-wasm": "workspace:*" }, "type": "module", "exports": { ".": { "import": "./build/release.js", "types": "./build/release.d.ts" + }, + "./assembly": { + "import": "./assembly/index.ts" } }, "devDependencies": { diff --git a/packages/xetchunk-wasm/pnpm-lock.yaml b/packages/xetchunk-wasm/pnpm-lock.yaml index 23a82a8008..32bdab0b41 100644 --- a/packages/xetchunk-wasm/pnpm-lock.yaml +++ b/packages/xetchunk-wasm/pnpm-lock.yaml @@ -11,6 +11,9 @@ importers: '@huggingface/blake3-wasm': specifier: workspace:* version: link:../blake3-wasm + '@huggingface/gearhash-wasm': + specifier: workspace:* + version: link:../gearhash-wasm devDependencies: assemblyscript: specifier: ^0.27.36 From 01d7472d0157dd766461424298460844c6e05536 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 4 Jun 2025 16:46:34 +0200 Subject: [PATCH 14/18] also provide direct wasm exports --- packages/blake3-wasm/package.json | 3 +++ packages/gearhash-wasm/package.json | 3 +++ packages/xetchunk-wasm/package.json | 3 +++ 3 files changed, 9 insertions(+) diff --git a/packages/blake3-wasm/package.json b/packages/blake3-wasm/package.json index 32b92cc077..4085980507 100644 --- a/packages/blake3-wasm/package.json +++ b/packages/blake3-wasm/package.json @@ -22,6 +22,9 @@ }, "./assembly": { "import": "./assembly/index.ts" + }, + "./wasm": { + "import": "./build/release.wasm" } }, "devDependencies": { diff --git a/packages/gearhash-wasm/package.json b/packages/gearhash-wasm/package.json index 54b4f3b690..ecc3ef79ff 100644 --- a/packages/gearhash-wasm/package.json +++ b/packages/gearhash-wasm/package.json @@ -22,6 +22,9 @@ }, "./assembly": { "import": "./assembly/index.ts" + }, + "./wasm": { + "import": "./build/release.wasm" } }, "devDependencies": { diff --git a/packages/xetchunk-wasm/package.json b/packages/xetchunk-wasm/package.json index 668e3da2c7..1842aa1c47 100644 --- a/packages/xetchunk-wasm/package.json +++ b/packages/xetchunk-wasm/package.json @@ -28,6 +28,9 @@ }, 
"./assembly": { "import": "./assembly/index.ts" + }, + "./wasm": { + "import": "./build/release.wasm" } }, "devDependencies": { From 2b9b57ae8498f92795079cac7d9eb52d706c53f8 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 4 Jun 2025 16:53:10 +0200 Subject: [PATCH 15/18] proper build outputs --- packages/blake3-wasm/asconfig.json | 22 ++++++++++++++++++++++ packages/blake3-wasm/build/.gitignore | 2 ++ packages/gearhash-wasm/index.html | 10 ---------- packages/xetchunk-wasm/asconfig.json | 22 ++++++++++++++++++++++ packages/xetchunk-wasm/build/.gitignore | 2 ++ 5 files changed, 48 insertions(+), 10 deletions(-) create mode 100644 packages/blake3-wasm/asconfig.json create mode 100644 packages/blake3-wasm/build/.gitignore delete mode 100644 packages/gearhash-wasm/index.html create mode 100644 packages/xetchunk-wasm/asconfig.json create mode 100644 packages/xetchunk-wasm/build/.gitignore diff --git a/packages/blake3-wasm/asconfig.json b/packages/blake3-wasm/asconfig.json new file mode 100644 index 0000000000..8776597856 --- /dev/null +++ b/packages/blake3-wasm/asconfig.json @@ -0,0 +1,22 @@ +{ + "targets": { + "debug": { + "outFile": "build/debug.wasm", + "textFile": "build/debug.wat", + "sourceMap": true, + "debug": true + }, + "release": { + "outFile": "build/release.wasm", + "textFile": "build/release.wat", + "sourceMap": true, + "optimizeLevel": 3, + "shrinkLevel": 0, + "converge": false, + "noAssert": false + } + }, + "options": { + "bindings": "esm" + } +} \ No newline at end of file diff --git a/packages/blake3-wasm/build/.gitignore b/packages/blake3-wasm/build/.gitignore new file mode 100644 index 0000000000..d6b7ef32c8 --- /dev/null +++ b/packages/blake3-wasm/build/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/packages/gearhash-wasm/index.html b/packages/gearhash-wasm/index.html deleted file mode 100644 index c170ddeb9a..0000000000 --- a/packages/gearhash-wasm/index.html +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - diff --git 
a/packages/xetchunk-wasm/asconfig.json b/packages/xetchunk-wasm/asconfig.json new file mode 100644 index 0000000000..8776597856 --- /dev/null +++ b/packages/xetchunk-wasm/asconfig.json @@ -0,0 +1,22 @@ +{ + "targets": { + "debug": { + "outFile": "build/debug.wasm", + "textFile": "build/debug.wat", + "sourceMap": true, + "debug": true + }, + "release": { + "outFile": "build/release.wasm", + "textFile": "build/release.wat", + "sourceMap": true, + "optimizeLevel": 3, + "shrinkLevel": 0, + "converge": false, + "noAssert": false + } + }, + "options": { + "bindings": "esm" + } +} \ No newline at end of file diff --git a/packages/xetchunk-wasm/build/.gitignore b/packages/xetchunk-wasm/build/.gitignore new file mode 100644 index 0000000000..d6b7ef32c8 --- /dev/null +++ b/packages/xetchunk-wasm/build/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore From af2ccc1e00a5289f18317edfb2207e7d00907ede Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 4 Jun 2025 16:54:50 +0200 Subject: [PATCH 16/18] add exports for JS --- packages/blake3-wasm/assembly/blake3.ts | 4 ++++ packages/xetchunk-wasm/assembly/xet-chunker.ts | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/packages/blake3-wasm/assembly/blake3.ts b/packages/blake3-wasm/assembly/blake3.ts index eadddaa2de..8fd20946c4 100644 --- a/packages/blake3-wasm/assembly/blake3.ts +++ b/packages/blake3-wasm/assembly/blake3.ts @@ -351,3 +351,7 @@ function parent_cv( ): StaticArray { return parent_output(left_child_cv, right_child_cv, key_words, flags).chaining_value(); } + +export function createBlake3Hasher(): Blake3Hasher { + return new Blake3Hasher(); +} diff --git a/packages/xetchunk-wasm/assembly/xet-chunker.ts b/packages/xetchunk-wasm/assembly/xet-chunker.ts index fac83a1f8e..2145684eab 100644 --- a/packages/xetchunk-wasm/assembly/xet-chunker.ts +++ b/packages/xetchunk-wasm/assembly/xet-chunker.ts @@ -133,3 +133,7 @@ function computeDataHash(data: Uint8Array): Uint8Array { hasher.finalize(hash); return hash; } + +export function 
createXetChunker(targetChunkSize: usize = TARGET_CHUNK_SIZE): XetChunker { + return new XetChunker(targetChunkSize); +} From f1a31cb2191cd686d9282f21376a041848916de0 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 13 Jun 2025 11:37:37 +0200 Subject: [PATCH 17/18] add test for blake3 + simple function --- packages/blake3-wasm/assembly/blake3.ts | 53 +++++--- packages/blake3-wasm/tests/index.js | 163 ++++++++++++++++++++++++ 2 files changed, 198 insertions(+), 18 deletions(-) create mode 100644 packages/blake3-wasm/tests/index.js diff --git a/packages/blake3-wasm/assembly/blake3.ts b/packages/blake3-wasm/assembly/blake3.ts index 8fd20946c4..d864765385 100644 --- a/packages/blake3-wasm/assembly/blake3.ts +++ b/packages/blake3-wasm/assembly/blake3.ts @@ -1,8 +1,8 @@ // Constants from the reference implementation -const OUT_LEN: usize = 32; +const OUT_LEN: i32 = 32; // const KEY_LEN: usize = 32; -const BLOCK_LEN: usize = 64; -const CHUNK_LEN: usize = 1024; +const BLOCK_LEN: i32 = 64; +const CHUNK_LEN: i32 = 1024; const CHUNK_START: u32 = 1 << 0; const CHUNK_END: u32 = 1 << 1; @@ -16,10 +16,10 @@ const IV: StaticArray = [ 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, ]; -const MSG_PERMUTATION: StaticArray = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8]; +const MSG_PERMUTATION: StaticArray = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8]; // The mixing function, G, which mixes either a column or a diagonal. 
-function g(state: StaticArray, a: usize, b: usize, c: usize, d: usize, mx: u32, my: u32): void { +function g(state: StaticArray, a: i32, b: i32, c: i32, d: i32, mx: u32, my: u32): void { state[a] = state[a] + state[b] + mx; state[d] = rotl32(state[d] ^ state[a], 16); state[c] = state[c] + state[d]; @@ -115,7 +115,7 @@ function words_from_little_endian_bytes(bytes: Uint8Array, words: StaticArray; private cv_stack: StaticArray>; @@ -123,17 +123,19 @@ export class Blake3Hasher { private flags: u32; constructor() { - this.key_words = new StaticArray(8); + const key_words = new StaticArray(8); for (let i = 0; i < 8; i++) { - this.key_words[i] = IV[i]; + key_words[i] = IV[i]; } - this.chunk_state = new ChunkState(this.key_words, 0, 0); + this.key_words = key_words; + this.chunk_state = new ChunkState(key_words, 0, 0); this.cv_stack = new StaticArray>(54); + this.cv_stack_len = 0; + this.flags = 0; + for (let i = 0; i < 54; i++) { this.cv_stack[i] = new StaticArray(8); } - this.cv_stack_len = 0; - this.flags = 0; } update(input: Uint8Array): void { @@ -205,17 +207,18 @@ class ChunkState { constructor(key_words: StaticArray, chunk_counter: u64, flags: u32) { this.chaining_value = new StaticArray(8); - for (let i = 0; i < 8; i++) { - this.chaining_value[i] = key_words[i]; - } this.chunk_counter = chunk_counter; this.block = new Uint8Array(BLOCK_LEN); this.block_len = 0; this.blocks_compressed = 0; this.flags = flags; + + for (let i = 0; i < 8; i++) { + this.chaining_value[i] = key_words[i]; + } } - len(): usize { + len(): i32 { return BLOCK_LEN * this.blocks_compressed + this.block_len; } @@ -249,7 +252,7 @@ class ChunkState { for (let i = 0; i < take; i++) { this.block[this.block_len + i] = input[inputPos + i]; } - this.block_len += take; + this.block_len += take as u8; inputPos += take; } } @@ -352,6 +355,20 @@ function parent_cv( return parent_output(left_child_cv, right_child_cv, key_words, flags).chaining_value(); } -export function createBlake3Hasher(): Blake3Hasher 
{ - return new Blake3Hasher(); +export function blake3(input: Uint8Array): Uint8Array { + const hasher = new Blake3Hasher(); + hasher.update(input); + const output = new Uint8Array(32); + hasher.finalize(output); + return output; +} + +export function blake3Hex(input: Uint8Array): string { + const hash = blake3(input); + const hex = new Array(64); + for (let i = 0; i < 32; i++) { + hex[i * 2] = (hash[i] >> 4).toString(16); + hex[i * 2 + 1] = (hash[i] & 0x0f).toString(16); + } + return hex.join(""); } diff --git a/packages/blake3-wasm/tests/index.js b/packages/blake3-wasm/tests/index.js new file mode 100644 index 0000000000..8f5b95b9f6 --- /dev/null +++ b/packages/blake3-wasm/tests/index.js @@ -0,0 +1,163 @@ +// Adapted from https://github.com/mcmilk/BLAKE3-tests/blob/11a8abeceac93b5eba664eae3679efb4ffa5bc0a/blake3_test.c + +import { blake3Hex } from "../build/debug.js"; + +const buffer = new Uint8Array(102400); +let i = 0; +let j = 0; + +for (i = 0, j = 0; i < buffer.length; i++, j++) { + if (j === 251) { + j = 0; + } + buffer[i] = j; +} + +const testCases = [ + { + buf: buffer.slice(0, 0), + expected: "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262", + }, + { + buf: buffer.slice(0, 1), + expected: "2d3adedff11b61f14c886e35afa036736dcd87a74d27b5c1510225d0f592e213", + }, + { + buf: buffer.slice(0, 2), + expected: "7b7015bb92cf0b318037702a6cdd81dee41224f734684c2c122cd6359cb1ee63", + }, + { + buf: buffer.slice(0, 3), + expected: "e1be4d7a8ab5560aa4199eea339849ba8e293d55ca0a81006726d184519e647f", + }, + { + buf: buffer.slice(0, 4), + expected: "f30f5ab28fe047904037f77b6da4fea1e27241c5d132638d8bedce9d40494f32", + }, + { + buf: buffer.slice(0, 5), + expected: "b40b44dfd97e7a84a996a91af8b85188c66c126940ba7aad2e7ae6b385402aa2", + }, + { + buf: buffer.slice(0, 6), + expected: "06c4e8ffb6872fad96f9aaca5eee1553eb62aed0ad7198cef42e87f6a616c844", + }, + { + buf: buffer.slice(0, 7), + expected: "3f8770f387faad08faa9d8414e9f449ac68e6ff0417f673f602a646a891419fe", 
+ }, + { + buf: buffer.slice(0, 8), + expected: "2351207d04fc16ade43ccab08600939c7c1fa70a5c0aaca76063d04c3228eaeb", + }, + { + buf: buffer.slice(0, 63), + expected: "e9bc37a594daad83be9470df7f7b3798297c3d834ce80ba85d6e207627b7db7b", + }, + { + buf: buffer.slice(0, 64), + expected: "4eed7141ea4a5cd4b788606bd23f46e212af9cacebacdc7d1f4c6dc7f2511b98", + }, + { + buf: buffer.slice(0, 65), + expected: "de1e5fa0be70df6d2be8fffd0e99ceaa8eb6e8c93a63f2d8d1c30ecb6b263dee", + }, + { + buf: buffer.slice(0, 127), + expected: "d81293fda863f008c09e92fc382a81f5a0b4a1251cba1634016a0f86a6bd640d", + }, + { + buf: buffer.slice(0, 128), + expected: "f17e570564b26578c33bb7f44643f539624b05df1a76c81f30acd548c44b45ef", + }, + { + buf: buffer.slice(0, 129), + expected: "683aaae9f3c5ba37eaaf072aed0f9e30bac0865137bae68b1fde4ca2aebdcb12", + }, + { + buf: buffer.slice(0, 1023), + expected: "10108970eeda3eb932baac1428c7a2163b0e924c9a9e25b35bba72b28f70bd11", + }, + { + buf: buffer.slice(0, 1024), + expected: "42214739f095a406f3fc83deb889744ac00df831c10daa55189b5d121c855af7", + }, + { + buf: buffer.slice(0, 1025), + expected: "d00278ae47eb27b34faecf67b4fe263f82d5412916c1ffd97c8cb7fb814b8444", + }, + { + buf: buffer.slice(0, 2048), + expected: "e776b6028c7cd22a4d0ba182a8bf62205d2ef576467e838ed6f2529b85fba24a", + }, + { + buf: buffer.slice(0, 2049), + expected: "5f4d72f40d7a5f82b15ca2b2e44b1de3c2ef86c426c95c1af0b6879522563030", + }, + { + buf: buffer.slice(0, 3072), + expected: "b98cb0ff3623be03326b373de6b9095218513e64f1ee2edd2525c7ad1e5cffd2", + }, + { + buf: buffer.slice(0, 3073), + expected: "7124b49501012f81cc7f11ca069ec9226cecb8a2c850cfe644e327d22d3e1cd3", + }, + { + buf: buffer.slice(0, 4096), + expected: "015094013f57a5277b59d8475c0501042c0b642e531b0a1c8f58d2163229e969", + }, + { + buf: buffer.slice(0, 4097), + expected: "9b4052b38f1c5fc8b1f9ff7ac7b27cd242487b3d890d15c96a1c25b8aa0fb995", + }, + { + buf: buffer.slice(0, 5120), + expected:
"9cadc15fed8b5d854562b26a9536d9707cadeda9b143978f319ab34230535833", + }, + { + buf: buffer.slice(0, 5121), + expected: "628bd2cb2004694adaab7bbd778a25df25c47b9d4155a55f8fbd79f2fe154cff", + }, + { + buf: buffer.slice(0, 6144), + expected: "3e2e5b74e048f3add6d21faab3f83aa44d3b2278afb83b80b3c35164ebeca205", + }, + { + buf: buffer.slice(0, 6145), + expected: "f1323a8631446cc50536a9f705ee5cb619424d46887f3c376c695b70e0f0507f", + }, + { + buf: buffer.slice(0, 7168), + expected: "61da957ec2499a95d6b8023e2b0e604ec7f6b50e80a9678b89d2628e99ada77a", + }, + { + buf: buffer.slice(0, 7169), + expected: "a003fc7a51754a9b3c7fae0367ab3d782dccf28855a03d435f8cfe74605e7817", + }, + { + buf: buffer.slice(0, 8192), + expected: "aae792484c8efe4f19e2ca7d371d8c467ffb10748d8a5a1ae579948f718a2a63", + }, + { + buf: buffer.slice(0, 8193), + expected: "bab6c09cb8ce8cf459261398d2e7aef35700bf488116ceb94a36d0f5f1b7bc3b", + }, + { + buf: buffer.slice(0, 102400), + expected: "bc3e3d41a1146b069abffad3c0d44860cf664390afce4d9661f7902e7943e085", + }, +]; + +for (const testCase of testCases) { + const result = blake3Hex(testCase.buf); + console.log(result); + + if (result !== testCase.expected) { + console.error(`Test case failed: ${testCase.buf.length} bytes`); + console.error(`Expected: ${testCase.expected}`); + console.error(`Actual: ${result}`); + process.exit(1); + } +} + +console.log("All test cases passed"); From fef842369290b6b8c5c8d5a72658e6dada525a90 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 13 Jun 2025 12:00:18 +0200 Subject: [PATCH 18/18] add rust reference implementation in package --- packages/blake3-wasm/.npmignore | 1 + packages/blake3-wasm/vendor/Cargo.lock | 7 + packages/blake3-wasm/vendor/Cargo.toml | 12 + packages/blake3-wasm/vendor/README.md | 27 ++ packages/blake3-wasm/vendor/src/blake3.rs | 376 ++++++++++++++++++ packages/blake3-wasm/vendor/src/lib.rs | 3 + packages/blake3-wasm/vendor/src/main.rs | 22 + packages/blake3-wasm/vendor/target/.gitignore | 2 + 8 files
changed, 450 insertions(+) create mode 100644 packages/blake3-wasm/.npmignore create mode 100644 packages/blake3-wasm/vendor/Cargo.lock create mode 100644 packages/blake3-wasm/vendor/Cargo.toml create mode 100644 packages/blake3-wasm/vendor/README.md create mode 100644 packages/blake3-wasm/vendor/src/blake3.rs create mode 100644 packages/blake3-wasm/vendor/src/lib.rs create mode 100644 packages/blake3-wasm/vendor/src/main.rs create mode 100644 packages/blake3-wasm/vendor/target/.gitignore diff --git a/packages/blake3-wasm/.npmignore b/packages/blake3-wasm/.npmignore new file mode 100644 index 0000000000..5657f6ea7d --- /dev/null +++ b/packages/blake3-wasm/.npmignore @@ -0,0 +1 @@ +vendor \ No newline at end of file diff --git a/packages/blake3-wasm/vendor/Cargo.lock b/packages/blake3-wasm/vendor/Cargo.lock new file mode 100644 index 0000000000..9f0162bf75 --- /dev/null +++ b/packages/blake3-wasm/vendor/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "blake3-example" +version = "0.1.0" diff --git a/packages/blake3-wasm/vendor/Cargo.toml b/packages/blake3-wasm/vendor/Cargo.toml new file mode 100644 index 0000000000..7f31968ed3 --- /dev/null +++ b/packages/blake3-wasm/vendor/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "blake3-example" +version = "0.1.0" +edition = "2021" + +[lib] +name = "reference_impl" +path = "src/lib.rs" + +[[bin]] +name = "blake3-example" +path = "src/main.rs" \ No newline at end of file diff --git a/packages/blake3-wasm/vendor/README.md b/packages/blake3-wasm/vendor/README.md new file mode 100644 index 0000000000..46cce0d076 --- /dev/null +++ b/packages/blake3-wasm/vendor/README.md @@ -0,0 +1,27 @@ +# BLAKE3 Example + +This is a simple example that demonstrates using the BLAKE3 hash function with empty input. + +## Prerequisites + +- Rust and Cargo installed on your system. 
You can install them from [rustup.rs](https://rustup.rs/) + +## Running the Example + +1. Open a terminal in this directory +2. Run the following command: + ```bash + cargo run + ``` + +The program will output a 32-byte hash in hexadecimal format. For empty input, the expected output should be: +``` +af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262 +``` + +## What the Code Does + +1. Creates a new BLAKE3 hasher +2. Updates it with empty input +3. Finalizes the hash into a 32-byte buffer +4. Prints the hash in hexadecimal format \ No newline at end of file diff --git a/packages/blake3-wasm/vendor/src/blake3.rs b/packages/blake3-wasm/vendor/src/blake3.rs new file mode 100644 index 0000000000..bc701784f8 --- /dev/null +++ b/packages/blake3-wasm/vendor/src/blake3.rs @@ -0,0 +1,376 @@ +// From https://github.com/BLAKE3-team/BLAKE3/blob/master/reference_impl/reference_impl.rs + +//! This is the reference implementation of BLAKE3. It is used for testing and +//! as a readable example of the algorithms involved. Section 5.1 of [the BLAKE3 +//! spec](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf) +//! discusses this implementation. You can render docs for this implementation +//! by running `cargo doc --open` in this directory. +//! +//! # Example +//! +//! ``` +//! let mut hasher = reference_impl::Hasher::new(); +//! hasher.update(b"abc"); +//! hasher.update(b"def"); +//! let mut hash = [0; 32]; +//! hasher.finalize(&mut hash); +//! let mut extended_hash = [0; 500]; +//! hasher.finalize(&mut extended_hash); +//! assert_eq!(hash, extended_hash[..32]); +//! 
``` + +use core::cmp::min; + +const OUT_LEN: usize = 32; +const KEY_LEN: usize = 32; +const BLOCK_LEN: usize = 64; +const CHUNK_LEN: usize = 1024; + +const CHUNK_START: u32 = 1 << 0; +const CHUNK_END: u32 = 1 << 1; +const PARENT: u32 = 1 << 2; +const ROOT: u32 = 1 << 3; +const KEYED_HASH: u32 = 1 << 4; +const DERIVE_KEY_CONTEXT: u32 = 1 << 5; +const DERIVE_KEY_MATERIAL: u32 = 1 << 6; + +const IV: [u32; 8] = [ + 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19, +]; + +const MSG_PERMUTATION: [usize; 16] = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8]; + +// The mixing function, G, which mixes either a column or a diagonal. +fn g(state: &mut [u32; 16], a: usize, b: usize, c: usize, d: usize, mx: u32, my: u32) { + state[a] = state[a].wrapping_add(state[b]).wrapping_add(mx); + state[d] = (state[d] ^ state[a]).rotate_right(16); + state[c] = state[c].wrapping_add(state[d]); + state[b] = (state[b] ^ state[c]).rotate_right(12); + state[a] = state[a].wrapping_add(state[b]).wrapping_add(my); + state[d] = (state[d] ^ state[a]).rotate_right(8); + state[c] = state[c].wrapping_add(state[d]); + state[b] = (state[b] ^ state[c]).rotate_right(7); +} + +fn round(state: &mut [u32; 16], m: &[u32; 16]) { + // Mix the columns. + g(state, 0, 4, 8, 12, m[0], m[1]); + g(state, 1, 5, 9, 13, m[2], m[3]); + g(state, 2, 6, 10, 14, m[4], m[5]); + g(state, 3, 7, 11, 15, m[6], m[7]); + // Mix the diagonals. 
+ g(state, 0, 5, 10, 15, m[8], m[9]); + g(state, 1, 6, 11, 12, m[10], m[11]); + g(state, 2, 7, 8, 13, m[12], m[13]); + g(state, 3, 4, 9, 14, m[14], m[15]); +} + +fn permute(m: &mut [u32; 16]) { + let mut permuted = [0; 16]; + for i in 0..16 { + permuted[i] = m[MSG_PERMUTATION[i]]; + } + *m = permuted; +} + +fn compress( + chaining_value: &[u32; 8], + block_words: &[u32; 16], + counter: u64, + block_len: u32, + flags: u32, +) -> [u32; 16] { + let counter_low = counter as u32; + let counter_high = (counter >> 32) as u32; + #[rustfmt::skip] + let mut state = [ + chaining_value[0], chaining_value[1], chaining_value[2], chaining_value[3], + chaining_value[4], chaining_value[5], chaining_value[6], chaining_value[7], + IV[0], IV[1], IV[2], IV[3], + counter_low, counter_high, block_len, flags, + ]; + let mut block = *block_words; + + round(&mut state, &block); // round 1 + permute(&mut block); + round(&mut state, &block); // round 2 + permute(&mut block); + round(&mut state, &block); // round 3 + permute(&mut block); + round(&mut state, &block); // round 4 + permute(&mut block); + round(&mut state, &block); // round 5 + permute(&mut block); + round(&mut state, &block); // round 6 + permute(&mut block); + round(&mut state, &block); // round 7 + + for i in 0..8 { + state[i] ^= state[i + 8]; + state[i + 8] ^= chaining_value[i]; + } + state +} + +fn first_8_words(compression_output: [u32; 16]) -> [u32; 8] { + compression_output[0..8].try_into().unwrap() +} + +fn words_from_little_endian_bytes(bytes: &[u8], words: &mut [u32]) { + debug_assert_eq!(bytes.len(), 4 * words.len()); + for (four_bytes, word) in bytes.chunks_exact(4).zip(words) { + *word = u32::from_le_bytes(four_bytes.try_into().unwrap()); + } +} + +// Each chunk or parent node can produce either an 8-word chaining value or, by +// setting the ROOT flag, any number of final output bytes. The Output struct +// captures the state just prior to choosing between those two possibilities. 
+struct Output { + input_chaining_value: [u32; 8], + block_words: [u32; 16], + counter: u64, + block_len: u32, + flags: u32, +} + +impl Output { + fn chaining_value(&self) -> [u32; 8] { + first_8_words(compress( + &self.input_chaining_value, + &self.block_words, + self.counter, + self.block_len, + self.flags, + )) + } + + fn root_output_bytes(&self, out_slice: &mut [u8]) { + let mut output_block_counter = 0; + for out_block in out_slice.chunks_mut(2 * OUT_LEN) { + let words = compress( + &self.input_chaining_value, + &self.block_words, + output_block_counter, + self.block_len, + self.flags | ROOT, + ); + // The output length might not be a multiple of 4. + for (word, out_word) in words.iter().zip(out_block.chunks_mut(4)) { + out_word.copy_from_slice(&word.to_le_bytes()[..out_word.len()]); + } + output_block_counter += 1; + } + } +} + +struct ChunkState { + chaining_value: [u32; 8], + chunk_counter: u64, + block: [u8; BLOCK_LEN], + block_len: u8, + blocks_compressed: u8, + flags: u32, +} + +impl ChunkState { + fn new(key_words: [u32; 8], chunk_counter: u64, flags: u32) -> Self { + Self { + chaining_value: key_words, + chunk_counter, + block: [0; BLOCK_LEN], + block_len: 0, + blocks_compressed: 0, + flags, + } + } + + fn len(&self) -> usize { + BLOCK_LEN * self.blocks_compressed as usize + self.block_len as usize + } + + fn start_flag(&self) -> u32 { + if self.blocks_compressed == 0 { + CHUNK_START + } else { + 0 + } + } + + fn update(&mut self, mut input: &[u8]) { + while !input.is_empty() { + // If the block buffer is full, compress it and clear it. More + // input is coming, so this compression is not CHUNK_END. 
+ if self.block_len as usize == BLOCK_LEN { + let mut block_words = [0; 16]; + words_from_little_endian_bytes(&self.block, &mut block_words); + self.chaining_value = first_8_words(compress( + &self.chaining_value, + &block_words, + self.chunk_counter, + BLOCK_LEN as u32, + self.flags | self.start_flag(), + )); + self.blocks_compressed += 1; + self.block = [0; BLOCK_LEN]; + self.block_len = 0; + } + + // Copy input bytes into the block buffer. + let want = BLOCK_LEN - self.block_len as usize; + let take = min(want, input.len()); + self.block[self.block_len as usize..][..take].copy_from_slice(&input[..take]); + self.block_len += take as u8; + input = &input[take..]; + } + } + + fn output(&self) -> Output { + let mut block_words = [0; 16]; + words_from_little_endian_bytes(&self.block, &mut block_words); + Output { + input_chaining_value: self.chaining_value, + block_words, + counter: self.chunk_counter, + block_len: self.block_len as u32, + flags: self.flags | self.start_flag() | CHUNK_END, + } + } +} + +fn parent_output( + left_child_cv: [u32; 8], + right_child_cv: [u32; 8], + key_words: [u32; 8], + flags: u32, +) -> Output { + let mut block_words = [0; 16]; + block_words[..8].copy_from_slice(&left_child_cv); + block_words[8..].copy_from_slice(&right_child_cv); + Output { + input_chaining_value: key_words, + block_words, + counter: 0, // Always 0 for parent nodes. + block_len: BLOCK_LEN as u32, // Always BLOCK_LEN (64) for parent nodes. + flags: PARENT | flags, + } +} + +fn parent_cv( + left_child_cv: [u32; 8], + right_child_cv: [u32; 8], + key_words: [u32; 8], + flags: u32, +) -> [u32; 8] { + parent_output(left_child_cv, right_child_cv, key_words, flags).chaining_value() +} + +/// An incremental hasher that can accept any number of writes. 
+pub struct Hasher { + chunk_state: ChunkState, + key_words: [u32; 8], + cv_stack: [[u32; 8]; 54], // Space for 54 subtree chaining values: + cv_stack_len: u8, // 2^54 * CHUNK_LEN = 2^64 + flags: u32, +} + +impl Hasher { + fn new_internal(key_words: [u32; 8], flags: u32) -> Self { + Self { + chunk_state: ChunkState::new(key_words, 0, flags), + key_words, + cv_stack: [[0; 8]; 54], + cv_stack_len: 0, + flags, + } + } + + /// Construct a new `Hasher` for the regular hash function. + pub fn new() -> Self { + Self::new_internal(IV, 0) + } + + /// Construct a new `Hasher` for the keyed hash function. + pub fn new_keyed(key: &[u8; KEY_LEN]) -> Self { + let mut key_words = [0; 8]; + words_from_little_endian_bytes(key, &mut key_words); + Self::new_internal(key_words, KEYED_HASH) + } + + /// Construct a new `Hasher` for the key derivation function. The context + /// string should be hardcoded, globally unique, and application-specific. + pub fn new_derive_key(context: &str) -> Self { + let mut context_hasher = Self::new_internal(IV, DERIVE_KEY_CONTEXT); + context_hasher.update(context.as_bytes()); + let mut context_key = [0; KEY_LEN]; + context_hasher.finalize(&mut context_key); + let mut context_key_words = [0; 8]; + words_from_little_endian_bytes(&context_key, &mut context_key_words); + Self::new_internal(context_key_words, DERIVE_KEY_MATERIAL) + } + + fn push_stack(&mut self, cv: [u32; 8]) { + self.cv_stack[self.cv_stack_len as usize] = cv; + self.cv_stack_len += 1; + } + + fn pop_stack(&mut self) -> [u32; 8] { + self.cv_stack_len -= 1; + self.cv_stack[self.cv_stack_len as usize] + } + + // Section 5.1.2 of the BLAKE3 spec explains this algorithm in more detail. + fn add_chunk_chaining_value(&mut self, mut new_cv: [u32; 8], mut total_chunks: u64) { + // This chunk might complete some subtrees. For each completed subtree, + // its left child will be the current top entry in the CV stack, and + // its right child will be the current value of `new_cv`. 
Pop each left + // child off the stack, merge it with `new_cv`, and overwrite `new_cv` + // with the result. After all these merges, push the final value of + // `new_cv` onto the stack. The number of completed subtrees is given + // by the number of trailing 0-bits in the new total number of chunks. + while total_chunks & 1 == 0 { + new_cv = parent_cv(self.pop_stack(), new_cv, self.key_words, self.flags); + total_chunks >>= 1; + } + self.push_stack(new_cv); + } + + /// Add input to the hash state. This can be called any number of times. + pub fn update(&mut self, mut input: &[u8]) { + while !input.is_empty() { + // If the current chunk is complete, finalize it and reset the + // chunk state. More input is coming, so this chunk is not ROOT. + if self.chunk_state.len() == CHUNK_LEN { + let chunk_cv = self.chunk_state.output().chaining_value(); + let total_chunks = self.chunk_state.chunk_counter + 1; + self.add_chunk_chaining_value(chunk_cv, total_chunks); + self.chunk_state = ChunkState::new(self.key_words, total_chunks, self.flags); + } + + // Compress input bytes into the current chunk state. + let want = CHUNK_LEN - self.chunk_state.len(); + let take = min(want, input.len()); + self.chunk_state.update(&input[..take]); + input = &input[take..]; + } + } + + /// Finalize the hash and write any number of output bytes. + pub fn finalize(&self, out_slice: &mut [u8]) { + // Starting with the Output from the current chunk, compute all the + // parent chaining values along the right edge of the tree, until we + // have the root Output. 
+ let mut output = self.chunk_state.output(); + let mut parent_nodes_remaining = self.cv_stack_len as usize; + while parent_nodes_remaining > 0 { + parent_nodes_remaining -= 1; + output = parent_output( + self.cv_stack[parent_nodes_remaining], + output.chaining_value(), + self.key_words, + self.flags, + ); + } + output.root_output_bytes(out_slice); + } +} diff --git a/packages/blake3-wasm/vendor/src/lib.rs b/packages/blake3-wasm/vendor/src/lib.rs new file mode 100644 index 0000000000..874b108ebf --- /dev/null +++ b/packages/blake3-wasm/vendor/src/lib.rs @@ -0,0 +1,3 @@ +mod blake3; + +pub use blake3::*; \ No newline at end of file diff --git a/packages/blake3-wasm/vendor/src/main.rs b/packages/blake3-wasm/vendor/src/main.rs new file mode 100644 index 0000000000..ff9aa8915d --- /dev/null +++ b/packages/blake3-wasm/vendor/src/main.rs @@ -0,0 +1,22 @@ +use std::io::Write; + +fn main() { + // Create a new hasher + let mut hasher = reference_impl::Hasher::new(); + + // Update with empty input + hasher.update(b""); + + // Create a buffer for the output + let mut output = [0u8; 32]; + + // Get the hash + hasher.finalize(&mut output); + + // Print the hash in hex format + let mut stdout = std::io::stdout(); + for byte in output { + write!(stdout, "{:02x}", byte).unwrap(); + } + println!(); +} \ No newline at end of file diff --git a/packages/blake3-wasm/vendor/target/.gitignore b/packages/blake3-wasm/vendor/target/.gitignore new file mode 100644 index 0000000000..d6b7ef32c8 --- /dev/null +++ b/packages/blake3-wasm/vendor/target/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore