Skip to content

Commit 17f4f05

Browse files
codetheweb authored and itaismith committed
[BUG]: increase max payload size of log service (Go) (#4534)
The max gRPC payload/response sizes were updated for the Rust log client, but were never updated for the log service, so the `PushLogs` method was still failing when the request was larger than 4MB.

_How are these changes tested?_
- [x] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust

_Are all docstrings for user-facing APIs updated if required? Do we need to make documentation changes in the [docs section](https://github.com/chroma-core/chroma/tree/main/docs/docs.trychroma.com)?_
n/a

checkpoint after adding default checkpoint test passing
1 parent f228cf6 commit 17f4f05

File tree

19 files changed

+778
-75
lines changed

19 files changed

+778
-75
lines changed

clients/js/packages/chromadb-core/src/CollectionConfiguration.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ import {
22
IEmbeddingFunction,
33
EmbeddingFunctionSpace,
44
} from "./embeddings/IEmbeddingFunction";
5-
import { DefaultEmbeddingFunction } from "./embeddings/DefaultEmbeddingFunction";
65
import { Api } from "./generated";
76
export type HnswSpace = EmbeddingFunctionSpace;
87

clients/new-js/packages/ai-embeddings/default-embed/README.md

Whitespace-only changes.
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{
2+
"name": "@chroma-core/default-embed",
3+
"version": "0.1.0",
4+
"private": false,
5+
"description": "Default embedding function for Chroma",
6+
"main": "dist/cjs/default-embed.cjs",
7+
"types": "dist/default-embed.d.ts",
8+
"module": "dist/default-embed.legacy-esm.js",
9+
"type": "module",
10+
"exports": {
11+
".": {
12+
"import": {
13+
"types": "./dist/default-embed.d.ts",
14+
"default": "./dist/default-embed.mjs"
15+
},
16+
"require": {
17+
"types": "./dist/cjs/default-embed.d.cts",
18+
"default": "./dist/cjs/default-embed.cjs"
19+
}
20+
}
21+
},
22+
"files": [
23+
"src",
24+
"dist"
25+
],
26+
"scripts": {
27+
"clean": "rimraf dist",
28+
"prebuild": "rimraf dist",
29+
"build": "tsup",
30+
"watch": "tsup --watch",
31+
"test": "tsx src/test.ts"
32+
},
33+
"dependencies": {
34+
"@huggingface/transformers": "^3.5.1"
35+
},
36+
"devDependencies": {
37+
"rimraf": "^5.0.0",
38+
"tsup": "^8.3.5"
39+
},
40+
"engines": {
41+
"node": ">=14.17.0"
42+
},
43+
"publishConfig": {
44+
"access": "public"
45+
}
46+
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import { pipeline, ProgressCallback } from "@huggingface/transformers";
2+
3+
/**
 * Precision / quantization identifiers accepted for the `dtype` option.
 * The chosen value is forwarded unchanged to the transformers `pipeline` call.
 */
export type DType =
  | "auto"
  | "fp32"
  | "fp16"
  | "q8"
  | "int8"
  | "uint8"
  | "q4"
  | "bnb4"
  | "q4f16";

/**
 * Either one {@link DType} for the whole model, or a map from string keys to
 * a {@link DType}.
 * NOTE(review): the map keys are presumably model component / file names as
 * understood by transformers.js — confirm against its documentation.
 */
export type Quantization = DType | Record<string, DType>;

/**
 * Serializable subset of the embedding function's settings. Every field is
 * optional; the constructor supplies defaults for the ones left out.
 */
interface StoredConfig {
  /** Model identifier handed to the pipeline. */
  model?: string;
  /** Model revision handed to the pipeline. */
  revision?: string;
  /** Requested precision / quantization. */
  dtype?: Quantization;
}
21+
22+
/**
 * Default embedding function, backed by a transformers.js
 * `feature-extraction` pipeline.
 *
 * The pipeline is created lazily on the first non-empty `generate` call and
 * then reused, so the model is not reloaded for every batch of texts.
 */
export class DefaultEmbeddingFunction {
  public readonly name: string = "default";
  private readonly model: string;
  private readonly revision: string;
  private readonly dtype: Quantization | undefined;
  private readonly progressCallback: ProgressCallback | undefined = undefined;

  /**
   * Returns the (possibly still loading) pipeline, creating it on first use.
   * The promise itself is memoized so concurrent callers share one load; a
   * failed load clears the cache so that a later call can retry.
   */
  private readonly loadPipeline = (() => {
    const create = () =>
      pipeline("feature-extraction", this.model, {
        revision: this.revision,
        progress_callback: this.progressCallback,
        dtype: this.dtype,
      });
    let cached: ReturnType<typeof create> | undefined;

    return () => {
      if (cached !== undefined) {
        return cached;
      }
      const pending = create().catch((error: unknown) => {
        cached = undefined;
        throw error;
      });
      cached = pending;
      return pending;
    };
  })();

  /**
   * @param args - Optional overrides. `model` defaults to
   *   "Xenova/all-MiniLM-L6-v2" and `revision` to "main"; `dtype` and
   *   `progressCallback` default to `undefined`.
   */
  constructor(
    args: Partial<
      StoredConfig & { progressCallback: ProgressCallback | undefined }
    > = {},
  ) {
    const {
      model = "Xenova/all-MiniLM-L6-v2",
      revision = "main",
      dtype = undefined,
      progressCallback = undefined,
    } = args;

    this.model = model;
    this.revision = revision;
    this.dtype = dtype;
    this.progressCallback = progressCallback;
  }

  /**
   * Embeds each input text with mean pooling and normalization enabled.
   *
   * @param texts - Texts to embed.
   * @returns The pipeline output converted with `tolist()`; an empty array
   *   when `texts` is empty (the model is not loaded in that case).
   */
  public async generate(texts: string[]): Promise<number[][]> {
    if (texts.length === 0) {
      return [];
    }

    const pipe = await this.loadPipeline();
    const output = await pipe(texts, { pooling: "mean", normalize: true });
    return output.tolist();
  }

  /** Returns the serializable settings (`model`, `revision`, `dtype`). */
  public getConfig(): Record<string, any> {
    return {
      model: this.model,
      revision: this.revision,
      dtype: this.dtype,
    };
  }

  /** Instance-level alias of the static {@link DefaultEmbeddingFunction.buildFromConfig}. */
  public buildFromConfig(config: StoredConfig): DefaultEmbeddingFunction {
    return DefaultEmbeddingFunction.buildFromConfig(config);
  }

  /** Creates a new instance from a stored configuration. */
  static buildFromConfig(config: StoredConfig): DefaultEmbeddingFunction {
    return new DefaultEmbeddingFunction(config);
  }
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import { DefaultEmbeddingFunction } from "./index";
2+
3+
/**
 * Smoke test: embeds a single string with a default-configured
 * `DefaultEmbeddingFunction` and prints the result.
 */
const main = async (): Promise<void> => {
  const embedder = new DefaultEmbeddingFunction();
  const embeddings = await embedder.generate(["hello"]);
  console.log(embeddings);
};

// Log the failure AND mark the process as failed; logging alone would let
// the `test` script exit with status 0 even though embedding did not work.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"extends": "../../../tsconfig.base.json",
3+
"compilerOptions": {
4+
"outDir": "./dist",
5+
"rootDir": "./src",
6+
"baseUrl": "./src",
7+
"stripInternal": true,
8+
"composite": true
9+
},
10+
"include": ["src/**/*"],
11+
"exclude": ["node_modules", "dist", "test", "**/*.test.ts"],
12+
"references": [
13+
{
14+
"path": "../../chromadb"
15+
}
16+
]
17+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import { defineConfig, Options } from "tsup";
2+
import * as fs from "fs";
3+
4+
/**
 * tsup build configuration: emits an ESM bundle (`dist/default-embed.mjs`,
 * plus a `.legacy-esm.js` copy) and a CommonJS bundle under `dist/cjs/`.
 */
export default defineConfig((options: Options) => {
  // Settings common to both bundles. The incoming `options` are spread last,
  // so anything they define overrides the values listed here.
  const shared: Partial<Options> = {
    entry: { "default-embed": "src/index.ts" },
    sourcemap: true,
    dts: {
      compilerOptions: {
        composite: false,
        declaration: true,
        emitDeclarationOnly: false,
      },
    },
    ...options,
  };

  const esmBundle: Options = {
    ...shared,
    format: ["esm"],
    outExtension: () => ({ js: ".mjs" }),
    clean: true,
    async onSuccess() {
      // Support Webpack 4 by pointing `"module"` to a file with a `.js` extension
      const builtFile = "dist/default-embed.mjs";
      const legacyCopy = "dist/default-embed.legacy-esm.js";
      fs.copyFileSync(builtFile, legacyCopy);
    },
  };

  const cjsBundle: Options = {
    ...shared,
    format: "cjs",
    outDir: "./dist/cjs/",
    outExtension: () => ({ js: ".cjs" }),
  };

  return [esmBundle, cjsBundle];
});

clients/new-js/packages/ai-embeddings/jina/package.json

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,12 @@
2929
"build": "tsup",
3030
"watch": "tsup --watch"
3131
},
32-
"dependencies": {
33-
"chromadb": "workspace:^"
34-
},
3532
"devDependencies": {
3633
"rimraf": "^5.0.0",
3734
"tsup": "^8.3.5"
3835
},
3936
"peerDependencies": {
40-
37+
"chromadb": "workspace:^"
4138
},
4239
"engines": {
4340
"node": ">=14.17.0"

clients/new-js/packages/ai-embeddings/jina/src/index.ts

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,18 +23,18 @@ interface JinaRequestBody {
2323
}
2424

2525
export class JinaEmbeddingFunction implements EmbeddingFunction {
26-
name = "jina";
27-
28-
private api_key_env_var: string;
29-
private model_name: string;
30-
private api_url: string;
31-
private headers: { [key: string]: string };
32-
private task: string | undefined;
33-
private late_chunking: boolean | undefined;
34-
private truncate: boolean | undefined;
35-
private dimensions: number | undefined;
36-
private embedding_type: string | undefined;
37-
private normalized: boolean | undefined;
26+
public readonly name = "jina";
27+
28+
private readonly api_key_env_var: string;
29+
private readonly model_name: string;
30+
private readonly api_url: string;
31+
private readonly headers: { [key: string]: string };
32+
private readonly task: string | undefined;
33+
private readonly late_chunking: boolean | undefined;
34+
private readonly truncate: boolean | undefined;
35+
private readonly dimensions: number | undefined;
36+
private readonly embedding_type: string | undefined;
37+
private readonly normalized: boolean | undefined;
3838

3939
constructor({
4040
jinaai_api_key,
@@ -163,4 +163,4 @@ export class JinaEmbeddingFunction implements EmbeddingFunction {
163163
}
164164
}
165165

166-
registerEmbeddingFunction(JinaEmbeddingFunction.prototype);
166+
// `registerEmbeddingFunction` takes the registry name first and the class
// second; the name matches the instance's `name` field ("jina").
registerEmbeddingFunction("jina", JinaEmbeddingFunction);

clients/new-js/packages/chromadb/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@
6868
"tsd": "^0.28.1",
6969
"tsup": "^8.3.5",
7070
"typescript": "^5.0.4",
71-
"wait-on": "^8.0.3"
71+
"wait-on": "^8.0.3",
72+
"@chroma-core/default-embed": "workspace:^"
7273
},
7374
"optionalDependencies": {
7475
"chromadb-js-bindings-darwin-arm64": "^0.1.1",

clients/new-js/packages/chromadb/src/chroma-client.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { CollectionMetadata, UserIdentity } from "./types";
99
import { Collection, CollectionImpl } from "./collection";
1010
import {
1111
EmbeddingFunction,
12+
getDefaultEFConfig,
1213
getEmbeddingFunction,
1314
serializeEmbeddingFunction,
1415
} from "./embedding-function";
@@ -126,15 +127,15 @@ export class ChromaClient {
126127
...(configuration || {}),
127128
embedding_function: embeddingFunction
128129
? serializeEmbeddingFunction(embeddingFunction)
129-
: undefined,
130+
: await getDefaultEFConfig(),
130131
};
131132

132133
const { data } = await Api.createCollection({
133134
client: this.apiClient,
134135
path: this.path(),
135136
body: {
136137
name,
137-
// configuration: undefined,
138+
configuration: collectionConfig,
138139
metadata,
139140
get_or_create: false,
140141
},

clients/new-js/packages/chromadb/src/collection-configuration.ts

Whitespace-only changes.

clients/new-js/packages/chromadb/src/embedding-function.ts

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ export interface EmbeddingFunction {
1616
validateConfig?(config: Record<string, any>): void;
1717
}
1818

19+
/**
 * Shape of an embedding function *class* (the constructor, not an instance).
 * This is the value type stored in the `knownEmbeddingFunctions` registry.
 */
export interface EmbeddingFunctionClass {
  /**
   * Static `name` of the constructor.
   * NOTE(review): for a plain class this is the JavaScript class name, which
   * can differ from an instance's own `name` field — confirm which one
   * callers rely on.
   */
  name: string;

  /** Static factory that creates an instance from a plain config object. */
  buildFromConfig(config: Record<string, any>): EmbeddingFunction;

  /** Constructor signature; arguments are implementation-specific. */
  new (...args: any[]): EmbeddingFunction;
}
24+
1925
class MalformedEmbeddingFunction implements EmbeddingFunction {
2026
public readonly name: string;
2127

@@ -29,18 +35,21 @@ class MalformedEmbeddingFunction implements EmbeddingFunction {
2935
}
3036
}
3137

32-
export const knownEmbeddingFunctions = new Map<string, EmbeddingFunction>();
38+
export const knownEmbeddingFunctions = new Map<
39+
string,
40+
EmbeddingFunctionClass
41+
>();
3342

34-
export const registerEmbeddingFunction = (fn: EmbeddingFunction) => {
35-
if (!fn.name) {
36-
throw new Error("Embedding function must have a name to be registered.");
37-
}
38-
if (knownEmbeddingFunctions.has(fn.name)) {
43+
export const registerEmbeddingFunction = (
44+
name: string,
45+
fn: EmbeddingFunctionClass,
46+
) => {
47+
if (knownEmbeddingFunctions.has(name)) {
3948
throw new Error(
40-
`Embedding function with name ${fn.name} is already registered.`,
49+
`Embedding function with name ${name} is already registered.`,
4150
);
4251
}
43-
knownEmbeddingFunctions.set(fn.name, fn);
52+
knownEmbeddingFunctions.set(name, fn);
4453
};
4554

4655
export const getEmbeddingFunction = (
@@ -65,7 +74,7 @@ export const getEmbeddingFunction = (
6574
if (!embeddingFunction) {
6675
return new MalformedEmbeddingFunction(
6776
collectionName,
68-
`Embedding function ${name} is not registered. Make sure that the @ai-embeddings/${name} package is installed`,
77+
`Embedding function ${name} is not registered. Make sure that the @chroma-core/${name} package is installed`,
6978
);
7079
}
7180

@@ -89,12 +98,8 @@ export const getEmbeddingFunction = (
8998
};
9099

91100
export const serializeEmbeddingFunction = (
92-
ef?: EmbeddingFunction,
101+
ef: EmbeddingFunction,
93102
): EmbeddingFunctionConfiguration => {
94-
if (!ef) {
95-
return { type: "legacy" };
96-
}
97-
98103
if (!ef.getConfig || !ef.name) {
99104
throw new Error(
100105
"Failed to serialize embedding function: missing 'getConfig' or 'name'",
@@ -108,3 +113,24 @@ export const serializeEmbeddingFunction = (
108113
config: ef.getConfig(),
109114
};
110115
};
116+
117+
/**
 * Builds the embedding-function configuration used when no embedding
 * function is supplied.
 *
 * Dynamically imports `@chroma-core/default-embed`, registers its
 * `DefaultEmbeddingFunction` under the name "default" if that name is not
 * registered yet, and returns the matching "known" configuration with an
 * empty `config` object.
 *
 * @throws Error when `@chroma-core/default-embed` cannot be imported. The
 *   underlying import error is attached as the `cause` property.
 */
export const getDefaultEFConfig =
  async (): Promise<EmbeddingFunctionConfiguration> => {
    // One name for the registry lookup, the registration and the returned
    // config, so the three can never drift apart.
    const defaultName = "default";

    let defaultEmbed: typeof import("@chroma-core/default-embed");
    try {
      defaultEmbed = await import("@chroma-core/default-embed");
    } catch (e) {
      // Only an import failure means "package not installed"; keep the
      // original error reachable for debugging.
      throw Object.assign(
        new Error(
          "Cannot instantiate a collection with the DefaultEmbeddingFunction. Please install @chroma-core/default-embed, or provide a different embedding function",
        ),
        { cause: e },
      );
    }

    // The registry is keyed by name, so no instance has to be constructed
    // just to find out whether the default is already registered.
    if (!knownEmbeddingFunctions.has(defaultName)) {
      registerEmbeddingFunction(
        defaultName,
        defaultEmbed.DefaultEmbeddingFunction,
      );
    }

    return {
      name: defaultName,
      type: "known",
      config: {},
    };
  };

clients/new-js/packages/chromadb/src/utils.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ const validateEmbeddings = ({
119119
);
120120
}
121121

122-
if (embeddings.filter((e) => e.every((n: any) => typeof n === "number"))) {
122+
// `filter` always returns an array, and arrays are truthy even when empty,
// so negating its result can never be true. `every` yields the boolean the
// check needs: reject as soon as any embedding contains a non-number.
if (!embeddings.every((e) => e.every((n: any) => typeof n === "number"))) {
  throw new Error("Expected each embedding to be an array of numbers");
}
125125

0 commit comments

Comments
 (0)