Skip to content

Commit d0fe828

Browse files
authored
Implement LRU cache for BPE tokenizer (#1283)
- Implement simple LRU cache
- Utilize LRU cache in BPE tokenizer
- Add LRUCache unit tests
- Add memory leak unit test
1 parent 6d47745 commit d0fe828

File tree

4 files changed

+164
-5
lines changed

4 files changed

+164
-5
lines changed

src/tokenizers.js

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import {
4343
TokenLattice,
4444
CharTrie,
4545
DictionarySplitter,
46+
LRUCache,
4647
} from './utils/data-structures.js';
4748

4849
import { Template } from '@huggingface/jinja';
@@ -727,8 +728,24 @@ class BPE extends TokenizerModel {
727728

728729
this.ignore_merges = this.config.ignore_merges ?? false;
729730

730-
/** @type {Map<string, string[]>} */
731-
this.cache = new Map();
731+
/**
732+
* The maximum length we should cache in a model.
733+
* Strings that are too long have minimal chances to cache hit anyway
734+
*/
735+
this.max_length_to_cache = 256;
736+
737+
/**
738+
* The default capacity for a `BPE`'s internal cache.
739+
*/
740+
this.cache_capacity = 10000;
741+
this.cache = new LRUCache(this.cache_capacity);
742+
}
743+
744+
/**
745+
* Clears the cache.
746+
*/
747+
clear_cache() {
748+
this.cache.clear();
732749
}
733750

734751
/**
@@ -855,8 +872,10 @@ class BPE extends TokenizerModel {
855872
}
856873
}
857874

858-
// Save the result to the cache
859-
this.cache.set(token, result);
875+
if (token.length < this.max_length_to_cache) {
876+
// Save the result to the cache
877+
this.cache.put(token, result);
878+
}
860879

861880
return result;
862881
}

src/utils/data-structures.js

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,3 +519,56 @@ export class DictionarySplitter {
519519
return result;
520520
}
521521
}
522+
523+
/**
 * A simple Least Recently Used (LRU) cache implementation in JavaScript.
 * This cache stores key-value pairs and evicts the least recently used item
 * when the capacity is exceeded.
 *
 * Recency is tracked via `Map`'s insertion-order iteration: the first key
 * yielded by `Map.prototype.keys()` is always the least recently used entry.
 */
export class LRUCache {
    /**
     * Creates an LRUCache instance.
     * @param {number} capacity The maximum number of items the cache can hold.
     * @throws {Error} If `capacity` is not a positive integer. Without this check,
     * a `NaN`/`undefined` capacity would make the eviction comparison in `put`
     * always false and the cache would grow without bound (a silent memory leak).
     */
    constructor(capacity) {
        if (!Number.isInteger(capacity) || capacity <= 0) {
            throw new Error(`LRUCache capacity must be a positive integer, got ${capacity}`);
        }
        this.capacity = capacity;
        this.cache = new Map();
    }

    /**
     * Retrieves the value associated with the given key and marks the key as recently used.
     * @param {any} key The key to retrieve.
     * @returns {any} The value associated with the key, or undefined if the key does not exist.
     */
    get(key) {
        if (!this.cache.has(key)) return undefined;
        // Delete and re-insert so the key moves to the "most recently used"
        // end of the Map's iteration order.
        const value = this.cache.get(key);
        this.cache.delete(key);
        this.cache.set(key, value);
        return value;
    }

    /**
     * Inserts or updates the key-value pair in the cache.
     * If the key already exists, it is updated and marked as recently used.
     * If the cache exceeds its capacity, the least recently used item is evicted.
     * @param {any} key The key to add or update.
     * @param {any} value The value to associate with the key.
     */
    put(key, value) {
        if (this.cache.has(key)) {
            // Delete first so the subsequent set refreshes the key's recency.
            this.cache.delete(key);
        }
        this.cache.set(key, value);
        if (this.cache.size > this.capacity) {
            // The first key in iteration order is the least recently used.
            this.cache.delete(this.cache.keys().next().value);
        }
    }

    /**
     * Clears the cache.
     */
    clear() {
        this.cache.clear();
    }
}

tests/tokenizers.test.js

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,23 @@ describe("Edge cases", () => {
302302
}, 5000); // NOTE: 5 seconds
303303
});
304304

305+
describe("Memory leak tests", () => {
  it("should not leak memory", async () => {
    const modelId = "TinyLlama/TinyLlama-1.1B-Chat-v1.0";
    const tokenizer = await AutoTokenizer.from_pretrained(modelId);

    // Encode progressively longer, unique strings; with a bounded tokenizer
    // cache the heap growth should stay well under the limit.
    const heapBefore = process.memoryUsage().heapUsed;
    for (let i = 0; i < 2500; ++i) {
      const s = `${i} ${i} `.repeat(i);
      tokenizer.encode(s);
    }
    const heapAfter = process.memoryUsage().heapUsed;

    const heapGrowth = heapAfter - heapBefore;
    const heapLimit = 100 * 1024 * 1024; // 100 MB
    expect(heapGrowth).toBeLessThan(heapLimit);
  }, 30000); // Increase timeout to accommodate the memory leak test
});
321+
305322
describe("Extra decoding tests", () => {
306323
it(
307324
"should be able to decode the output of encode",

tests/utils/data_structures.test.js

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { PriorityQueue, DictionarySplitter } from "../../src/utils/data-structures.js";
1+
import { PriorityQueue, DictionarySplitter, LRUCache } from "../../src/utils/data-structures.js";
22

33
describe("Priority queue", () => {
44
const EXAMPLE_ARRAY = [2, 5, 3, 1, 4];
@@ -49,3 +49,73 @@ describe("Dictionary splitter", () => {
4949
expect(result).toEqual(expected);
5050
});
5151
});
52+
53+
describe("LRUCache", () => {
  it("should return undefined for non-existent keys", () => {
    const lru = new LRUCache(2);
    expect(lru.get("nonexistent")).toEqual(undefined);
  });

  it("should store and retrieve values correctly", () => {
    const lru = new LRUCache(2);
    lru.put("a", 1);
    lru.put("b", 2);
    expect(lru.get("a")).toEqual(1);
    expect(lru.get("b")).toEqual(2);
  });

  it("should update the value and refresh the usage", () => {
    const lru = new LRUCache(2);
    lru.put("a", 1);
    lru.put("b", 2);
    // Overwriting "a" also bumps it to most-recently-used.
    lru.put("a", 10);
    expect(lru.get("a")).toEqual(10);
    // Touch "a" again, leaving "b" as the eviction candidate.
    lru.get("a");
    lru.put("c", 3);
    // Inserting "c" pushes out "b", the least recently used entry.
    expect(lru.get("b")).toEqual(undefined);
    expect(lru.get("c")).toEqual(3);
  });

  it("should evict the least recently used item when capacity is exceeded", () => {
    const lru = new LRUCache(3);
    lru.put("a", 1);
    lru.put("b", 2);
    lru.put("c", 3);
    // Reading "a" refreshes it, so "b" is now the oldest entry.
    lru.get("a");
    // Adding a fourth key therefore evicts "b".
    lru.put("d", 4);
    expect(lru.get("b")).toEqual(undefined);
    expect(lru.get("a")).toEqual(1);
    expect(lru.get("c")).toEqual(3);
    expect(lru.get("d")).toEqual(4);
  });

  it("should update the usage order on get", () => {
    const lru = new LRUCache(3);
    lru.put("a", "apple");
    lru.put("b", "banana");
    lru.put("c", "cherry");
    // Reading "a" makes it the most recently used entry.
    expect(lru.get("a")).toEqual("apple");
    // The next insertion evicts "b" rather than "a".
    lru.put("d", "date");
    expect(lru.get("b")).toEqual(undefined);
    // The remaining entries all survive.
    expect(lru.get("a")).toEqual("apple");
    expect(lru.get("c")).toEqual("cherry");
    expect(lru.get("d")).toEqual("date");
  });

  it("should clear the cache", () => {
    const lru = new LRUCache(2);
    lru.put("a", 1);
    lru.put("b", 2);
    lru.clear();
    expect(lru.get("a")).toEqual(undefined);
    expect(lru.get("b")).toEqual(undefined);
  });
});

0 commit comments

Comments (0)