Skip to content

Commit e023729

Browse files
authored
Fix revision arg for vLLM tokenizer (#721)
* Fix revision arg for vLLM tokenizer
* Add unit test
* Update test
* Move test repo
1 parent 1a90907 commit e023729

File tree

2 files changed

+41
-1
lines changed

2 files changed

+41
-1
lines changed

src/lighteval/models/vllm/vllm_model.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -204,7 +204,7 @@ def _create_auto_tokenizer(self, config: VLLMModelConfig):
204204
config.model_name,
205205
tokenizer_mode="auto",
206206
trust_remote_code=config.trust_remote_code,
207-
tokenizer_revision=config.revision,
207+
revision=config.revision,
208208
)
209209
tokenizer.pad_token = tokenizer.eos_token
210210
return tokenizer

tests/models/vllm/test_vllm_model.py

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,40 @@
1+
# MIT License
2+
3+
# Copyright (c) 2025 The HuggingFace Team
4+
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to deal
7+
# in the Software without restriction, including without limitation the rights
8+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
# copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
12+
# The above copyright notice and this permission notice shall be included in all
13+
# copies or substantial portions of the Software.
14+
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
# SOFTWARE.
22+
23+
import unittest
24+
25+
from transformers import AutoTokenizer
26+
27+
from lighteval.models.vllm.vllm_model import VLLMModel, VLLMModelConfig
28+
29+
30+
class TestVLLMTokenizerCreation(unittest.TestCase):
    """Tests for the tokenizer built by ``VLLMModel._create_auto_tokenizer``."""

    def test_tokenizer_created_with_correct_revision(self):
        """Compare the vLLM-side tokenizer against a reference at the same revision.

        The tokenizer returned by ``_create_auto_tokenizer`` must carry the
        same chat template as one loaded directly through
        ``AutoTokenizer.from_pretrained`` with the configured revision.
        """
        # NOTE(review): the repo name suggests each revision ships a distinct
        # chat template, which is what makes this comparison meaningful -
        # confirm against the test repository.
        config = VLLMModelConfig(
            model_name="lighteval/different-chat-templates-per-revision",
            revision="new_chat_template",
        )

        # Allocate the model with __new__ so __init__ is bypassed and only the
        # tokenizer helper is exercised.
        bare_model = VLLMModel.__new__(VLLMModel)
        vllm_tokenizer = bare_model._create_auto_tokenizer(config)

        reference_tokenizer = AutoTokenizer.from_pretrained(config.model_name, revision=config.revision)

        self.assertEqual(vllm_tokenizer.chat_template, reference_tokenizer.chat_template)

0 commit comments

Comments
 (0)