From 6e54cd9d5a7397a33c3dce09ad8073dd26e705f0 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Thu, 13 Mar 2025 02:24:25 +0000 Subject: [PATCH 1/2] Add support for gemma 3 (text) --- README.md | 1 + docs/snippets/6_supported-models.snippet | 1 + src/configs.js | 2 ++ src/models.js | 23 +++++++++++++++++++++-- 4 files changed, 25 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6fe4f7d9c..c9435b7c4 100644 --- a/README.md +++ b/README.md @@ -330,6 +330,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te 1. **Florence2** (from Microsoft) released with the paper [Florence-2: Advancing a Unified Representation for a Variety of Vision Tasks](https://arxiv.org/abs/2311.06242) by Bin Xiao, Haiping Wu, Weijian Xu, Xiyang Dai, Houdong Hu, Yumao Lu, Michael Zeng, Ce Liu, Lu Yuan. 1. **[Gemma](https://huggingface.co/docs/transformers/main/model_doc/gemma)** (from Google) released with the paper [Gemma: Open Models Based on Gemini Technology and Research](https://blog.google/technology/developers/gemma-open-models/) by the Gemma Google team. 1. **[Gemma2](https://huggingface.co/docs/transformers/main/model_doc/gemma2)** (from Google) released with the paper [Gemma2: Open Models Based on Gemini Technology and Research](https://blog.google/technology/developers/google-gemma-2/) by the Gemma Google team. +1. **[Gemma3](https://huggingface.co/docs/transformers/main/model_doc/gemma3)** (from Google) released with the paper [Introducing Gemma 3: The most capable model you can run on a single GPU or TPU](https://blog.google/technology/developers/gemma-3/) by the Gemma Google team. 1. **[GLM](https://huggingface.co/docs/transformers/main/model_doc/glm)** (from the GLM Team, THUDM & ZhipuAI) released with the paper [ChatGLM: A Family of Large Language Models from GLM-130B to GLM-4 All Tools](https://arxiv.org/abs/2406.12793v2) by Team GLM: Aohan Zeng, Bin Xu, Bowen Wang, Chenhui Zhang, Da Yin, Dan Zhang, Diego Rojas, Guanyu Feng, Hanlin Zhao, Hanyu Lai, Hao Yu, Hongning Wang, Jiadai Sun, Jiajie Zhang, Jiale Cheng, Jiayi Gui, Jie Tang, Jing Zhang, Jingyu Sun, Juanzi Li, Lei Zhao, Lindong Wu, Lucen Zhong, Mingdao Liu, Minlie Huang, Peng Zhang, Qinkai Zheng, Rui Lu, Shuaiqi Duan, Shudan Zhang, Shulin Cao, Shuxun Yang, Weng Lam Tam, Wenyi Zhao, Xiao Liu, Xiao Xia, Xiaohan Zhang, Xiaotao Gu, Xin Lv, Xinghan Liu, Xinyi Liu, Xinyue Yang, Xixuan Song, Xunkai Zhang, Yifan An, Yifan Xu, Yilin Niu, Yuantao Yang, Yueyan Li, Yushi Bai, Yuxiao Dong, Zehan Qi, Zhaoyu Wang, Zhen Yang, Zhengxiao Du, Zhenyu Hou, Zihan Wang. 1. **[GLPN](https://huggingface.co/docs/transformers/model_doc/glpn)** (from KAIST) released with the paper [Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth](https://arxiv.org/abs/2201.07436) by Doyeon Kim, Woonghyun Ga, Pyungwhan Ahn, Donggyu Joo, Sehwan Chun, Junmo Kim. 1. **[GPT Neo](https://huggingface.co/docs/transformers/model_doc/gpt_neo)** (from EleutherAI) released in the repository [EleutherAI/gpt-neo](https://github.com/EleutherAI/gpt-neo) by Sid Black, Stella Biderman, Leo Gao, Phil Wang and Connor Leahy. diff --git a/docs/snippets/6_supported-models.snippet b/docs/snippets/6_supported-models.snippet index 2e4d5b141..b00d85699 100644 --- a/docs/snippets/6_supported-models.snippet +++ b/docs/snippets/6_supported-models.snippet @@ -44,6 +44,7 @@ 1. 
**Florence2** (from Microsoft) released with the paper [Florence-2: Advancing a Unified Representation for a Variety of Vision Tasks](https://arxiv.org/abs/2311.06242) by Bin Xiao, Haiping Wu, Weijian Xu, Xiyang Dai, Houdong Hu, Yumao Lu, Michael Zeng, Ce Liu, Lu Yuan. 1. **[Gemma](https://huggingface.co/docs/transformers/main/model_doc/gemma)** (from Google) released with the paper [Gemma: Open Models Based on Gemini Technology and Research](https://blog.google/technology/developers/gemma-open-models/) by the Gemma Google team. 1. **[Gemma2](https://huggingface.co/docs/transformers/main/model_doc/gemma2)** (from Google) released with the paper [Gemma2: Open Models Based on Gemini Technology and Research](https://blog.google/technology/developers/google-gemma-2/) by the Gemma Google team. +1. **[Gemma3](https://huggingface.co/docs/transformers/main/model_doc/gemma3)** (from Google) released with the paper [Introducing Gemma 3: The most capable model you can run on a single GPU or TPU](https://blog.google/technology/developers/gemma-3/) by the Gemma Google team. 1. **[GLM](https://huggingface.co/docs/transformers/main/model_doc/glm)** (from the GLM Team, THUDM & ZhipuAI) released with the paper [ChatGLM: A Family of Large Language Models from GLM-130B to GLM-4 All Tools](https://arxiv.org/abs/2406.12793v2) by Team GLM: Aohan Zeng, Bin Xu, Bowen Wang, Chenhui Zhang, Da Yin, Dan Zhang, Diego Rojas, Guanyu Feng, Hanlin Zhao, Hanyu Lai, Hao Yu, Hongning Wang, Jiadai Sun, Jiajie Zhang, Jiale Cheng, Jiayi Gui, Jie Tang, Jing Zhang, Jingyu Sun, Juanzi Li, Lei Zhao, Lindong Wu, Lucen Zhong, Mingdao Liu, Minlie Huang, Peng Zhang, Qinkai Zheng, Rui Lu, Shuaiqi Duan, Shudan Zhang, Shulin Cao, Shuxun Yang, Weng Lam Tam, Wenyi Zhao, Xiao Liu, Xiao Xia, Xiaohan Zhang, Xiaotao Gu, Xin Lv, Xinghan Liu, Xinyi Liu, Xinyue Yang, Xixuan Song, Xunkai Zhang, Yifan An, Yifan Xu, Yilin Niu, Yuantao Yang, Yueyan Li, Yushi Bai, Yuxiao Dong, Zehan Qi, Zhaoyu Wang, Zhen Yang, Zhengxiao Du, Zhenyu Hou, Zihan Wang. 1. **[GLPN](https://huggingface.co/docs/transformers/model_doc/glpn)** (from KAIST) released with the paper [Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth](https://arxiv.org/abs/2201.07436) by Doyeon Kim, Woonghyun Ga, Pyungwhan Ahn, Donggyu Joo, Sehwan Chun, Junmo Kim. 1. **[GPT Neo](https://huggingface.co/docs/transformers/model_doc/gpt_neo)** (from EleutherAI) released in the repository [EleutherAI/gpt-neo](https://github.com/EleutherAI/gpt-neo) by Sid Black, Stella Biderman, Leo Gao, Phil Wang and Connor Leahy. 
diff --git a/src/configs.js b/src/configs.js index ad56509cb..dccf6add5 100644 --- a/src/configs.js +++ b/src/configs.js @@ -67,6 +67,7 @@ function getNormalizedConfig(config) { // Sub-configs case 'llava': case 'paligemma': + case 'gemma3': case 'florence2': case 'llava_onevision': case 'idefics3': @@ -126,6 +127,7 @@ function getNormalizedConfig(config) { break; case 'gemma': case 'gemma2': + case 'gemma3_text': case 'glm': case 'helium': mapping['num_heads'] = 'num_key_value_heads'; diff --git a/src/models.js b/src/models.js index 0617ba51b..c4a707e0b 100644 --- a/src/models.js +++ b/src/models.js @@ -594,8 +594,8 @@ async function decoderForward(self, model_inputs, is_encoder_decoder = false) { new_model_inputs.use_cache_branch = boolTensor(!!past_key_values); } if (session.inputNames.includes('position_ids') && new_model_inputs.attention_mask && !new_model_inputs.position_ids) { - // NOTE: Handle a special case for paligemma models, where positions are 1-indexed - const start_index = self.config.model_type === 'paligemma' ? 1 : 0; + // NOTE: Handle a special case for paligemma/gemma3 models, where positions are 1-indexed + const start_index = ['paligemma', 'gemma3_text', 'gemma3'].includes(self.config.model_type) ? 1 : 0; new_model_inputs.position_ids = createPositionIds(new_model_inputs, past_key_values, start_index); } @@ -4520,6 +4520,23 @@ export class Gemma2Model extends Gemma2PreTrainedModel { } export class Gemma2ForCausalLM extends Gemma2PreTrainedModel { } ////////////////////////////////////////////////// + +////////////////////////////////////////////////// +// Gemma3 models + +/** + * The bare Gemma3 Model outputting raw hidden-states without any specific head on top. + */ +export class Gemma3PreTrainedModel extends PreTrainedModel { } +/** + * The bare Gemma3 Model outputting raw hidden-states without any specific head on top. 
+ */ +export class Gemma3Model extends Gemma3PreTrainedModel { } + +export class Gemma3ForCausalLM extends Gemma3PreTrainedModel { } +////////////////////////////////////////////////// + + ////////////////////////////////////////////////// export class OpenELMPreTrainedModel extends PreTrainedModel { } export class OpenELMModel extends OpenELMPreTrainedModel { } @@ -7488,6 +7505,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([ ['cohere', ['CohereModel', CohereModel]], ['gemma', ['GemmaModel', GemmaModel]], ['gemma2', ['Gemma2Model', Gemma2Model]], + ['gemma3_text', ['Gemma3Model', Gemma3Model]], ['helium', ['HeliumModel', HeliumModel]], ['glm', ['GlmModel', GlmModel]], ['openelm', ['OpenELMModel', OpenELMModel]], @@ -7587,6 +7605,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([ ['cohere', ['CohereForCausalLM', CohereForCausalLM]], ['gemma', ['GemmaForCausalLM', GemmaForCausalLM]], ['gemma2', ['Gemma2ForCausalLM', Gemma2ForCausalLM]], + ['gemma3_text', ['Gemma3ForCausalLM', Gemma3ForCausalLM]], ['helium', ['HeliumForCausalLM', HeliumForCausalLM]], ['glm', ['GlmForCausalLM', GlmForCausalLM]], ['openelm', ['OpenELMForCausalLM', OpenELMForCausalLM]], From 2f0855aceaf9130df0e008ec7bc5369f32f5b2d1 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Thu, 13 Mar 2025 02:24:46 +0000 Subject: [PATCH 2/2] Fix tensor slicing past boundaries --- src/utils/tensor.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/utils/tensor.js b/src/utils/tensor.js index 07b74ac3a..357aba9f7 100644 --- a/src/utils/tensor.js +++ b/src/utils/tensor.js @@ -1182,8 +1182,12 @@ function calc_unsqueeze_dims(dims, dim) { * @private */ function safeIndex(index, size, dimension = null, boundsCheck = true) { - if (boundsCheck && (index < -size || index >= size)) { - throw new Error(`IndexError: index ${index} is out of bounds for dimension${dimension === null ? '' : ' ' + dimension} with size ${size}`); + if (index < -size || index >= size) { + if (boundsCheck) { + throw new Error(`IndexError: index ${index} is out of bounds for dimension${dimension === null ? '' : ' ' + dimension} with size ${size}`); + } else { + return index < -size ? 0 : size; + } } if (index < 0) {