
Commit dc96dad

Fix IPEX embedders performance (#52)
1 parent ed85cdb commit dc96dad

File tree (2 files changed, +83 -8 lines):

  examples/optimized-embeddings.ipynb
  fastrag/embedders/ipex_embedder.py


examples/optimized-embeddings.ipynb

Lines changed: 12 additions & 3 deletions
@@ -75,7 +75,16 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Bi-encoders are implemented as two classes, one encoding the documents and the other encoding the queries. We load our quantized embedding model for both:"
+"Bi-encoders are implemented as two classes, one encoding the documents and the other encoding the queries.\n",
+"Embedding performance on Intel hardware depends on the data input strategy. It is recommended to calibrate the batch size and padding strategy to minimize latency or maximize throughput when embedding.\n",
+"\n",
+"If the sequences are shorter than the model's maximum length (for example, shorter than 512 for BGE), it is recommended to truncate them (via the `max_seq_length` argument) to speed up encoding.\n",
+"Padding can be set to `True`, so that each batch is padded to its longest sequence (which can vary between batches), or to `max_length`, which pads every batch to the configured maximum length.\n",
+"Varying the batch size with `padding=True` affects the throughput of the embedding model, since larger batches may be padded to longer sequences while smaller batches produce many batches of varying sizes.\n",
+"\n",
+"Experimentation on your data is key to maximizing performance!\n",
+"\n",
+"We load our quantized embedding model for both:"
 ]
 },
 {
@@ -84,7 +93,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"query_embedder = IPEXSentenceTransformersTextEmbedder(model=\"Intel/bge-small-en-v1.5-rag-int8-static\")"
+"query_embedder = IPEXSentenceTransformersTextEmbedder(model=\"Intel/bge-small-en-v1.5-rag-int8-static\", batch_size=1, max_seq_length=512, padding=True)"
 ]
 },
 {
@@ -93,7 +102,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"doc_embedder = IPEXSentenceTransformersDocumentEmbedder(model=\"Intel/bge-small-en-v1.5-rag-int8-static\")"
+"doc_embedder = IPEXSentenceTransformersDocumentEmbedder(model=\"Intel/bge-small-en-v1.5-rag-int8-static\", batch_size=32, max_seq_length=512, padding=True)"
 ]
 },
 {
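The notebook's advice boils down to sweeping a few batch_size / padding combinations on a sample of your own data. The sketch below is illustrative and not part of the commit: it assumes the embedder signature shown in this diff, that the class is importable from fastrag.embedders.ipex_embedder (the file path below), Haystack's Document type, and invented sample documents; the timing loop and the docs/s metric are our own. Note that the notebook itself picks batch_size=1 for the latency-bound query embedder and batch_size=32 for the throughput-bound document embedder.

# Illustrative calibration sweep for the IPEX document embedder (not part of the commit).
import time

from haystack import Document

from fastrag.embedders.ipex_embedder import IPEXSentenceTransformersDocumentEmbedder

# Replace with a representative sample of your own corpus.
docs = [
    Document(content=f"Synthetic passage {i} about quantized embedding models on Intel Xeon.")
    for i in range(256)
]

for batch_size in (8, 32, 128):
    for padding in (True, "max_length"):
        embedder = IPEXSentenceTransformersDocumentEmbedder(
            model="Intel/bge-small-en-v1.5-rag-int8-static",
            batch_size=batch_size,
            max_seq_length=512,
            padding=padding,
        )
        embedder.warm_up()  # reloads the quantized model per configuration; fine for a one-off calibration
        start = time.perf_counter()
        embedder.run(documents=docs)
        elapsed = time.perf_counter() - start
        print(f"batch_size={batch_size:<4} padding={str(padding):<10} -> {len(docs) / elapsed:.1f} docs/s")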

fastrag/embedders/ipex_embedder.py

Lines changed: 71 additions & 5 deletions
@@ -1,4 +1,4 @@
-from typing import Optional, Union
+from typing import Dict, List, Optional, Tuple, Union
 
 from haystack.components.embedders import (
     SentenceTransformersDocumentEmbedder,
@@ -27,6 +27,8 @@ def __init__(
         device: Optional[str] = None,
         auth_token: Optional[Secret] = None,
         trust_remote_code: bool = False,
+        max_seq_length: Optional[int] = None,
+        padding: Optional[bool] = True,
     ):
         import sentence_transformers
 
@@ -39,6 +41,46 @@ def _load_model(self, model_name_or_path, config, cache_dir, **model_args):
                 )
                 self.auto_model.eval()
 
+            def tokenize(self, texts: Union[List[str], List[Dict], List[Tuple[str, str]]]):
+                """
+                Override of the original st.models.Transformer `tokenize` method to add fixed-length tokenization.
+                """
+                output = {}
+                if isinstance(texts[0], str):
+                    to_tokenize = [texts]
+                elif isinstance(texts[0], dict):
+                    to_tokenize = []
+                    output["text_keys"] = []
+                    for lookup in texts:
+                        text_key, text = next(iter(lookup.items()))
+                        to_tokenize.append(text)
+                        output["text_keys"].append(text_key)
+                    to_tokenize = [to_tokenize]
+                else:
+                    batch1, batch2 = [], []
+                    for text_tuple in texts:
+                        batch1.append(text_tuple[0])
+                        batch2.append(text_tuple[1])
+                    to_tokenize = [batch1, batch2]
+
+                # Strip leading/trailing whitespace
+                to_tokenize = [[str(s).strip() for s in col] for col in to_tokenize]
+
+                # Lowercase if the underlying tokenizer is uncased
+                if self.do_lower_case:
+                    to_tokenize = [[s.lower() for s in col] for col in to_tokenize]
+
+                output.update(
+                    self.tokenizer(
+                        *to_tokenize,
+                        padding=self.padding,
+                        truncation=True,
+                        return_tensors="pt",
+                        max_length=self.max_seq_length,
+                    )
+                )
+                return output
+
         class _IPEXSentenceTransformer(sentence_transformers.SentenceTransformer):
             def _load_auto_model(
                 self,
@@ -81,6 +123,10 @@ def device(self):
             trust_remote_code=trust_remote_code,
         )
 
+        if max_seq_length is not None:
+            self.model._first_module().max_seq_length = max_seq_length
+        self.model._first_module().padding = padding
+
 
 def ipex_model_warm_up(self):
     """
@@ -91,31 +137,51 @@ def ipex_model_warm_up(self):
             model=self.model,
             device=self.device.to_torch_str(),
             auth_token=self.token,
+            max_seq_length=self.max_seq_length,
+            padding=self.padding,
         )
 
 
 class IPEXSentenceTransformersDocumentEmbedder(SentenceTransformersDocumentEmbedder):
     """
-    A document embedder that uses IPEX for efficient computation.
+    A document embedder that uses the IPEX backend for efficient computation.
 
     This class extends the base `SentenceTransformersDocumentEmbedder` class and provides an implementation
     that utilizes IPEX for faster document embedding computation.
+
+    Parameters:
+        max_seq_length (int, optional): The maximum sequence length of the input documents. Defaults to None.
+        padding (bool or str, optional): Whether to pad the input documents to the maximum sequence length.
+            If True, padding is enabled. If False, padding is disabled. If "max_length", padding is enabled
+            and the input documents are padded to the maximum sequence length. Defaults to True.
+        **kwargs: Additional keyword arguments to be passed to the base class constructor.
     """
 
-    def __init__(self, **kwargs):
+    def __init__(self, max_seq_length=None, padding=True, **kwargs):
         super().__init__(**kwargs)
+        self.max_seq_length = max_seq_length
+        self.padding = padding
 
 
 class IPEXSentenceTransformersTextEmbedder(SentenceTransformersTextEmbedder):
     """
-    A text embedder that uses IPEX for efficient text embedding.
+    A text embedder that uses the IPEX backend for efficient text embedding.
 
     This class extends the `SentenceTransformersTextEmbedder` class and provides
     an implementation that utilizes IPEX for faster and more efficient text embedding.
+
+    Parameters:
+        max_seq_length (int, optional): The maximum sequence length of the input text. Defaults to None.
+        padding (bool or str, optional): Whether to pad the input text to the maximum sequence length.
+            If True, padding is enabled. If False, padding is disabled. If "max_length", padding is enabled
+            and the input text is padded to the maximum sequence length. Defaults to True.
+        **kwargs: Additional keyword arguments to be passed to the parent class.
     """
 
-    def __init__(self, **kwargs):
+    def __init__(self, max_seq_length=None, padding=True, **kwargs):
         super().__init__(**kwargs)
+        self.max_seq_length = max_seq_length
+        self.padding = padding
 
 
 IPEXSentenceTransformersDocumentEmbedder.warm_up = ipex_model_warm_up
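For context on the tokenize override above: the fixed-length behavior it enables can be reproduced with the underlying Hugging Face tokenizer alone. The snippet below is a standalone illustration rather than fastRAG code; it assumes the tokenizer files are published alongside the quantized model on the Hub (any BGE tokenizer behaves identically) and uses invented example texts.

# padding=True pads each batch to its longest sequence (shape varies per batch);
# padding="max_length" pads every batch to max_length (fixed shapes, which tend to
# suit optimized/quantized backends at the cost of extra padding compute).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Intel/bge-small-en-v1.5-rag-int8-static")
texts = ["a short query", "a noticeably longer query about quantized embedding models"]

dynamic = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
fixed = tokenizer(texts, padding="max_length", truncation=True, max_length=512, return_tensors="pt")

print(dynamic["input_ids"].shape)  # e.g. torch.Size([2, 12]) -- depends on the longest text in the batch
print(fixed["input_ids"].shape)    # torch.Size([2, 512])     -- always padded to max_length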
