Skip to content

Commit ff80e9f

Browse files
committed
feat: make post proc sequence elements mutable
1 parent c541463 commit ff80e9f

File tree

8 files changed

+349
-131
lines changed

8 files changed

+349
-131
lines changed

bindings/python/src/normalizers.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,28 @@ impl PyReplace {
609609
ToPyResult(Replace::new(pattern, content)).into_py()?.into(),
610610
))
611611
}
612+
613+
/// Property getter for `pattern`.
///
/// Reading the pattern is not supported: this unconditionally returns an
/// error carrying the message "Cannot get pattern".
#[getter]
614+
fn get_pattern(_self: PyRef<Self>) -> PyResult<()> {
615+
Err(PyException::new_err("Cannot get pattern"))
616+
}
617+
618+
/// Property setter for `pattern`.
///
/// Assignment is rejected unconditionally; the `_pattern` argument is never
/// used. The returned error tells the caller to instantiate a new replace
/// pattern instead of mutating this one.
#[setter]
619+
fn set_pattern(_self: PyRef<Self>, _pattern: PyPattern) -> PyResult<()> {
620+
Err(PyException::new_err(
621+
"Cannot set pattern, please instantiate a new replace pattern instead",
622+
))
623+
}
624+
625+
/// Property getter for `content`, returned as an owned `String`.
///
/// Delegates to the `getter!` macro with the `Replace` variant and the
/// `content` field. NOTE(review): `getter!` is defined outside this view;
/// presumably it reads the field from the wrapped normalizer — confirm
/// against the macro definition.
#[getter]
626+
fn get_content(self_: PyRef<Self>) -> String {
627+
getter!(self_, Replace, content)
628+
}
629+
630+
/// Property setter for `content`.
///
/// Delegates to the `setter!` macro with the `Replace` variant, the `content`
/// field, and the new value. NOTE(review): `setter!` is defined outside this
/// view; presumably it writes the field on the wrapped normalizer in place —
/// confirm against the macro definition.
#[setter]
631+
fn set_content(self_: PyRef<Self>, content: String) {
632+
setter!(self_, Replace, content, content)
633+
}
612634
}
613635

614636
#[derive(Clone, Debug)]

bindings/python/src/pre_tokenizers.rs

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -422,9 +422,7 @@ impl PySplit {
422422

423423
/// Property getter for `behavior`.
///
/// Takes the value produced by `getter!`, converts it with `to_string()`,
/// and lowercases the result before returning it.
#[getter]
424424
fn get_behavior(self_: PyRef<Self>) -> String {
425-
getter!(self_, Punctuation, behavior)
426-
.to_string()
427-
.to_lowercase()
425+
getter!(self_, Split, behavior).to_string().to_lowercase()
428426
}
429427

430428
#[setter]
@@ -442,7 +440,7 @@ impl PySplit {
442440
))
443441
}
444442
};
445-
setter!(self_, Punctuation, behavior, behavior);
443+
setter!(self_, Split, behavior, behavior);
446444
Ok(())
447445
}
448446

@@ -586,24 +584,25 @@ impl PySequence {
586584
"Index not found",
587585
)),
588586
},
589-
PyPreTokenizerTypeWrapper::Single(inner) => {
590-
PyPreTokenizer::new(PyPreTokenizerTypeWrapper::Single(inner.clone()))
591-
.get_as_subtype(py)
592-
}
587+
_ => Err(PyErr::new::<pyo3::exceptions::PyIndexError, _>(
588+
"This processor is not a Sequence, it does not support __getitem__",
589+
)),
593590
}
594591
}
595592

596593
fn __setitem__(self_: PyRef<'_, Self>, index: usize, value: Bound<'_, PyAny>) -> PyResult<()> {
597594
let pretok: PyPreTokenizer = value.extract()?;
598-
let PyPreTokenizerTypeWrapper::Single(norm) = pretok.pretok else {
599-
return Err(PyException::new_err("normalizer should not be a sequence"));
595+
let PyPreTokenizerTypeWrapper::Single(pretok) = pretok.pretok else {
596+
return Err(PyException::new_err(
597+
"pre tokenizer should not be a sequence",
598+
));
600599
};
601600
match &self_.as_ref().pretok {
602601
PyPreTokenizerTypeWrapper::Sequence(inner) => match inner.get(index) {
603602
Some(item) => {
604603
*item
605604
.write()
606-
.map_err(|_| PyException::new_err("RwLock synchronisation primitive is poisoned, cannot get subtype of PyPreTokenizer"))? = (*norm
605+
.map_err(|_| PyException::new_err("RwLock synchronisation primitive is poisoned, cannot get subtype of PyPreTokenizer"))? = (*pretok
607606
.read()
608607
.map_err(|_| PyException::new_err("RwLock synchronisation primitive is poisoned, cannot get subtype of PyPreTokenizer"))?)
609608
.clone();

0 commit comments

Comments
 (0)