Skip to content

Commit b0e4e94

Browse files
authored
Bugfix when passing empty bytes to detector (#29)
* Bugfix when passing an empty sequence to the detector
* Add test for the empty bytes case
* Bump version to 1.3.2
1 parent 6254c79 commit b0e4e94

File tree

3 files changed

+21
-1
lines changed

3 files changed

+21
-1
lines changed

charset_normalizer/normalizer.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -348,6 +348,14 @@ def from_bytes(sequences, steps=10, chunk_size=512, threshold=0.20, cp_isolation
348348
if not explain:
349349
logger.disable('charset_normalizer')
350350

351+
if len(sequences) == 0:
352+
return CharsetNormalizerMatch(
353+
sequences,
354+
'utf-8',
355+
0.,
356+
[]
357+
)
358+
351359
too_small_sequence = len(sequences) < 24
352360

353361
if too_small_sequence is True:

charset_normalizer/version.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,5 +2,5 @@
22
Expose version
33
"""
44

5-
__version__ = "1.3.1"
5+
__version__ = "1.3.2"
66
VERSION = __version__.split('.')

test/test_on_byte.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,18 @@ def test_too_short_none(self):
1010
CnM.from_bytes(b'\xfe\xff').best().first()
1111
)
1212

13+
def test_empty_bytes(self):
14+
r = CnM.from_bytes(b'').best().first()
15+
16+
self.assertIsNotNone(
17+
r
18+
)
19+
20+
self.assertEqual(
21+
'utf-8',
22+
r.encoding
23+
)
24+
1325
def test_bom_detection(self):
1426
with self.subTest('GB18030 UNAVAILABLE SIG'):
1527
self.assertFalse(

0 commit comments

Comments
 (0)