@@ -116,6 +116,9 @@ def from_bytes(
116
116
tested_but_hard_failure = [] # type: List[str]
117
117
tested_but_soft_failure = [] # type: List[str]
118
118
119
# Fallback candidates for the "ascii" and "utf_8" encodings. They are filled
# in further down when one of those two encodings is processed, and appended
# to the results only if the detection ends without any result at all.
+ fallback_ascii = None # type: Optional[CharsetMatch]
120
+ fallback_u8 = None # type: Optional[CharsetMatch]
121
+
119
122
single_byte_hard_failure_count = 0 # type: int
120
123
single_byte_soft_failure_count = 0 # type: int
121
124
@@ -251,6 +254,20 @@ def from_bytes(
251
254
encoding_iana ,
252
255
early_stop_count ,
253
256
round (mean_mess_ratio * 100 , ndigits = 3 ))
257
# Keep a candidate aside for ASCII and UTF-8 so that something can still be
# returned if the detection ends up with no result.
if encoding_iana in ("ascii", "utf_8"):
    fallback_entry = CharsetMatch(sequences, encoding_iana, threshold, False, [], decoded_payload)
    if encoding_iana == "utf_8":
        fallback_u8 = fallback_entry
    else:
        fallback_ascii = fallback_entry
254
271
continue
255
272
256
273
logger .info (
@@ -314,6 +331,17 @@ def from_bytes(
314
331
results [- 1 ]._languages
315
332
)
316
333
334
# Last resort: the detection produced nothing, so fall back on the ASCII or
# UTF-8 candidate that was kept aside earlier, if any.
if len(results) == 0:
    if fallback_u8 or fallback_ascii:
        logger.warning("Nothing got out of the detection process. Using ASCII/UTF-8 fallback.")

    # Prefer UTF-8 unless it decodes to exactly the same content as ASCII.
    # fallback_ascii may still be None here (only the UTF-8 candidate was
    # recorded), so it must be checked before reading its fingerprint;
    # otherwise an AttributeError is raised instead of returning the fallback.
    if fallback_u8 and (
        fallback_ascii is None
        or fallback_u8.fingerprint != fallback_ascii.fingerprint
    ):
        logger.warning("utf_8 will be used as a fallback match")
        results.append(fallback_u8)
    elif fallback_ascii:
        logger.warning("ascii will be used as a fallback match")
        results.append(fallback_ascii)
344
+
317
345
return results
318
346
319
347
0 commit comments