Skip to content

Commit 48c2e6b

Browse files
authored
Improvement patch on jp (#30)
* Hardcode more punctuation signs. * Do not trigger the suspicious-range check on Hiragana × Katakana pairs. * Bump the patch version (1.3.2 → 1.3.3).
1 parent b0e4e94 commit 48c2e6b

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

charset_normalizer/unicode.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def is_punc(letter):
6969
return r_name is not None and \
7070
("Punctuation" in r_name or
7171
'Forms' in r_name or
72-
letter in 'º¯—–‒‐⁃«‹?!;.:^$*»£¹¿~ª؟©±¡{}[]|¼½¾⅕⅙⅛™℠‼⁇❝❞¶⁋√↑↓�¤`')
72+
letter in set('º¯—–‒‐⁃«‹?!;.:^$¥*»£¹¿~ª؟©±¡{}[]|½⅓⅔¼¾⅕⅖⅗⅘⅙⅚⅐⅛⅜⅝⅞⅑⅒™℠¬‼⁇❝❞¶⁋√↑↓�¤`¨'))
7373

7474
@staticmethod
7575
@lru_cache(maxsize=8192)
@@ -141,6 +141,9 @@ def is_suspiciously_successive_range(range_name_a, range_name_b):
141141
if 'CJK' in range_name_a and range_name_b in ['Katakana', 'Hiragana']:
142142
return False
143143

144+
if range_name_a in ['Katakana', 'Hiragana'] and range_name_b in ['Katakana', 'Hiragana']:
145+
return False
146+
144147
return True
145148

146149
@staticmethod

charset_normalizer/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22
Expose version
33
"""
44

5-
__version__ = "1.3.2"
5+
__version__ = "1.3.3"
66
VERSION = __version__.split('.')

0 commit comments

Comments
 (0)