Skip to content

Commit e73a98e

Browse files
committed
Address review comments
1 parent df54c36 commit e73a98e

File tree

1 file changed

+41
-42
lines changed

1 file changed

+41
-42
lines changed

cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORGenerator.java

Lines changed: 41 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ public enum Feature implements FormatFeature {
7272
* Feature that determines if an invalid surrogate encoding found in the
7373
* incoming String should fail with an exception or silently be output
7474
* as the Unicode 'REPLACEMENT CHARACTER' (U+FFFD)
75+
*
76+
* @since 2.12
7577
*/
7678
LENIENT_UTF_ENCODING(false),
7779

@@ -150,6 +152,11 @@ public int getMask() {
150152

151153
protected boolean _cfgMinimalInts;
152154

155+
156+
/**
157+
* If true, we will output the REPLACEMENT_CHAR for invalid Unicode sequences.
158+
* If false, we will throw an IllegalArgumentException for invalid Unicode sequences.
159+
*/
153160
protected boolean _cfgLenientUnicodeEncoding;
154161

155162
/*
@@ -1425,27 +1432,15 @@ private final int _encode2(int i, int outputPtr, char[] str, int len,
14251432
}
14261433
// Yup, a surrogate pair
14271434
if (c > SURR1_LAST) { // must be from first range; second won't do
1428-
if (_cfgLenientUnicodeEncoding) {
1429-
c = REPLACEMENT_CHAR;
1430-
} else {
1431-
_throwIllegalSurrogate(c);
1432-
}
1435+
c = _illegalSurrogateFound(c);
14331436
}
14341437
// ... meaning it must have a pair
14351438
else if (i >= len) {
1436-
if (_cfgLenientUnicodeEncoding) {
1437-
c = REPLACEMENT_CHAR;
1438-
} else {
1439-
_throwIllegalSurrogate(c);
1440-
}
1439+
c = _illegalSurrogateFound(c);
14411440
}
14421441
// ... verify that the next character is in range
14431442
else if (str[i] < SURR2_FIRST || str[i] > SURR2_LAST) {
1444-
if (_cfgLenientUnicodeEncoding) {
1445-
c = REPLACEMENT_CHAR;
1446-
} else {
1447-
_throwIllegalSurrogatePair(c, str[i]);
1448-
}
1443+
c = _illegalSurrogatePairFound(c, str[i]);
14491444
}
14501445
// ... we have a valid surrogate pair
14511446
else {
@@ -1473,43 +1468,47 @@ private int _convertSurrogate(int firstPart, int secondPart) {
14731468
int c = 0x10000 + ((firstPart - SURR1_FIRST) << 10)
14741469
+ (secondPart - SURR2_FIRST);
14751470
if (c > 0x10FFFF) { // illegal in JSON as well as in XML
1476-
if (_cfgLenientUnicodeEncoding) {
1477-
c = REPLACEMENT_CHAR;
1478-
} else {
1479-
_throwIllegalSurrogate(c);
1480-
}
1471+
c = _illegalSurrogatePairFound(firstPart, secondPart);
14811472
}
14821473
return c;
14831474
}
14841475

1485-
private void _throwIllegalSurrogatePair(int firstPart, int secondPart) {
1486-
throw new IllegalArgumentException(
1487-
"Broken surrogate pair: first char 0x"
1488-
+ Integer.toHexString(firstPart) + ", second 0x"
1489-
+ Integer.toHexString(secondPart)
1490-
+ "; illegal combination");
1476+
private int _illegalSurrogatePairFound(int firstPart, int secondPart) {
1477+
if (_cfgLenientUnicodeEncoding) {
1478+
return REPLACEMENT_CHAR;
1479+
} else {
1480+
throw new IllegalArgumentException(
1481+
"Broken surrogate pair: first char 0x"
1482+
+ Integer.toHexString(firstPart) + ", second 0x"
1483+
+ Integer.toHexString(secondPart)
1484+
+ "; illegal combination");
1485+
}
14911486
}
14921487

1493-
private void _throwIllegalSurrogate(int code) {
1494-
if (code > 0x10FFFF) { // over max?
1495-
throw new IllegalArgumentException("Illegal character point (0x"
1496-
+ Integer.toHexString(code)
1497-
+ ") to output; max is 0x10FFFF as per RFC 4627");
1498-
}
1499-
if (code >= SURR1_FIRST) {
1500-
if (code <= SURR1_LAST) { // Unmatched first part (closing without
1501-
// second part?)
1488+
private int _illegalSurrogateFound(int code) {
1489+
if (_cfgLenientUnicodeEncoding) {
1490+
return REPLACEMENT_CHAR;
1491+
} else {
1492+
if (code > 0x10FFFF) { // over max?
1493+
throw new IllegalArgumentException("Illegal character point (0x"
1494+
+ Integer.toHexString(code)
1495+
+ ") to output; max is 0x10FFFF as per RFC 4627");
1496+
}
1497+
if (code >= SURR1_FIRST) {
1498+
if (code <= SURR1_LAST) { // Unmatched first part (closing without
1499+
// second part?)
1500+
throw new IllegalArgumentException(
1501+
"Unmatched first part of surrogate pair (0x"
1502+
+ Integer.toHexString(code) + ")");
1503+
}
15021504
throw new IllegalArgumentException(
1503-
"Unmatched first part of surrogate pair (0x"
1505+
"Unmatched second part of surrogate pair (0x"
15041506
+ Integer.toHexString(code) + ")");
15051507
}
1506-
throw new IllegalArgumentException(
1507-
"Unmatched second part of surrogate pair (0x"
1508-
+ Integer.toHexString(code) + ")");
1508+
// should we ever get this?
1509+
throw new IllegalArgumentException("Illegal character point (0x"
1510+
+ Integer.toHexString(code) + ") to output");
15091511
}
1510-
// should we ever get this?
1511-
throw new IllegalArgumentException("Illegal character point (0x"
1512-
+ Integer.toHexString(code) + ") to output");
15131512
}
15141513

15151514
/*

0 commit comments

Comments
 (0)