Skip to content

Commit 39109c6

Browse files
committed
Address review comments
1 parent 03482f4 commit 39109c6

File tree

1 file changed

+41
-42
lines changed

1 file changed

+41
-42
lines changed

cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORGenerator.java

Lines changed: 41 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ public enum Feature implements FormatFeature {
7474
* Feature that determines if an invalid surrogate encoding found in the
7575
* incoming String should fail with an exception or silently be output
7676
* as the Unicode 'REPLACEMENT CHARACTER' (U+FFFD)
77+
*
78+
* @since 2.12
7779
*/
7880
LENIENT_UTF_ENCODING(false),
7981

@@ -152,6 +154,11 @@ public int getMask() {
152154

153155
protected boolean _cfgMinimalInts;
154156

157+
158+
/**
159+
* If true, we will output the REPLACEMENT_CHAR for invalid Unicode sequences.
160+
* If false, we will throw an IllegalArgumentException for invalid Unicode sequences.
161+
*/
155162
protected boolean _cfgLenientUnicodeEncoding;
156163

157164
/*
@@ -1493,27 +1500,15 @@ private final int _encode2(int i, int outputPtr, char[] str, int len,
14931500
}
14941501
// Yup, a surrogate pair
14951502
if (c > SURR1_LAST) { // must be from first range; second won't do
1496-
if (_cfgLenientUnicodeEncoding) {
1497-
c = REPLACEMENT_CHAR;
1498-
} else {
1499-
_throwIllegalSurrogate(c);
1500-
}
1503+
c = _illegalSurrogateFound(c);
15011504
}
15021505
// ... meaning it must have a pair
15031506
else if (i >= len) {
1504-
if (_cfgLenientUnicodeEncoding) {
1505-
c = REPLACEMENT_CHAR;
1506-
} else {
1507-
_throwIllegalSurrogate(c);
1508-
}
1507+
c = _illegalSurrogateFound(c);
15091508
}
15101509
// ... verify that the next character is in range
15111510
else if (str[i] < SURR2_FIRST || str[i] > SURR2_LAST) {
1512-
if (_cfgLenientUnicodeEncoding) {
1513-
c = REPLACEMENT_CHAR;
1514-
} else {
1515-
_throwIllegalSurrogatePair(c, str[i]);
1516-
}
1511+
c = _illegalSurrogatePairFound(c, str[i]);
15171512
}
15181513
// ... we have a valid surrogate pair
15191514
else {
@@ -1541,43 +1536,47 @@ private int _convertSurrogate(int firstPart, int secondPart) {
15411536
int c = 0x10000 + ((firstPart - SURR1_FIRST) << 10)
15421537
+ (secondPart - SURR2_FIRST);
15431538
if (c > 0x10FFFF) { // illegal in JSON as well as in XML
1544-
if (_cfgLenientUnicodeEncoding) {
1545-
c = REPLACEMENT_CHAR;
1546-
} else {
1547-
_throwIllegalSurrogate(c);
1548-
}
1539+
c = _illegalSurrogatePairFound(firstPart, secondPart);
15491540
}
15501541
return c;
15511542
}
15521543

1553-
private void _throwIllegalSurrogatePair(int firstPart, int secondPart) {
1554-
throw new IllegalArgumentException(
1555-
"Broken surrogate pair: first char 0x"
1556-
+ Integer.toHexString(firstPart) + ", second 0x"
1557-
+ Integer.toHexString(secondPart)
1558-
+ "; illegal combination");
1544+
private int _illegalSurrogatePairFound(int firstPart, int secondPart) {
1545+
if (_cfgLenientUnicodeEncoding) {
1546+
return REPLACEMENT_CHAR;
1547+
} else {
1548+
throw new IllegalArgumentException(
1549+
"Broken surrogate pair: first char 0x"
1550+
+ Integer.toHexString(firstPart) + ", second 0x"
1551+
+ Integer.toHexString(secondPart)
1552+
+ "; illegal combination");
1553+
}
15591554
}
15601555

1561-
private void _throwIllegalSurrogate(int code) {
1562-
if (code > 0x10FFFF) { // over max?
1563-
throw new IllegalArgumentException("Illegal character point (0x"
1564-
+ Integer.toHexString(code)
1565-
+ ") to output; max is 0x10FFFF as per RFC 4627");
1566-
}
1567-
if (code >= SURR1_FIRST) {
1568-
if (code <= SURR1_LAST) { // Unmatched first part (closing without
1569-
// second part?)
1556+
private int _illegalSurrogateFound(int code) {
1557+
if (_cfgLenientUnicodeEncoding) {
1558+
return REPLACEMENT_CHAR;
1559+
} else {
1560+
if (code > 0x10FFFF) { // over max?
1561+
throw new IllegalArgumentException("Illegal character point (0x"
1562+
+ Integer.toHexString(code)
1563+
+ ") to output; max is 0x10FFFF as per RFC 4627");
1564+
}
1565+
if (code >= SURR1_FIRST) {
1566+
if (code <= SURR1_LAST) { // Unmatched first part (closing without
1567+
// second part?)
1568+
throw new IllegalArgumentException(
1569+
"Unmatched first part of surrogate pair (0x"
1570+
+ Integer.toHexString(code) + ")");
1571+
}
15701572
throw new IllegalArgumentException(
1571-
"Unmatched first part of surrogate pair (0x"
1573+
"Unmatched second part of surrogate pair (0x"
15721574
+ Integer.toHexString(code) + ")");
15731575
}
1574-
throw new IllegalArgumentException(
1575-
"Unmatched second part of surrogate pair (0x"
1576-
+ Integer.toHexString(code) + ")");
1576+
// should we ever get this?
1577+
throw new IllegalArgumentException("Illegal character point (0x"
1578+
+ Integer.toHexString(code) + ") to output");
15771579
}
1578-
// should we ever get this?
1579-
throw new IllegalArgumentException("Illegal character point (0x"
1580-
+ Integer.toHexString(code) + ") to output");
15811580
}
15821581

15831582
/*

0 commit comments

Comments
 (0)