Skip to content

Added emitUTF8 setting. #1045

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Oct 17, 2019
51 changes: 31 additions & 20 deletions src/lib_json/json_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ static String toHex16Bit(unsigned int x) {
return result;
}

static String valueToQuotedStringN(const char* value, unsigned length) {
static String valueToQuotedStringN(const char* value, unsigned length, bool emitUTF8 = false ) {
Copy link
Member

@dota17 dota17 Oct 9, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's better to overload it instead of rewriting it, I guess?

etc, use it outside the method

if (_emitUTF8) {
  ....
} else {
  valueToQuotedStringN(value, length);
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I want to use the escaping of the tabs, newlines etc. in the first part of the function and then either write the UTF-8 characters or unicode escape them in the second part. Perhaps I should split the function into those separate pieces? What's your view?

Copy link
Contributor

@baylesj baylesj Oct 10, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually prefer it as is. There's nothing wrong with the default argument usage here, especially since most of the downsides of default arguments don't apply to static methods.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm glad that you're all considering binary compatibility. Thank you.

Yes, internal "statics" are hidden from the public API, so either way is fine with me.

This change is a nice idea.

if (value == nullptr)
return "";

Expand Down Expand Up @@ -313,21 +313,26 @@ static String valueToQuotedStringN(const char* value, unsigned length) {
// Should add a flag to allow this compatibility mode and prevent this
// sequence from occurring.
default: {
unsigned int cp = utf8ToCodepoint(c, end);
// don't escape non-control characters
// (short escape sequence are applied above)
if (cp < 0x80 && cp >= 0x20)
result += static_cast<char>(cp);
else if (cp < 0x10000) { // codepoint is in Basic Multilingual Plane
result += "\\u";
result += toHex16Bit(cp);
} else { // codepoint is not in Basic Multilingual Plane
// convert to surrogate pair first
cp -= 0x10000;
result += "\\u";
result += toHex16Bit((cp >> 10) + 0xD800);
result += "\\u";
result += toHex16Bit((cp & 0x3FF) + 0xDC00);
if ( emitUTF8 ) {
result += *c;
}
else {
unsigned int cp = utf8ToCodepoint(c, end);
// don't escape non-control characters
// (short escape sequence are applied above)
if (cp < 0x80 && cp >= 0x20)
result += static_cast<char>(cp);
else if (cp < 0x10000) { // codepoint is in Basic Multilingual Plane
result += "\\u";
result += toHex16Bit(cp);
} else { // codepoint is not in Basic Multilingual Plane
// convert to surrogate pair first
cp -= 0x10000;
result += "\\u";
result += toHex16Bit((cp >> 10) + 0xD800);
result += "\\u";
result += toHex16Bit((cp & 0x3FF) + 0xDC00);
}
}
} break;
}
Expand Down Expand Up @@ -870,6 +875,7 @@ struct BuiltStyledStreamWriter : public StreamWriter {
String nullSymbol,
String endingLineFeedSymbol,
bool useSpecialFloats,
bool emitUTF8,
unsigned int precision,
PrecisionType precisionType);
int write(Value const& root, OStream* sout) override;
Expand Down Expand Up @@ -900,6 +906,7 @@ struct BuiltStyledStreamWriter : public StreamWriter {
bool addChildValues_ : 1;
bool indented_ : 1;
bool useSpecialFloats_ : 1;
bool emitUTF8_ : 1;
unsigned int precision_;
PrecisionType precisionType_;
};
Expand All @@ -909,13 +916,14 @@ BuiltStyledStreamWriter::BuiltStyledStreamWriter(String indentation,
String nullSymbol,
String endingLineFeedSymbol,
bool useSpecialFloats,
bool emitUTF8,
unsigned int precision,
PrecisionType precisionType)
: rightMargin_(74), indentation_(std::move(indentation)), cs_(cs),
colonSymbol_(std::move(colonSymbol)), nullSymbol_(std::move(nullSymbol)),
endingLineFeedSymbol_(std::move(endingLineFeedSymbol)),
addChildValues_(false), indented_(false),
useSpecialFloats_(useSpecialFloats), precision_(precision),
useSpecialFloats_(useSpecialFloats), emitUTF8_(emitUTF8), precision_(precision),
precisionType_(precisionType) {}
int BuiltStyledStreamWriter::write(Value const& root, OStream* sout) {
sout_ = sout;
Expand Down Expand Up @@ -953,7 +961,7 @@ void BuiltStyledStreamWriter::writeValue(Value const& value) {
char const* end;
bool ok = value.getString(&str, &end);
if (ok)
pushValue(valueToQuotedStringN(str, static_cast<unsigned>(end - str)));
pushValue(valueToQuotedStringN(str, static_cast<unsigned>(end - str), emitUTF8_));
else
pushValue("");
break;
Expand All @@ -977,7 +985,7 @@ void BuiltStyledStreamWriter::writeValue(Value const& value) {
Value const& childValue = value[name];
writeCommentBeforeValue(childValue);
writeWithIndent(valueToQuotedStringN(
name.data(), static_cast<unsigned>(name.length())));
name.data(), static_cast<unsigned>(name.length()), emitUTF8_));
*sout_ << colonSymbol_;
writeValue(childValue);
if (++it == members.end()) {
Expand Down Expand Up @@ -1159,6 +1167,7 @@ StreamWriter* StreamWriterBuilder::newStreamWriter() const {
bool eyc = settings_["enableYAMLCompatibility"].asBool();
bool dnp = settings_["dropNullPlaceholders"].asBool();
bool usf = settings_["useSpecialFloats"].asBool();
bool emitUTF8 = settings_["emitUTF8"].asBool();
unsigned int pre = settings_["precision"].asUInt();
CommentStyle::Enum cs = CommentStyle::All;
if (cs_str == "All") {
Expand Down Expand Up @@ -1190,7 +1199,7 @@ StreamWriter* StreamWriterBuilder::newStreamWriter() const {
pre = 17;
String endingLineFeedSymbol;
return new BuiltStyledStreamWriter(indentation, cs, colonSymbol, nullSymbol,
endingLineFeedSymbol, usf, pre,
endingLineFeedSymbol, usf, emitUTF8, pre,
precisionType);
}
static void getValidWriterKeys(std::set<String>* valid_keys) {
Expand All @@ -1200,6 +1209,7 @@ static void getValidWriterKeys(std::set<String>* valid_keys) {
valid_keys->insert("enableYAMLCompatibility");
valid_keys->insert("dropNullPlaceholders");
valid_keys->insert("useSpecialFloats");
valid_keys->insert("emitUTF8");
valid_keys->insert("precision");
valid_keys->insert("precisionType");
}
Expand Down Expand Up @@ -1231,6 +1241,7 @@ void StreamWriterBuilder::setDefaults(Json::Value* settings) {
(*settings)["enableYAMLCompatibility"] = false;
(*settings)["dropNullPlaceholders"] = false;
(*settings)["useSpecialFloats"] = false;
(*settings)["emitUTF8"] = false;
(*settings)["precision"] = 17;
(*settings)["precisionType"] = "significant";
//! [StreamWriterBuilderDefaults]
Expand Down
25 changes: 25 additions & 0 deletions src/test_lib_json/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1996,6 +1996,30 @@ JSONTEST_FIXTURE(StreamWriterTest, writeZeroes) {
}
}

JSONTEST_FIXTURE(StreamWriterTest, unicode) {
// Create a Json value containing UTF-8 string with some chars that need escape (tab,newline).
Json::Value root;
root["test"] = "\t\n\xF0\x91\xA2\xA1\x3D\xC4\xB3\xF0\x9B\x84\x9B\xEF\xBD\xA7";

Json::StreamWriterBuilder b;

// Default settings - should be unicode escaped.
JSONTEST_ASSERT(Json::writeString(b, root) == "{\n\t\"test\" : \"\\t\\n\\ud806\\udca1=\\u0133\\ud82c\\udd1b\\uff67\"\n}");

// Enable UTF-8
b.settings_["emitUTF8"] = true;

// Should not be unicode escaped.
JSONTEST_ASSERT(Json::writeString(b, root) == "{\n\t\"test\" : \"\\t\\n\xF0\x91\xA2\xA1=\xC4\xB3\xF0\x9B\x84\x9B\xEF\xBD\xA7\"\n}");

// Disable UTF-8
b.settings_["emitUTF8"] = false;

// Should be unicode escaped.
JSONTEST_ASSERT(Json::writeString(b, root) == "{\n\t\"test\" : \"\\t\\n\\ud806\\udca1=\\u0133\\ud82c\\udd1b\\uff67\"\n}");
}


struct ReaderTest : JsonTest::TestCase {};

JSONTEST_FIXTURE(ReaderTest, parseWithNoErrors) {
Expand Down Expand Up @@ -2731,6 +2755,7 @@ int main(int argc, const char* argv[]) {
JSONTEST_REGISTER_FIXTURE(runner, StreamWriterTest, enableYAMLCompatibility);
JSONTEST_REGISTER_FIXTURE(runner, StreamWriterTest, indentation);
JSONTEST_REGISTER_FIXTURE(runner, StreamWriterTest, writeZeroes);
JSONTEST_REGISTER_FIXTURE(runner, StreamWriterTest, unicode);

JSONTEST_REGISTER_FIXTURE(runner, ReaderTest, parseWithNoErrors);
JSONTEST_REGISTER_FIXTURE(runner, ReaderTest,
Expand Down