Skip to content

Commit 13079ad

Browse files
authored
Merge pull request #79 from Enmk/LowCardinality_as_wrapped_column
Allow users to explicitly access LowCardinality<WrappedColumn> columns as WrappedColumn
2 parents 33c58aa + 8fd000e commit 13079ad

9 files changed

+167
-17
lines changed

clickhouse/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ SET ( clickhouse-cpp-lib-src
1414
columns/ip4.cpp
1515
columns/ip6.cpp
1616
columns/lowcardinality.cpp
17+
columns/lowcardinalityadaptor.h
1718
columns/nullable.cpp
1819
columns/numeric.cpp
1920
columns/string.cpp

clickhouse/client.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,9 @@ bool Client::Impl::ReadBlock(Block* block, CodedInputStream* input) {
455455
return false;
456456
}
457457

458+
CreateColumnByTypeSettings create_column_settings;
459+
create_column_settings.low_cardinality_as_wrapped_column = options_.backward_compatibility_lowcardinality_as_wrapped_column;
460+
458461
std::string name;
459462
std::string type;
460463
for (size_t i = 0; i < num_columns; ++i) {
@@ -465,7 +468,7 @@ bool Client::Impl::ReadBlock(Block* block, CodedInputStream* input) {
465468
return false;
466469
}
467470

468-
if (ColumnRef col = CreateColumnByType(type)) {
471+
if (ColumnRef col = CreateColumnByType(type, create_column_settings)) {
469472
if (num_rows && !col->Load(input, num_rows)) {
470473
throw std::runtime_error("can't load");
471474
}

clickhouse/client.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,14 @@ struct ClientOptions {
8383
// TCP options
8484
DECLARE_FIELD(tcp_nodelay, bool, TcpNoDelay, true);
8585

86+
/** Exposes LowCardinality(T) columns as plain T columns, to ease migration of old codebases which can't afford to switch
87+
* to using ColumnLowCardinalityT or ColumnLowCardinality directly,
88+
* but still want to benefit from smaller on-wire LowCardinality bandwidth footprint.
89+
*
90+
* @see LowCardinalitySerializationAdaptor, CreateColumnByType
91+
*/
92+
DECLARE_FIELD(backward_compatibility_lowcardinality_as_wrapped_column, bool, SetBakcwardCompatibilityFeatureLowCardinalityAsWrappedColumn, true);
93+
8694
#undef DECLARE_FIELD
8795
};
8896

clickhouse/columns/factory.cpp

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "ip4.h"
88
#include "ip6.h"
99
#include "lowcardinality.h"
10+
#include "lowcardinalityadaptor.h"
1011
#include "nothing.h"
1112
#include "nullable.h"
1213
#include "numeric.h"
@@ -94,17 +95,17 @@ static ColumnRef CreateTerminalColumn(const TypeAst& ast) {
9495
}
9596
}
9697

97-
static ColumnRef CreateColumnFromAst(const TypeAst& ast) {
98+
static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSettings settings) {
9899
switch (ast.meta) {
99100
case TypeAst::Array: {
100101
return std::make_shared<ColumnArray>(
101-
CreateColumnFromAst(ast.elements.front())
102+
CreateColumnFromAst(ast.elements.front(), settings)
102103
);
103104
}
104105

105106
case TypeAst::Nullable: {
106107
return std::make_shared<ColumnNullable>(
107-
CreateColumnFromAst(ast.elements.front()),
108+
CreateColumnFromAst(ast.elements.front(), settings),
108109
std::make_shared<ColumnUInt8>()
109110
);
110111
}
@@ -118,7 +119,7 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast) {
118119

119120
columns.reserve(ast.elements.size());
120121
for (const auto& elem : ast.elements) {
121-
if (auto col = CreateColumnFromAst(elem)) {
122+
if (auto col = CreateColumnFromAst(elem, settings)) {
122123
columns.push_back(col);
123124
} else {
124125
return nullptr;
@@ -151,14 +152,27 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast) {
151152
}
152153
case TypeAst::LowCardinality: {
153154
const auto nested = ast.elements.front();
154-
switch (nested.code) {
155-
// TODO (nemkov): update this to maximize code reuse.
156-
case Type::String:
157-
return std::make_shared<ColumnLowCardinalityT<ColumnString>>();
158-
case Type::FixedString:
159-
return std::make_shared<ColumnLowCardinalityT<ColumnFixedString>>(nested.elements.front().value);
160-
default:
161-
throw std::runtime_error("LowCardinality(" + nested.name + ") is not supported");
155+
if (settings.low_cardinality_as_wrapped_column) {
156+
switch (nested.code) {
157+
// TODO (nemkov): update this to maximize code reuse.
158+
case Type::String:
159+
return std::make_shared<LowCardinalitySerializationAdaptor<ColumnString>>();
160+
case Type::FixedString:
161+
return std::make_shared<LowCardinalitySerializationAdaptor<ColumnFixedString>>(nested.elements.front().value);
162+
default:
163+
throw std::runtime_error("LowCardinality(" + nested.name + ") is not supported");
164+
}
165+
}
166+
else {
167+
switch (nested.code) {
168+
// TODO (nemkov): update this to maximize code reuse.
169+
case Type::String:
170+
return std::make_shared<ColumnLowCardinalityT<ColumnString>>();
171+
case Type::FixedString:
172+
return std::make_shared<ColumnLowCardinalityT<ColumnFixedString>>(nested.elements.front().value);
173+
default:
174+
throw std::runtime_error("LowCardinality(" + nested.name + ") is not supported");
175+
}
162176
}
163177
}
164178
case TypeAst::SimpleAggregateFunction: {
@@ -178,10 +192,10 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast) {
178192
} // namespace
179193

180194

181-
ColumnRef CreateColumnByType(const std::string& type_name) {
195+
ColumnRef CreateColumnByType(const std::string& type_name, CreateColumnByTypeSettings settings) {
182196
auto ast = ParseTypeName(type_name);
183197
if (ast != nullptr) {
184-
return CreateColumnFromAst(*ast);
198+
return CreateColumnFromAst(*ast, settings);
185199
}
186200

187201
return nullptr;

clickhouse/columns/factory.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@
44

55
namespace clickhouse {
66

7-
ColumnRef CreateColumnByType(const std::string& type_name);
7+
/// Knobs that influence which column class CreateColumnByType() instantiates
/// for a given type name.
struct CreateColumnByTypeSettings {
    /// When true, LowCardinality(String) / LowCardinality(FixedString(N)) are created as
    /// LowCardinalitySerializationAdaptor<T>, i.e. a column that derives from the wrapped
    /// column type T. When false (the default), ColumnLowCardinalityT<T> is created.
    bool low_cardinality_as_wrapped_column{false};
};
11+
12+
ColumnRef CreateColumnByType(const std::string& type_name, CreateColumnByTypeSettings settings = {});
813

914
}

clickhouse/columns/lowcardinality.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,12 @@ class ColumnLowCardinalityT : public ColumnLowCardinality {
105105

106106
template <typename ...Args>
107107
explicit ColumnLowCardinalityT(Args &&... args)
108-
: ColumnLowCardinality(std::make_shared<DictionaryColumnType>(std::forward<Args>(args)...)),
108+
: ColumnLowCardinalityT(std::make_shared<DictionaryColumnType>(std::forward<Args>(args)...))
109+
{}
110+
111+
// Create LC<T> column from existing T-column, making a deep copy of all contents.
112+
explicit ColumnLowCardinalityT(std::shared_ptr<DictionaryColumnType> dictionary_col)
113+
: ColumnLowCardinality(dictionary_col),
109114
typed_dictionary_(dynamic_cast<DictionaryColumnType &>(*GetDictionary())),
110115
type_(typed_dictionary_.Type()->GetCode())
111116
{}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#pragma once
2+
3+
#include "column.h"
4+
#include "lowcardinality.h"
5+
6+
#include <cassert>
7+
8+
namespace clickhouse {
9+
10+
class CodedOutputStream;
11+
class CodedInputStream;
12+
13+
/** Adapts any ColumnType to be serialized\deserialized as LowCardinality,
14+
* and to be castable to ColumnType via ColumnPtr->As<ColumnType>().
15+
*
16+
* It helps to ease migration of the old codebases, which can't afford to switch
17+
* to using ColumnLowCardinalityT or ColumnLowCardinality directly,
18+
* but still want to benefit from smaller on-wire LowCardinality bandwidth footprint.
19+
*
20+
* Not intended to be used by users directly.
21+
*
22+
* @see ClientOptions, CreateColumnByType
23+
*/
24+
template <typename AdaptedColumnType>
25+
class LowCardinalitySerializationAdaptor : public AdaptedColumnType
26+
{
27+
public:
28+
using AdaptedColumnType::AdaptedColumnType;
29+
30+
/// Loads column data from input stream.
31+
bool Load(CodedInputStream* input, size_t rows) override {
32+
auto new_data_column = this->Slice(0, 0)->template As<AdaptedColumnType>();
33+
34+
ColumnLowCardinalityT<AdaptedColumnType> low_cardinality_col(new_data_column);
35+
if (!low_cardinality_col.Load(input, rows))
36+
return false;
37+
38+
// It safe to reuse `flat_data_column` later since ColumnLowCardinalityT makes a deep copy, but still check just in case.
39+
assert(new_data_column->Size() == 0);
40+
41+
for (size_t i = 0; i < low_cardinality_col.Size(); ++i)
42+
new_data_column->Append(low_cardinality_col[i]);
43+
44+
this->Swap(*new_data_column);
45+
return true;
46+
}
47+
48+
/// Saves column data to output stream.
49+
void Save(CodedOutputStream* output) override {
50+
ColumnLowCardinalityT<AdaptedColumnType>(this->template As<AdaptedColumnType>()).Save(output);
51+
}
52+
};
53+
54+
}

ut/client_ut.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,54 @@ TEST_P(ClientCase, LowCardinality_InsertAfterClear) {
237237
ASSERT_EQ(total_rows, data.size());
238238
}
239239

240+
TEST_P(ClientCase, LowCardinalityString_AsString) {
    // Validates that LowCardinality(String) column values can be INSERTed from the client
    // as a plain ColumnString, and (with the backward-compatibility option enabled)
    // read back on the client as ColumnString as well.

    ClientOptions options = GetParam();
    options.SetBakcwardCompatibilityFeatureLowCardinalityAsWrappedColumn(true);

    // Connect with the customized options rather than the raw test parameter;
    // otherwise the option set above would never reach the client under test.
    client_ = std::make_unique<Client>(options);
    client_->Execute("CREATE DATABASE IF NOT EXISTS test_clickhouse_cpp");

    Block block;
    auto col = std::make_shared<ColumnString>();

    client_->Execute("DROP TABLE IF EXISTS " + table_name + ";");
    client_->Execute("CREATE TABLE IF NOT EXISTS " + table_name + "( " + column_name + " LowCardinality(String) )"
            "ENGINE = Memory");

    block.AppendColumn("test_column", col);

    const std::vector<std::string> data{{"FooBar", "1", "2", "Foo", "4", "Bar", "Foo", "7", "8", "Foo"}};
    for (const auto & v : data)
        col->Append(v);

    block.RefreshRowCount();
    client_->Insert(table_name, block);

    // Now the data must be accessible via ColumnString instead of ColumnLowCardinalityT<ColumnString>.
    size_t total_rows = 0;
    client_->Select(getOneColumnSelectQuery(),
        [&total_rows, &data](const Block& block) {
            total_rows += block.GetRowCount();
            if (block.GetRowCount() == 0) {
                return;
            }

            ASSERT_EQ(1U, block.GetColumnCount());

            // A failed cast is a test failure, not something to skip silently:
            // the whole point of the option is that the column is a ColumnString.
            const auto col = block[0]->As<ColumnString>();
            ASSERT_NE(nullptr, col) << "column was not returned as ColumnString";

            ASSERT_EQ(data.size(), col->Size());
            for (size_t i = 0; i < col->Size(); ++i) {
                EXPECT_EQ(data[i], (*col)[i]) << " at index: " << i;
            }
        }
    );

    ASSERT_EQ(total_rows, data.size());
}
287+
240288
TEST_P(ClientCase, Generic) {
241289
client_->Execute(
242290
"CREATE TABLE IF NOT EXISTS test_clickhouse_cpp.client (id UInt64, name String) "

ut/columns_ut.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,18 @@ TEST(ColumnsCase, UnmatchedBrackets) {
631631
ASSERT_EQ(nullptr, CreateColumnByType("Array(LowCardinality(Nullable(FixedString(10000)))"));
632632
}
633633

634+
// Checks that, with low_cardinality_as_wrapped_column enabled, LowCardinality(T) type names
// produce columns that report type T and can be cast to the plain T column class.
TEST(ColumnsCase, LowCardinalityAsWrappedColumn) {
    CreateColumnByTypeSettings create_column_settings;
    create_column_settings.low_cardinality_as_wrapped_column = true;

    // Every pointer is checked before being dereferenced, so that a null result from the
    // factory or a failed cast is reported as a test failure instead of crashing the binary.
    {
        const auto column = CreateColumnByType("LowCardinality(String)", create_column_settings);
        ASSERT_NE(nullptr, column);
        ASSERT_EQ(Type::String, column->GetType().GetCode());

        const auto as_string = column->As<ColumnString>();
        ASSERT_NE(nullptr, as_string);
        ASSERT_EQ(Type::String, as_string->GetType().GetCode());
    }

    {
        const auto column = CreateColumnByType("LowCardinality(FixedString(10000))", create_column_settings);
        ASSERT_NE(nullptr, column);
        ASSERT_EQ(Type::FixedString, column->GetType().GetCode());

        const auto as_fixed_string = column->As<ColumnFixedString>();
        ASSERT_NE(nullptr, as_fixed_string);
        ASSERT_EQ(Type::FixedString, as_fixed_string->GetType().GetCode());
    }
}
644+
645+
634646
class ColumnsCaseWithName : public ::testing::TestWithParam<const char* /*Column Type String*/>
635647
{};
636648

0 commit comments

Comments
 (0)