diff --git a/clickhouse/CMakeLists.txt b/clickhouse/CMakeLists.txt
index 01c429d3..a3eb74fd 100644
--- a/clickhouse/CMakeLists.txt
+++ b/clickhouse/CMakeLists.txt
@@ -14,6 +14,7 @@ SET ( clickhouse-cpp-lib-src
     columns/ip4.cpp
     columns/ip6.cpp
     columns/lowcardinality.cpp
+    columns/lowcardinalityadaptor.h
     columns/nullable.cpp
     columns/numeric.cpp
     columns/string.cpp
diff --git a/clickhouse/client.cpp b/clickhouse/client.cpp
index 415dfac1..6d06b4fb 100644
--- a/clickhouse/client.cpp
+++ b/clickhouse/client.cpp
@@ -455,6 +455,9 @@ bool Client::Impl::ReadBlock(Block* block, CodedInputStream* input) {
         return false;
     }
 
+    CreateColumnByTypeSettings create_column_settings;
+    create_column_settings.low_cardinality_as_wrapped_column = options_.backward_compatibility_lowcardinality_as_wrapped_column;
+
     std::string name;
     std::string type;
     for (size_t i = 0; i < num_columns; ++i) {
@@ -465,7 +468,7 @@ bool Client::Impl::ReadBlock(Block* block, CodedInputStream* input) {
             return false;
         }
 
-        if (ColumnRef col = CreateColumnByType(type)) {
+        if (ColumnRef col = CreateColumnByType(type, create_column_settings)) {
             if (num_rows && !col->Load(input, num_rows)) {
                 throw std::runtime_error("can't load");
             }
diff --git a/clickhouse/client.h b/clickhouse/client.h
index 194e0e31..e080fb91 100644
--- a/clickhouse/client.h
+++ b/clickhouse/client.h
@@ -83,6 +83,14 @@ struct ClientOptions {
     // TCP options
     DECLARE_FIELD(tcp_nodelay, bool, TcpNoDelay, true);
 
+    /** When set, LowCardinality(String/FixedString) columns read from the server are created as plain wrapped columns.
+     *  Eases migration of old codebases which can't afford to switch to using ColumnLowCardinalityT or
+     *  ColumnLowCardinality directly, but still want the smaller on-wire LowCardinality bandwidth footprint.
+     *
+     *  @see LowCardinalitySerializationAdaptor, CreateColumnByType
+     */
+    DECLARE_FIELD(backward_compatibility_lowcardinality_as_wrapped_column, bool, SetBakcwardCompatibilityFeatureLowCardinalityAsWrappedColumn, true);
+
 #undef DECLARE_FIELD
 };
 
diff --git a/clickhouse/columns/factory.cpp b/clickhouse/columns/factory.cpp
index 95862112..6f7526eb 100644
--- a/clickhouse/columns/factory.cpp
+++ b/clickhouse/columns/factory.cpp
@@ -7,6 +7,7 @@
 #include "ip4.h"
 #include "ip6.h"
 #include "lowcardinality.h"
+#include "lowcardinalityadaptor.h"
 #include "nothing.h"
 #include "nullable.h"
 #include "numeric.h"
@@ -94,17 +95,17 @@ static ColumnRef CreateTerminalColumn(const TypeAst& ast) {
     }
 }
 
-static ColumnRef CreateColumnFromAst(const TypeAst& ast) {
+static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSettings settings) {
     switch (ast.meta) {
         case TypeAst::Array: {
             return std::make_shared<ColumnArray>(
-                CreateColumnFromAst(ast.elements.front())
+                CreateColumnFromAst(ast.elements.front(), settings)
             );
         }
 
         case TypeAst::Nullable: {
             return std::make_shared<ColumnNullable>(
-                CreateColumnFromAst(ast.elements.front()),
+                CreateColumnFromAst(ast.elements.front(), settings),
                 std::make_shared<ColumnUInt8>()
             );
         }
@@ -118,7 +119,7 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast) {
 
             columns.reserve(ast.elements.size());
             for (const auto& elem : ast.elements) {
-                if (auto col = CreateColumnFromAst(elem)) {
+                if (auto col = CreateColumnFromAst(elem, settings)) {
                     columns.push_back(col);
                 } else {
                     return nullptr;
@@ -151,14 +152,27 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast) {
         }
         case TypeAst::LowCardinality: {
             const auto nested = ast.elements.front();
-            switch (nested.code) {
-                // TODO (nemkov): update this to maximize code reuse.
-                case Type::String:
-                    return std::make_shared<ColumnLowCardinalityT<ColumnString>>();
-                case Type::FixedString:
-                    return std::make_shared<ColumnLowCardinalityT<ColumnFixedString>>(nested.elements.front().value);
-                default:
-                    throw std::runtime_error("LowCardinality(" + nested.name + ") is not supported");
+            if (settings.low_cardinality_as_wrapped_column) {
+                switch (nested.code) {
+                    // TODO (nemkov): update this to maximize code reuse.
+                    case Type::String:
+                        return std::make_shared<LowCardinalitySerializationAdaptor<ColumnString>>();
+                    case Type::FixedString:
+                        return std::make_shared<LowCardinalitySerializationAdaptor<ColumnFixedString>>(nested.elements.front().value);
+                    default:
+                        throw std::runtime_error("LowCardinality(" + nested.name + ") is not supported");
+                }
+            }
+            else {
+                switch (nested.code) {
+                    // TODO (nemkov): update this to maximize code reuse.
+                    case Type::String:
+                        return std::make_shared<ColumnLowCardinalityT<ColumnString>>();
+                    case Type::FixedString:
+                        return std::make_shared<ColumnLowCardinalityT<ColumnFixedString>>(nested.elements.front().value);
+                    default:
+                        throw std::runtime_error("LowCardinality(" + nested.name + ") is not supported");
+                }
             }
         }
         case TypeAst::SimpleAggregateFunction: {
@@ -178,10 +192,10 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast) {
 
 } // namespace
 
-ColumnRef CreateColumnByType(const std::string& type_name) {
+ColumnRef CreateColumnByType(const std::string& type_name, CreateColumnByTypeSettings settings) {
     auto ast = ParseTypeName(type_name);
     if (ast != nullptr) {
-        return CreateColumnFromAst(*ast);
+        return CreateColumnFromAst(*ast, settings);
     }
 
     return nullptr;
diff --git a/clickhouse/columns/factory.h b/clickhouse/columns/factory.h
index 05c7358c..9d2bed18 100644
--- a/clickhouse/columns/factory.h
+++ b/clickhouse/columns/factory.h
@@ -4,6 +4,11 @@
 
 namespace clickhouse {
 
-ColumnRef CreateColumnByType(const std::string& type_name);
+struct CreateColumnByTypeSettings
+{
+    bool low_cardinality_as_wrapped_column = false;
+};
+
+ColumnRef CreateColumnByType(const std::string& type_name, CreateColumnByTypeSettings settings = {});
 
 }
diff --git a/clickhouse/columns/lowcardinality.h b/clickhouse/columns/lowcardinality.h
index 808e6872..6fd13a7d 100644
--- a/clickhouse/columns/lowcardinality.h
+++ b/clickhouse/columns/lowcardinality.h
@@ -105,7 +105,12 @@ class ColumnLowCardinalityT : public ColumnLowCardinality {
 
     template <typename ...Args>
     explicit ColumnLowCardinalityT(Args &&... args)
-        : ColumnLowCardinality(std::make_shared<DictionaryColumnType>(std::forward<Args>(args)...)),
+        : ColumnLowCardinalityT(std::make_shared<DictionaryColumnType>(std::forward<Args>(args)...))
+    {}
+
+    // Create LC column from existing T-column, making a deep copy of all contents.
+    explicit ColumnLowCardinalityT(std::shared_ptr<DictionaryColumnType> dictionary_col)
+        : ColumnLowCardinality(dictionary_col),
           typed_dictionary_(dynamic_cast<DictionaryColumnType &>(*GetDictionary())),
           type_(typed_dictionary_.Type()->GetCode())
     {}
diff --git a/clickhouse/columns/lowcardinalityadaptor.h b/clickhouse/columns/lowcardinalityadaptor.h
new file mode 100644
index 00000000..6477e47c
--- /dev/null
+++ b/clickhouse/columns/lowcardinalityadaptor.h
@@ -0,0 +1,54 @@
+#pragma once
+
+#include "column.h"
+#include "lowcardinality.h"
+
+#include <cassert>
+
+namespace clickhouse {
+
+class CodedOutputStream;
+class CodedInputStream;
+
+/** Adapts any ColumnType to be serialized/deserialized as LowCardinality,
+ *  and to be castable to ColumnType via ColumnPtr->As<ColumnType>().
+ *
+ * It helps to ease migration of the old codebases, which can't afford to switch
+ * to using ColumnLowCardinalityT or ColumnLowCardinality directly,
+ * but still want to benefit from smaller on-wire LowCardinality bandwidth footprint.
+ *
+ * Not intended to be used by users directly.
+ *
+ * @see ClientOptions, CreateColumnByType
+ */
+template <typename AdaptedColumnType>
+class LowCardinalitySerializationAdaptor : public AdaptedColumnType
+{
+public:
+    using AdaptedColumnType::AdaptedColumnType;
+
+    /// Loads `rows` values via a temporary ColumnLowCardinalityT and stores them as plain values; returns false if that load fails.
+    bool Load(CodedInputStream* input, size_t rows) override {
+        auto new_data_column = this->Slice(0, 0)->template As<AdaptedColumnType>();
+
+        ColumnLowCardinalityT<AdaptedColumnType> low_cardinality_col(new_data_column);
+        if (!low_cardinality_col.Load(input, rows))
+            return false;
+
+        // It is safe to reuse `new_data_column` here since ColumnLowCardinalityT makes a deep copy, but still check just in case.
+        assert(new_data_column->Size() == 0);
+
+        for (size_t i = 0; i < low_cardinality_col.Size(); ++i)
+            new_data_column->Append(low_cardinality_col[i]);
+
+        this->Swap(*new_data_column);
+        return true;
+    }
+
+    /// Saves column data to output stream by wrapping this column into a temporary ColumnLowCardinalityT.
+    void Save(CodedOutputStream* output) override {
+        ColumnLowCardinalityT<AdaptedColumnType>(this->template As<AdaptedColumnType>()).Save(output);
+    }
+};
+
+}
diff --git a/ut/client_ut.cpp b/ut/client_ut.cpp
index 27ceae05..717cff82 100644
--- a/ut/client_ut.cpp
+++ b/ut/client_ut.cpp
@@ -237,6 +237,54 @@ TEST_P(ClientCase, LowCardinality_InsertAfterClear) {
     ASSERT_EQ(total_rows, data.size());
 }
 
+TEST_P(ClientCase, LowCardinalityString_AsString) {
+    // Validate that LowCardinality(String) column values can be INSERTed from client as ColumnString
+    // and also read on client (enabled by special option) as ColumnString.
+
+    ClientOptions options = GetParam();
+    options.SetBakcwardCompatibilityFeatureLowCardinalityAsWrappedColumn(true);
+
+    client_ = std::make_unique<Client>(options);
+    client_->Execute("CREATE DATABASE IF NOT EXISTS test_clickhouse_cpp");
+
+    Block block;
+    auto col = std::make_shared<ColumnString>();
+
+    client_->Execute("DROP TABLE IF EXISTS " + table_name + ";");
+    client_->Execute("CREATE TABLE IF NOT EXISTS " + table_name + "( " + column_name + " LowCardinality(String) )"
+            "ENGINE = Memory");
+
+    block.AppendColumn("test_column", col);
+
+    const std::vector<std::string> data{{"FooBar", "1", "2", "Foo", "4", "Bar", "Foo", "7", "8", "Foo"}};
+    for (const auto & v : data)
+        col->Append(v);
+
+    block.RefreshRowCount();
+    client_->Insert(table_name, block);
+
+    // Now that we can access data via ColumnString instead of ColumnLowCardinalityT<ColumnString>
+    size_t total_rows = 0;
+    client_->Select(getOneColumnSelectQuery(),
+        [&total_rows, &data](const Block& block) {
+            total_rows += block.GetRowCount();
+            if (block.GetRowCount() == 0) {
+                return;
+            }
+
+            ASSERT_EQ(1U, block.GetColumnCount());
+            auto col = block[0]->As<ColumnString>();
+            ASSERT_TRUE(col != nullptr) << "expected block[0] to be readable as ColumnString";
+            ASSERT_EQ(data.size(), col->Size());
+            for (size_t i = 0; i < col->Size(); ++i) {
+                EXPECT_EQ(data[i], (*col)[i]) << " at index: " << i;
+            }
+        }
+    );
+
+    ASSERT_EQ(total_rows, data.size());
+}
+
 TEST_P(ClientCase, Generic) {
     client_->Execute(
             "CREATE TABLE IF NOT EXISTS test_clickhouse_cpp.client (id UInt64, name String) "
diff --git a/ut/columns_ut.cpp b/ut/columns_ut.cpp
index f573fe23..99d33822 100644
--- a/ut/columns_ut.cpp
+++ b/ut/columns_ut.cpp
@@ -631,6 +631,18 @@ TEST(ColumnsCase, UnmatchedBrackets) {
     ASSERT_EQ(nullptr, CreateColumnByType("Array(LowCardinality(Nullable(FixedString(10000)))"));
 }
 
+TEST(ColumnsCase, LowCardinalityAsWrappedColumn) {
+    CreateColumnByTypeSettings create_column_settings;
+    create_column_settings.low_cardinality_as_wrapped_column = true;
+
+    ASSERT_EQ(Type::String, CreateColumnByType("LowCardinality(String)", create_column_settings)->GetType().GetCode());
+    ASSERT_EQ(Type::String, CreateColumnByType("LowCardinality(String)", create_column_settings)->As<ColumnString>()->GetType().GetCode());
+
+    ASSERT_EQ(Type::FixedString, CreateColumnByType("LowCardinality(FixedString(10000))", create_column_settings)->GetType().GetCode());
+    ASSERT_EQ(Type::FixedString, CreateColumnByType("LowCardinality(FixedString(10000))", create_column_settings)->As<ColumnFixedString>()->GetType().GetCode());
+}
+
+
 class ColumnsCaseWithName : public ::testing::TestWithParam<const char* /*Column Type String*/>
 {};
 