Skip to content

Commit 66e9c54

Browse files
authored
Merge pull request #202 from 1261385937/master
optimize ColumnString Append
2 parents 84051f5 + 2bbc74a commit 66e9c54

File tree

3 files changed

+69
-11
lines changed

3 files changed

+69
-11
lines changed

clickhouse/columns/string.cpp

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44
#include "../base/wire_format.h"
55

66
namespace {
7-
const size_t DEFAULT_BLOCK_SIZE = 4096;
7+
8+
constexpr size_t DEFAULT_BLOCK_SIZE = 4096;
89

910
template <typename Container>
10-
size_t ComputeTotalSize(const Container & strings, size_t begin = 0, size_t len = -1)
11-
{
11+
size_t ComputeTotalSize(const Container & strings, size_t begin = 0, size_t len = -1) {
1212
size_t result = 0;
1313
if (begin < strings.size()) {
1414
len = std::min(len, strings.size() - begin);
@@ -64,8 +64,7 @@ std::string_view ColumnFixedString::operator [](size_t n) const {
6464
return std::string_view(&data_[pos], string_size_);
6565
}
6666

67-
size_t ColumnFixedString::FixedSize() const
68-
{
67+
size_t ColumnFixedString::FixedSize() const {
6968
return string_size_;
7069
}
7170

@@ -126,8 +125,8 @@ struct ColumnString::Block
126125

127126
explicit Block(size_t starting_capacity)
128127
: size(0),
129-
capacity(starting_capacity),
130-
data_(new CharT[capacity])
128+
capacity(starting_capacity),
129+
data_(new CharT[capacity])
131130
{}
132131

133132
inline auto GetAvailable() const
@@ -167,8 +166,8 @@ ColumnString::ColumnString()
167166
{
168167
}
169168

170-
ColumnString::ColumnString(const std::vector<std::string> & data)
171-
: Column(Type::CreateString())
169+
ColumnString::ColumnString(const std::vector<std::string>& data)
170+
: ColumnString()
172171
{
173172
items_.reserve(data.size());
174173
blocks_.emplace_back(ComputeTotalSize(data));
@@ -177,6 +176,18 @@ ColumnString::ColumnString(const std::vector<std::string> & data)
177176
{
178177
AppendUnsafe(s);
179178
}
179+
};
180+
181+
/// Builds a string column by moving every string out of `data`.
/// Each string is moved into append_data_ and exposed through a view stored in items_;
/// the elements of `data` are left in a moved-from state.
ColumnString::ColumnString(std::vector<std::string>&& data)
    : ColumnString()
{
    items_.reserve(data.size());

    for (std::string& source : data) {
        // emplace_back returns a reference to the element it just created (C++17).
        const std::string& owned = append_data_.emplace_back(std::move(source));
        items_.emplace_back(owned.data(), owned.length());
    }
}
181192

182193
ColumnString::~ColumnString()
@@ -191,14 +202,34 @@ void ColumnString::Append(std::string_view str) {
191202
items_.emplace_back(blocks_.back().AppendUnsafe(str));
192203
}
193204

194-
void ColumnString::AppendUnsafe(std::string_view str)
195-
{
205+
void ColumnString::Append(const char* str) {
206+
auto len = strlen(str);
207+
if (blocks_.size() == 0 || blocks_.back().GetAvailable() < len) {
208+
blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, len));
209+
}
210+
211+
items_.emplace_back(blocks_.back().AppendUnsafe(str));
212+
}
213+
214+
/// Appends one element by moving `steal_value` into append_data_ and recording a view of the
/// stored string in items_. The argument is left in a moved-from state.
void ColumnString::Append(std::string&& steal_value) {
    // emplace_back returns a reference to the element it just created (C++17).
    const std::string& owned = append_data_.emplace_back(std::move(steal_value));
    items_.emplace_back(owned.data(), owned.length());
}
219+
220+
/// Appends one element without copying it: only the view itself is recorded in items_,
/// so the memory `str` points to must stay valid for as long as the column refers to it.
void ColumnString::AppendNoManagedLifetime(std::string_view str) {
    items_.push_back(str);
}
223+
224+
void ColumnString::AppendUnsafe(std::string_view str) {
196225
items_.emplace_back(blocks_.back().AppendUnsafe(str));
197226
}
198227

199228
void ColumnString::Clear() {
200229
items_.clear();
201230
blocks_.clear();
231+
append_data_.clear();
232+
append_data_.shrink_to_fit();
202233
}
203234

204235
std::string_view ColumnString::At(size_t n) const {
@@ -283,6 +314,7 @@ void ColumnString::Swap(Column& other) {
283314
auto & col = dynamic_cast<ColumnString &>(other);
284315
items_.swap(col.items_);
285316
blocks_.swap(col.blocks_);
317+
append_data_.swap(col.append_data_);
286318
}
287319

288320
ItemView ColumnString::GetItem(size_t index) const {

clickhouse/columns/string.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <string_view>
77
#include <utility>
88
#include <vector>
9+
#include <deque>
910

1011
namespace clickhouse {
1112

@@ -78,12 +79,23 @@ class ColumnString : public Column {
7879
~ColumnString();
7980

8081
explicit ColumnString(const std::vector<std::string> & data);
82+
explicit ColumnString(std::vector<std::string>&& data);
8183
ColumnString& operator=(const ColumnString&) = delete;
8284
ColumnString(const ColumnString&) = delete;
8385

8486
/// Appends one element to the column.
8587
void Append(std::string_view str);
8688

89+
/// Appends one element to the column from a NUL-terminated C string.
90+
void Append(const char* str);
91+
92+
/// Appends one element to the column by moving from the given string; the column keeps the moved string.
93+
void Append(std::string&& steal_value);
94+
95+
/// Appends one element to the column.
96+
/// The data is not copied: the lifetime of str is managed by the caller, who must guarantee that it outlives the Block sent to the server.
97+
void AppendNoManagedLifetime(std::string_view str);
98+
8799
/// Returns element at given row number.
88100
std::string_view At(size_t n) const;
89101

@@ -120,6 +132,7 @@ class ColumnString : public Column {
120132

121133
std::vector<std::string_view> items_;
122134
std::vector<Block> blocks_;
135+
std::deque<std::string> append_data_;
123136
};
124137

125138
}

ut/columns_ut.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,19 @@ TEST(ColumnsCase, StringInit) {
111111
ASSERT_EQ(col->At(3), "abcd");
112112
}
113113

114+
// Exercises Append with an lvalue std::string, an rvalue std::string and a string literal,
// then checks that all three rows read back unchanged.
TEST(ColumnsCase, StringAppend) {
    const char* expected = "ufiudhf3493fyiudferyer3yrifhdflkdjfeuroe";
    std::string payload(expected);

    auto column = std::make_shared<ColumnString>();
    column->Append(payload);             // lvalue std::string
    column->Append(std::move(payload));  // rvalue std::string
    column->Append("11");                // string literal

    ASSERT_EQ(column->Size(), 3u);
    ASSERT_EQ(column->At(0), expected);
    ASSERT_EQ(column->At(1), expected);
    ASSERT_EQ(column->At(2), "11");
}
114127

115128
TEST(ColumnsCase, TupleAppend){
116129
auto tuple1 = std::make_shared<ColumnTuple>(std::vector<ColumnRef>({

0 commit comments

Comments
 (0)