
Commit 6f0a488

1 parent adda628 commit 6f0a488

3 files changed: +35 −8 lines changed

be/src/olap/rowset/segment_v2/segment_writer.cpp

Lines changed: 11 additions & 0 deletions

@@ -792,11 +792,13 @@ Status SegmentWriter::append_block(const vectorized::Block* block, size_t row_po
             short_key_pos.push_back(_short_key_row_pos - _num_rows_written);
         }
     }
+    int64_t total_data_size = 0;
 
     // convert column data from engine format to storage layer format
     std::vector<vectorized::IOlapColumnDataAccessor*> key_columns;
     vectorized::IOlapColumnDataAccessor* seq_column = nullptr;
     for (size_t id = 0; id < _column_writers.size(); ++id) {
+        int64_t column_data_size = 0;
         // olap data convertor alway start from id = 0
         auto converted_result = _olap_data_convertor->convert_column_data(id);
         if (!converted_result.first.ok()) {
@@ -811,7 +813,16 @@ Status SegmentWriter::append_block(const vectorized::Block* block, size_t row_po
         }
         RETURN_IF_ERROR(_column_writers[id]->append(converted_result.second->get_nullmap(),
                                                     converted_result.second->get_data(), num_rows));
+        for (size_t row_id = 0; row_id < block->rows(); ++row_id) {
+            const auto& stringRef = block->get_by_position(cid).column->get_data_at(row_id);
+            total_data_size += stringRef.size;
+            column_data_size += stringRef.size;
+        }
+        _footer.mutable_columns(id)->set_total_data_size(column_data_size);
     }
+
+    _footer.set_data_footprint(total_data_size);
+
     if (_has_key) {
         if (_is_mow_with_cluster_key()) {
             // for now we don't need to query short key index for CLUSTER BY feature,
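Both writers use the same accumulation pattern: after a column's converted data is appended, they walk the block's rows, take the StringRef returned by get_data_at(row_id), and add its size to a per-column counter (stored in ColumnMetaPB.total_data_size) and to a segment-wide total (stored via _footer.set_data_footprint). Below is a minimal standalone sketch of that counting logic; CellRef and MockColumn are simplified, hypothetical stand-ins for Doris's StringRef and vectorized column types, not the actual API.

// Simplified illustration of the per-column / per-segment data-size
// accumulation added in this commit. CellRef and MockColumn are hypothetical
// stand-ins; only the counting logic mirrors the diff above.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct CellRef {
    const char* data;
    size_t size; // mirrors the StringRef::size used in the diff
};

struct MockColumn {
    std::vector<std::string> cells;
    size_t rows() const { return cells.size(); }
    CellRef get_data_at(size_t row_id) const {
        return {cells[row_id].data(), cells[row_id].size()};
    }
};

int main() {
    // Two "columns" standing in for one block of a segment.
    MockColumn c0{{"alpha", "bb", "c"}};
    MockColumn c1{{"1", "22", "333"}};
    std::vector<MockColumn> block = {c0, c1};

    int64_t total_data_size = 0; // would go to the footer's data footprint
    for (size_t cid = 0; cid < block.size(); ++cid) {
        int64_t column_data_size = 0; // would go to ColumnMetaPB.total_data_size
        for (size_t row_id = 0; row_id < block[cid].rows(); ++row_id) {
            const CellRef cell = block[cid].get_data_at(row_id);
            total_data_size += static_cast<int64_t>(cell.size);
            column_data_size += static_cast<int64_t>(cell.size);
        }
        std::cout << "column " << cid << ": " << column_data_size << " bytes\n";
    }
    std::cout << "segment data footprint: " << total_data_size << " bytes\n";
    return 0;
}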

be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp

Lines changed: 22 additions & 8 deletions

@@ -1174,7 +1174,9 @@ Status VerticalSegmentWriter::write_batch() {
     vectorized::IOlapColumnDataAccessor* seq_column = nullptr;
     // the key is cluster key column unique id
     std::map<uint32_t, vectorized::IOlapColumnDataAccessor*> cid_to_column;
+    int64_t total_data_size = 0;
     for (uint32_t cid = 0; cid < _tablet_schema->num_columns(); ++cid) {
+        int64_t column_data_size = 0;
         RETURN_IF_ERROR(_create_column_writer(cid, _tablet_schema->column(cid), _tablet_schema));
         for (auto& data : _batched_blocks) {
             RETURN_IF_ERROR(_olap_data_convertor->set_source_content_with_specifid_columns(
@@ -1200,16 +1202,28 @@ Status VerticalSegmentWriter::write_batch() {
             }
             RETURN_IF_ERROR(_column_writers[cid]->append(column->get_nullmap(), column->get_data(),
                                                          data.num_rows));
-            _olap_data_convertor->clear_source_content();
-        }
-        if (_data_dir != nullptr &&
-            _data_dir->reach_capacity_limit(_column_writers[cid]->estimate_buffer_size())) {
-            return Status::Error<DISK_REACH_CAPACITY_LIMIT>("disk {} exceed capacity limit.",
-                                                            _data_dir->path_hash());
+
+            const auto* block = data.block;
+            for (size_t row_id = 0; row_id < block->rows(); ++row_id) {
+                const auto& stringRef = block->get_by_position(cid).column->get_data_at(row_id);
+                total_data_size += stringRef.size;
+                column_data_size += stringRef.size;
+
+                _olap_data_convertor->clear_source_content();
+            }
+
+            _footer.mutable_columns(cid)->set_total_data_size(column_data_size);
+
+            if (_data_dir != nullptr &&
+                _data_dir->reach_capacity_limit(_column_writers[cid]->estimate_buffer_size())) {
+                return Status::Error<DISK_REACH_CAPACITY_LIMIT>("disk {} exceed capacity limit.",
+                                                                _data_dir->path_hash());
+            }
+            RETURN_IF_ERROR(_column_writers[cid]->finish());
+            RETURN_IF_ERROR(_column_writers[cid]->write_data());
         }
-        RETURN_IF_ERROR(_column_writers[cid]->finish());
-        RETURN_IF_ERROR(_column_writers[cid]->write_data());
     }
+    _footer.set_data_footprint(total_data_size);
 
     for (auto& data : _batched_blocks) {
         _olap_data_convertor->set_source_content(data.block, data.row_pos, data.num_rows);

gensrc/proto/segment_v2.proto

Lines changed: 2 additions & 0 deletions

@@ -197,6 +197,8 @@ message ColumnMetaPB {
     optional bool result_is_nullable = 18; // used on agg_state type
     optional string function_name = 19; // used on agg_state type
     optional int32 be_exec_version = 20; // used on agg_state type
+
+    optional uint64 total_data_size = 21;
 }
 
 message PrimaryKeyIndexMetaPB {
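The new field takes the next free tag number (21) in ColumnMetaPB, so readers built before this commit skip it and segments written before it simply report the field as unset. A hedged sketch of reading it through the protoc-generated C++ accessors (standard proto2 conventions; the include path and namespace are assumptions, while the accessor names follow directly from the field declaration):

// Illustrative only: uses the accessors protoc generates for an optional
// uint64 field. Include path and namespace are assumptions about the build.
#include "segment_v2.pb.h" // generated from gensrc/proto/segment_v2.proto
#include <cstdint>

// Returns the recorded column data size, or 0 for segments written before
// the field existed.
uint64_t column_total_data_size(const doris::segment_v2::ColumnMetaPB& meta) {
    return meta.has_total_data_size() ? meta.total_data_size() : 0;
}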
