@@ -1174,7 +1174,9 @@ Status VerticalSegmentWriter::write_batch() {
1174
1174
vectorized::IOlapColumnDataAccessor* seq_column = nullptr ;
1175
1175
// the key is the unique id of the cluster key column
1176
1176
std::map<uint32_t , vectorized::IOlapColumnDataAccessor*> cid_to_column;
1177
+ int64_t total_data_size = 0 ;
1177
1178
for (uint32_t cid = 0 ; cid < _tablet_schema->num_columns (); ++cid) {
1179
+ int64_t column_data_size = 0 ;
1178
1180
RETURN_IF_ERROR (_create_column_writer (cid, _tablet_schema->column (cid), _tablet_schema));
1179
1181
for (auto & data : _batched_blocks) {
1180
1182
RETURN_IF_ERROR (_olap_data_convertor->set_source_content_with_specifid_columns (
@@ -1200,16 +1202,28 @@ Status VerticalSegmentWriter::write_batch() {
1200
1202
}
1201
1203
RETURN_IF_ERROR (_column_writers[cid]->append (column->get_nullmap (), column->get_data (),
1202
1204
data.num_rows ));
1203
- _olap_data_convertor->clear_source_content ();
1204
- }
1205
- if (_data_dir != nullptr &&
1206
- _data_dir->reach_capacity_limit (_column_writers[cid]->estimate_buffer_size ())) {
1207
- return Status::Error<DISK_REACH_CAPACITY_LIMIT>(" disk {} exceed capacity limit." ,
1208
- _data_dir->path_hash ());
1205
+
1206
+ const auto * block = data.block ;
1207
+ for (size_t row_id = 0 ; row_id < block->rows (); ++row_id) {
1208
+ const auto & stringRef = block->get_by_position (cid).column ->get_data_at (row_id);
1209
+ total_data_size += stringRef.size ;
1210
+ column_data_size += stringRef.size ;
1211
+
1212
+ _olap_data_convertor->clear_source_content ();
1213
+ }
1214
+
1215
+ _footer.mutable_columns (cid)->set_total_data_size (column_data_size);
1216
+
1217
+ if (_data_dir != nullptr &&
1218
+ _data_dir->reach_capacity_limit (_column_writers[cid]->estimate_buffer_size ())) {
1219
+ return Status::Error<DISK_REACH_CAPACITY_LIMIT>(" disk {} exceed capacity limit." ,
1220
+ _data_dir->path_hash ());
1221
+ }
1222
+ RETURN_IF_ERROR (_column_writers[cid]->finish ());
1223
+ RETURN_IF_ERROR (_column_writers[cid]->write_data ());
1209
1224
}
1210
- RETURN_IF_ERROR (_column_writers[cid]->finish ());
1211
- RETURN_IF_ERROR (_column_writers[cid]->write_data ());
1212
1225
}
1226
+ _footer.set_data_footprint (total_data_size);
1213
1227
1214
1228
for (auto & data : _batched_blocks) {
1215
1229
_olap_data_convertor->set_source_content (data.block , data.row_pos , data.num_rows );
0 commit comments