Skip to content

Commit 6d8a113

Browse files
authored
[fix](inverted index) create empty idx file when creating a index on variant-type column (#50937) (#51181)
pick from master #50937
1 parent ecc56c2 commit 6d8a113

File tree

5 files changed

+160
-1
lines changed

5 files changed

+160
-1
lines changed

be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include <filesystem>
2424

2525
#include "common/status.h"
26+
#include "io/fs/stream_sink_file_writer.h"
2627
#include "olap/rowset/segment_v2/inverted_index_desc.h"
2728
#include "olap/rowset/segment_v2/inverted_index_fs_directory.h"
2829
#include "olap/rowset/segment_v2/inverted_index_reader.h"
@@ -124,6 +125,10 @@ Status InvertedIndexFileWriter::close() {
124125
DCHECK(!_closed) << debug_string();
125126
_closed = true;
126127
if (_indices_dirs.empty()) {
128+
// An empty file must still be created even if there are no indexes to write
129+
if (dynamic_cast<io::StreamSinkFileWriter*>(_idx_v2_writer.get()) != nullptr) {
130+
return _idx_v2_writer->close();
131+
}
127132
return Status::OK();
128133
}
129134
DBUG_EXECUTE_IF("inverted_index_storage_format_must_be_v2", {

be/src/runtime/load_stream_writer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,8 @@ Status LoadStreamWriter::close_writer(uint32_t segid, FileType file_type) {
174174
LOG(INFO) << "file " << segid << " path " << file_writer->path().native() << "closed, written "
175175
<< file_writer->bytes_appended() << " bytes"
176176
<< ", file type is " << file_type;
177-
if (file_writer->bytes_appended() == 0) {
177+
// ‌Allow the index file to be empty when creating an index on a variant-type column.
178+
if (file_writer->bytes_appended() == 0 && file_type != FileType::INVERTED_INDEX_FILE) {
178179
return Status::Corruption("file {} closed with 0 bytes, file type is {}",
179180
file_writer->path().native(), file_type);
180181
}
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include <brpc/channel.h>
19+
#include <brpc/server.h>
20+
21+
#include "gtest/gtest_pred_impl.h"
22+
#include "io/fs/stream_sink_file_writer.h"
23+
#include "olap/olap_common.h"
24+
#include "olap/rowset/segment_v2/inverted_index_file_writer.h"
25+
#include "vec/sink/load_stream_stub.h"
26+
27+
namespace doris {
28+
29+
constexpr int64_t LOAD_ID_LO = 1;
30+
constexpr int64_t LOAD_ID_HI = 2;
31+
constexpr int64_t NUM_STREAM = 3;
32+
constexpr static std::string_view tmp_dir = "./ut_dir/tmp";
33+
class EmptyIndexFileTest : public testing::Test {
34+
class MockStreamStub : public LoadStreamStub {
35+
public:
36+
MockStreamStub(PUniqueId load_id, int64_t src_id)
37+
: LoadStreamStub(load_id, src_id, std::make_shared<IndexToTabletSchema>(),
38+
std::make_shared<IndexToEnableMoW>()) {};
39+
40+
virtual ~MockStreamStub() = default;
41+
42+
// APPEND_DATA
43+
virtual Status append_data(int64_t partition_id, int64_t index_id, int64_t tablet_id,
44+
int64_t segment_id, uint64_t offset, std::span<const Slice> data,
45+
bool segment_eos = false,
46+
FileType file_type = FileType::SEGMENT_FILE) override {
47+
EXPECT_TRUE(segment_eos);
48+
return Status::OK();
49+
}
50+
};
51+
52+
public:
53+
EmptyIndexFileTest() = default;
54+
~EmptyIndexFileTest() = default;
55+
56+
protected:
57+
virtual void SetUp() {
58+
_load_id.set_hi(LOAD_ID_HI);
59+
_load_id.set_lo(LOAD_ID_LO);
60+
for (int src_id = 0; src_id < NUM_STREAM; src_id++) {
61+
_streams.emplace_back(new MockStreamStub(_load_id, src_id));
62+
}
63+
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
64+
EXPECT_TRUE(io::global_local_filesystem()->create_directory(tmp_dir).ok());
65+
std::vector<StorePath> paths;
66+
paths.emplace_back(std::string(tmp_dir), 1024000000);
67+
auto tmp_file_dirs = std::make_unique<segment_v2::TmpFileDirs>(paths);
68+
EXPECT_TRUE(tmp_file_dirs->init().ok());
69+
ExecEnv::GetInstance()->set_tmp_file_dir(std::move(tmp_file_dirs));
70+
}
71+
72+
virtual void TearDown() {
73+
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
74+
}
75+
76+
PUniqueId _load_id;
77+
std::vector<std::shared_ptr<LoadStreamStub>> _streams;
78+
};
79+
80+
TEST_F(EmptyIndexFileTest, test_empty_index_file) {
81+
io::FileWriterPtr file_writer = std::make_unique<io::StreamSinkFileWriter>(_streams);
82+
auto fs = io::global_local_filesystem();
83+
std::string index_path = "/tmp/empty_index_file_test";
84+
std::string rowset_id = "1234567890";
85+
int64_t seg_id = 1234567890;
86+
auto index_file_writer = std::make_unique<segment_v2::InvertedIndexFileWriter>(
87+
fs, index_path, rowset_id, seg_id, InvertedIndexStorageFormatPB::V2,
88+
std::move(file_writer));
89+
EXPECT_TRUE(index_file_writer->close().ok());
90+
}
91+
92+
} // namespace doris
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
-- This file is automatically generated. You should know what you did if you want to edit this
2+
-- !sql --
3+
1 "abcd"
4+
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
suite("test_variant_empty_index_file", "p0") {
19+
def tableName = "test_variant_empty_index_file"
20+
sql """ drop table if exists ${tableName} """
21+
// create table
22+
sql """
23+
CREATE TABLE IF NOT EXISTS ${tableName}
24+
(
25+
`id` bigint NOT NULL,
26+
`v` variant NULL,
27+
INDEX v_idx (`v`) USING INVERTED
28+
) DUPLICATE KEY(`id`)
29+
DISTRIBUTED BY HASH (`id`) BUCKETS 1
30+
PROPERTIES (
31+
"replication_allocation" = "tag.location.default: 1",
32+
"inverted_index_storage_format" = "v2",
33+
"disable_auto_compaction" = "true"
34+
);
35+
"""
36+
37+
sql """ set enable_memtable_on_sink_node = true """
38+
sql """ insert into ${tableName} values (1, 'abcd') """
39+
40+
def tablets = sql_return_maparray """ show tablets from ${tableName}; """
41+
42+
def backendId_to_backendIP = [:]
43+
def backendId_to_backendHttpPort = [:]
44+
getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort);
45+
46+
String tablet_id = tablets[0].TabletId
47+
String backend_id = tablets[0].BackendId
48+
String ip = backendId_to_backendIP.get(backend_id)
49+
String port = backendId_to_backendHttpPort.get(backend_id)
50+
if (!isCloudMode()) {
51+
def (code, out, err) = http_client("GET", String.format("http://%s:%s/api/show_nested_index_file?tablet_id=%s", ip, port, tablet_id))
52+
logger.info("Run show_nested_index_file_on_tablet: code=" + code + ", out=" + out + ", err=" + err)
53+
assertEquals("E-6002", parseJson(out.trim()).status)
54+
}
55+
56+
qt_sql """ select * from ${tableName} where v match 'abcd'"""
57+
}

0 commit comments

Comments
 (0)