1
1
#include " compressed.h"
2
2
#include " wire_format.h"
3
3
#include " output.h"
4
- #include " .. /exceptions.h"
4
+ #include " clickhouse /exceptions.h"
5
5
6
6
#include < city.h>
7
7
#include < lz4.h>
8
8
#include < exception>
9
+ #include < zstd.h>
9
10
#include < stdexcept>
10
11
#include < system_error>
11
12
12
13
namespace {

/// Size in bytes of the per-chunk header written in front of every compressed payload:
/// one method byte followed by two 32-bit sizes (compressed size incl. header, original size).
constexpr size_t HEADER_SIZE = sizeof(uint8_t) + 2 * sizeof(uint32_t);

/// Method byte stored as the first byte of the chunk header.
/// Mirrors DB::CompressionMethodByte from src/Compression/CompressionInfo.h of the ClickHouse project.
enum class CompressionMethodByte : uint8_t {
    NONE = 0x02,
    LZ4  = 0x82,
    ZSTD = 0x90,
};

/// Slack added on top of the LZ4_compressBound/ZSTD_compressBound estimation: the library
/// documentation says compression is faster when the output buffer is larger than that estimate.
constexpr size_t EXTRA_COMPRESS_BUFFER_SIZE = 4 * 1024;

/// Upper limit accepted for the compressed size announced by an incoming chunk (1GB).
constexpr size_t DBMS_MAX_COMPRESSED_SIZE = 1ULL << 30;

}
@@ -32,7 +39,7 @@ CompressedInput::~CompressedInput() {
32
39
#else
33
40
if (!std::uncaught_exceptions ()) {
34
41
#endif
35
- throw LZ4Error (" some data was not read" );
42
+ throw CompressionError (" some data was not read" );
36
43
}
37
44
}
38
45
}
@@ -60,55 +67,79 @@ bool CompressedInput::Decompress() {
60
67
return false ;
61
68
}
62
69
63
- if (method != COMPRESSION_METHOD) {
64
- throw LZ4Error (" unsupported compression method " + std::to_string (int (method)));
65
- } else {
66
- if (!WireFormat::ReadFixed (*input_, &compressed)) {
67
- return false ;
68
- }
69
- if (!WireFormat::ReadFixed (*input_, &original)) {
70
- return false ;
71
- }
70
+ if (method != static_cast <uint8_t >(CompressionMethodByte::LZ4) && method != static_cast <uint8_t >(CompressionMethodByte::ZSTD)) {
71
+ throw CompressionError (" unsupported compression method " + std::to_string ((method)));
72
+ }
72
73
73
- if (compressed > DBMS_MAX_COMPRESSED_SIZE) {
74
- throw LZ4Error (" compressed data too big" );
75
- }
74
+ if (!WireFormat::ReadFixed (*input_, &compressed)) {
75
+ return false ;
76
+ }
77
+ if (!WireFormat::ReadFixed (*input_, &original)) {
78
+ return false ;
79
+ }
80
+
81
+ if (compressed > DBMS_MAX_COMPRESSED_SIZE) {
82
+ throw CompressionError (" compressed data too big" );
83
+ }
76
84
77
- Buffer tmp (compressed);
85
+ Buffer tmp (compressed);
78
86
79
- // Data header
80
- {
81
- BufferOutput out (&tmp);
82
- out.Write (&method, sizeof (method));
83
- out.Write (&compressed, sizeof (compressed));
84
- out.Write (&original, sizeof (original));
85
- out.Flush ();
87
+ // Data header
88
+ {
89
+ BufferOutput out (&tmp);
90
+ out.Write (&method, sizeof (method));
91
+ out.Write (&compressed, sizeof (compressed));
92
+ out.Write (&original, sizeof (original));
93
+ out.Flush ();
94
+ }
95
+
96
+ if (!WireFormat::ReadBytes (*input_, tmp.data () + HEADER_SIZE, compressed - HEADER_SIZE)) {
97
+ return false ;
98
+ } else {
99
+ if (hash != CityHash128 ((const char *)tmp.data (), compressed)) {
100
+ throw CompressionError (" data was corrupted" );
86
101
}
102
+ }
103
+
104
+ data_ = Buffer (original);
87
105
88
- if (!WireFormat::ReadBytes (*input_, tmp.data () + HEADER_SIZE, compressed - HEADER_SIZE)) {
89
- return false ;
106
+ switch (method) {
107
+ case static_cast <uint8_t >(CompressionMethodByte::LZ4): {
108
+ if (LZ4_decompress_safe ((const char *)tmp.data () + HEADER_SIZE, (char *)data_.data (), static_cast <int >(compressed - HEADER_SIZE), original) < 0 ) {
109
+ throw CompressionError (" can't decompress LZ4-encoded data" );
90
110
} else {
91
- if (hash != CityHash128 ((const char *)tmp.data (), compressed)) {
92
- throw LZ4Error (" data was corrupted" );
93
- }
111
+ mem_.Reset (data_.data (), original);
94
112
}
113
+ return true ;
114
+ }
95
115
96
- data_ = Buffer (original);
116
+ case static_cast <uint8_t >(CompressionMethodByte::ZSTD): {
117
+ size_t res = ZSTD_decompress ((char *)data_.data (), original, (const char *)tmp.data () + HEADER_SIZE, static_cast <int >(compressed - HEADER_SIZE));
97
118
98
- if (LZ4_decompress_safe (( const char *)tmp. data () + HEADER_SIZE, ( char *)data_. data (), static_cast < int >(compressed - HEADER_SIZE), original) < 0 ) {
99
- throw LZ4Error (" can't decompress data" );
119
+ if (ZSTD_isError (res) ) {
120
+ throw CompressionError (" can't decompress ZSTD-encoded data, ZSTD error: " + std::string ( ZSTD_getErrorName (res)) );
100
121
} else {
101
122
mem_.Reset (data_.data (), original);
102
123
}
124
+ return true ;
125
+ }
126
+
127
+ case static_cast <uint8_t >(CompressionMethodByte::NONE): {
128
+ throw CompressionError (" compression method not defined" + std::to_string ((method)));
129
+ }
130
+ default : {
131
+ throw CompressionError (" Unknown or unsupported compression method " + std::to_string ((method)));
132
+ }
103
133
}
104
134
105
135
return true ;
106
136
}
107
137
108
138
109
- CompressedOutput::CompressedOutput (OutputStream * destination, size_t max_compressed_chunk_size)
139
+ CompressedOutput::CompressedOutput (OutputStream * destination, size_t max_compressed_chunk_size, CompressionMethod method )
110
140
: destination_ (destination)
111
141
, max_compressed_chunk_size_ (max_compressed_chunk_size)
142
+ , method_ (method)
112
143
{
113
144
PreallocateCompressBuffer (max_compressed_chunk_size);
114
145
}
@@ -139,37 +170,89 @@ void CompressedOutput::DoFlush() {
139
170
}
140
171
141
172
void CompressedOutput::Compress (const void * data, size_t len) {
142
- const auto compressed_size = LZ4_compress_default (
143
- (const char *)data,
144
- (char *)compressed_buffer_.data () + HEADER_SIZE,
145
- static_cast <int >(len),
146
- static_cast <int >(compressed_buffer_.size () - HEADER_SIZE));
147
- if (compressed_size <= 0 )
148
- throw LZ4Error (" Failed to compress chunk of " + std::to_string (len) + " bytes, "
149
- " LZ4 error: " + std::to_string (compressed_size));
173
+ switch (method_) {
174
+ case clickhouse::CompressionMethod::LZ4: {
175
+ const auto compressed_size = LZ4_compress_default (
176
+ (const char *)data,
177
+ (char *)compressed_buffer_.data () + HEADER_SIZE,
178
+ static_cast <int >(len),
179
+ static_cast <int >(compressed_buffer_.size () - HEADER_SIZE));
180
+ if (compressed_size <= 0 )
181
+ throw CompressionError (" Failed to compress chunk of " + std::to_string (len) + " bytes, "
182
+ " LZ4 error: " + std::to_string (compressed_size));
150
183
151
- {
152
- auto header = compressed_buffer_.data ();
153
- WriteUnaligned (header, COMPRESSION_METHOD);
154
- // Compressed data size with header
155
- WriteUnaligned (header + 1 , static_cast <uint32_t >(compressed_size + HEADER_SIZE));
156
- // Original data size
157
- WriteUnaligned (header + 5 , static_cast <uint32_t >(len));
184
+ {
185
+ auto header = compressed_buffer_.data ();
186
+ WriteUnaligned (header, CompressionMethodByte::LZ4);
187
+ // Compressed data size with header
188
+ WriteUnaligned (header + 1 , static_cast <uint32_t >(compressed_size + HEADER_SIZE));
189
+ // Original data size
190
+ WriteUnaligned (header + 5 , static_cast <uint32_t >(len));
191
+ }
192
+
193
+ WireFormat::WriteFixed (*destination_, CityHash128 ((const char *)compressed_buffer_.data (), compressed_size + HEADER_SIZE));
194
+ WireFormat::WriteBytes (*destination_, compressed_buffer_.data (), compressed_size + HEADER_SIZE);
195
+ break ;
158
196
}
159
197
160
- WireFormat::WriteFixed (*destination_, CityHash128 (
161
- (const char *)compressed_buffer_.data (), compressed_size + HEADER_SIZE));
162
- WireFormat::WriteBytes (*destination_, compressed_buffer_.data (), compressed_size + HEADER_SIZE);
198
+ case clickhouse::CompressionMethod::ZSTD: {
199
+ const size_t compressed_size = ZSTD_compress (
200
+ (char *)compressed_buffer_.data () + HEADER_SIZE,
201
+ static_cast <int >(compressed_buffer_.size () - HEADER_SIZE),
202
+ (const char *)data,
203
+ static_cast <int >(len),
204
+ ZSTD_fast);
205
+ if (ZSTD_isError (compressed_size))
206
+ throw CompressionError (" Failed to compress chunk of " + std::to_string (len) + " bytes, "
207
+ " ZSTD error: " + std::string (ZSTD_getErrorName (compressed_size)));
208
+
209
+ {
210
+ auto header = compressed_buffer_.data ();
211
+ WriteUnaligned (header, CompressionMethodByte::ZSTD);
212
+ // Compressed data size with header
213
+ WriteUnaligned (header + 1 , static_cast <uint32_t >(compressed_size + HEADER_SIZE));
214
+ // Original data size
215
+ WriteUnaligned (header + 5 , static_cast <uint32_t >(len));
216
+ }
217
+
218
+ WireFormat::WriteFixed (*destination_, CityHash128 ((const char *)compressed_buffer_.data (), compressed_size + HEADER_SIZE));
219
+ WireFormat::WriteBytes (*destination_, compressed_buffer_.data (), compressed_size + HEADER_SIZE);
220
+ break ;
221
+ }
222
+
223
+ case clickhouse::CompressionMethod::None: {
224
+ throw CompressionError (" no compression defined" );
225
+ }
226
+ }
163
227
164
228
destination_->Flush ();
165
229
}
166
230
167
231
void CompressedOutput::PreallocateCompressBuffer (size_t input_size) {
168
- const auto estimated_compressed_buffer_size = LZ4_compressBound (static_cast <int >(input_size));
169
- if (estimated_compressed_buffer_size <= 0 )
170
- throw LZ4Error (" Failed to estimate compressed buffer size, LZ4 error: " + std::to_string (estimated_compressed_buffer_size));
232
+ switch (method_) {
233
+ case clickhouse::CompressionMethod::LZ4: {
234
+ const auto estimated_compressed_buffer_size = LZ4_compressBound (static_cast <int >(input_size));
235
+ if (estimated_compressed_buffer_size <= 0 )
236
+ throw CompressionError (" Failed to estimate compressed buffer size, LZ4 error: " + std::to_string (estimated_compressed_buffer_size));
237
+
238
+ compressed_buffer_.resize (estimated_compressed_buffer_size + HEADER_SIZE + EXTRA_COMPRESS_BUFFER_SIZE);
239
+ break ;
240
+ }
171
241
172
- compressed_buffer_.resize (estimated_compressed_buffer_size + HEADER_SIZE + EXTRA_COMPRESS_BUFFER_SIZE);
242
+ case clickhouse::CompressionMethod::ZSTD: {
243
+ const size_t estimated_compressed_buffer_size = ZSTD_compressBound (static_cast <int >(input_size));
244
+ if (ZSTD_isError (estimated_compressed_buffer_size))
245
+ throw CompressionError (" Failed to estimate compressed buffer size, ZSTD error: " + std::string (ZSTD_getErrorName (estimated_compressed_buffer_size)));
246
+
247
+ compressed_buffer_.resize (estimated_compressed_buffer_size + HEADER_SIZE + EXTRA_COMPRESS_BUFFER_SIZE);
248
+ break ;
249
+ }
250
+
251
+ case clickhouse::CompressionMethod::None: {
252
+ // / do nothing
253
+ break ;
254
+ }
255
+ }
173
256
}
174
257
175
258
}
0 commit comments