Skip to content

Commit 20405e5

Browse files
committed
Add DefaultRecordBatch implementation aka V2 message format parser/builder.
1 parent d10051b commit 20405e5

15 files changed

+1211
-22
lines changed

kafka/producer/kafka.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from ..codec import has_gzip, has_snappy, has_lz4
1616
from ..metrics import MetricConfig, Metrics
1717
from ..partitioner.default import DefaultPartitioner
18+
from ..record.default_records import DefaultRecordBatchBuilder
1819
from ..record.legacy_records import LegacyRecordBatchBuilder
1920
from ..serializer import Serializer
2021
from ..structs import TopicPartition
@@ -483,15 +484,21 @@ def partitions_for(self, topic):
483484
return self._wait_on_metadata(topic, max_wait)
484485

485486
def _max_usable_produce_magic(self):
486-
if self.config['api_version'] >= (0, 10):
487+
if self.config['api_version'] >= (0, 11):
488+
return 2
489+
elif self.config['api_version'] >= (0, 10):
487490
return 1
488491
else:
489492
return 0
490493

491-
def _estimate_size_in_bytes(self, key, value):
494+
def _estimate_size_in_bytes(self, key, value, headers=[]):
492495
magic = self._max_usable_produce_magic()
493-
return LegacyRecordBatchBuilder.estimate_size_in_bytes(
494-
magic, self.config['compression_type'], key, value)
496+
if magic == 2:
497+
return DefaultRecordBatchBuilder.estimate_size_in_bytes(
498+
key, value, headers)
499+
else:
500+
return LegacyRecordBatchBuilder.estimate_size_in_bytes(
501+
magic, self.config['compression_type'], key, value)
495502

496503
def send(self, topic, value=None, key=None, partition=None, timestamp_ms=None):
497504
"""Publish a message to a topic.

kafka/producer/record_accumulator.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ def record_count(self):
5757

5858
def try_append(self, timestamp_ms, key, value):
5959
offset = self.records.next_offset()
60-
checksum, record_size = self.records.append(timestamp_ms, key, value)
60+
checksum, record_size = self.records.append(
61+
timestamp_ms, key, value, headers=[])
6162
if record_size == 0:
6263
return None
6364

kafka/producer/sender.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,11 @@ def _produce_request(self, node_id, acks, timeout, batches):
291291
buf = batch.records.buffer()
292292
produce_records_by_partition[topic][partition] = buf
293293

294-
if self.config['api_version'] >= (0, 10):
294+
kwargs = {}
295+
if self.config['api_version'] >= (0, 11):
296+
version = 3
297+
kwargs = dict(transactional_id=None)
298+
elif self.config['api_version'] >= (0, 10):
295299
version = 2
296300
elif self.config['api_version'] == (0, 9):
297301
version = 1
@@ -302,7 +306,8 @@ def _produce_request(self, node_id, acks, timeout, batches):
302306
timeout=timeout,
303307
topics=[(topic, list(partition_info.items()))
304308
for topic, partition_info
305-
in six.iteritems(produce_records_by_partition)]
309+
in six.iteritems(produce_records_by_partition)],
310+
**kwargs
306311
)
307312

308313
def wakeup(self):

kafka/record/README

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Module structured mostly based on
2+
kafka/clients/src/main/java/org/apache/kafka/common/record/ module of Java
3+
Client.
4+
5+
See abc.py for abstract declarations. `ABCRecords` is used as a facade to hide
6+
version differences. `ABCRecordBatch` subclasses will implement actual parsers
7+
for different versions (v0/v1 as LegacyBatch and v2 as DefaultBatch. Names
8+
taken from Java).

kafka/record/_crc32c.py

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
#!/usr/bin/env python
2+
#
3+
# Taken from https://cloud.google.com/appengine/docs/standard/python/refdocs/\
4+
# modules/google/appengine/api/files/crc32c?hl=ru
5+
#
6+
# Copyright 2007 Google Inc.
7+
#
8+
# Licensed under the Apache License, Version 2.0 (the "License");
9+
# you may not use this file except in compliance with the License.
10+
# You may obtain a copy of the License at
11+
#
12+
# http://www.apache.org/licenses/LICENSE-2.0
13+
#
14+
# Unless required by applicable law or agreed to in writing, software
15+
# distributed under the License is distributed on an "AS IS" BASIS,
16+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
# See the License for the specific language governing permissions and
18+
# limitations under the License.
19+
#
20+
"""Implementation of CRC-32C checksumming as in rfc3720 section B.4.
21+
See http://en.wikipedia.org/wiki/Cyclic_redundancy_check for details on CRC-32C
22+
This code is a manual python translation of c code generated by
23+
pycrc 0.7.1 (http://www.tty1.net/pycrc/). Command line used:
24+
'./pycrc.py --model=crc-32c --generate c --algorithm=table-driven'
25+
"""
26+
27+
import array
28+
29+
CRC_TABLE = (
30+
0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
31+
0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
32+
0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
33+
0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
34+
0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b,
35+
0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
36+
0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
37+
0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
38+
0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
39+
0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
40+
0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5,
41+
0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
42+
0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45,
43+
0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
44+
0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
45+
0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
46+
0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48,
47+
0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
48+
0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687,
49+
0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
50+
0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
51+
0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
52+
0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8,
53+
0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
54+
0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096,
55+
0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
56+
0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
57+
0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
58+
0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9,
59+
0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
60+
0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36,
61+
0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
62+
0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
63+
0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
64+
0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
65+
0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
66+
0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3,
67+
0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
68+
0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
69+
0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
70+
0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652,
71+
0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
72+
0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d,
73+
0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
74+
0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
75+
0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
76+
0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2,
77+
0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
78+
0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
79+
0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
80+
0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
81+
0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
82+
0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f,
83+
0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
84+
0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90,
85+
0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
86+
0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
87+
0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
88+
0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321,
89+
0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
90+
0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81,
91+
0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
92+
0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
93+
0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,
94+
)
95+
96+
CRC_INIT = 0
97+
_MASK = 0xFFFFFFFF
98+
99+
100+
def crc_update(crc, data):
101+
"""Update CRC-32C checksum with data.
102+
Args:
103+
crc: 32-bit checksum to update as long.
104+
data: byte array, string or iterable over bytes.
105+
Returns:
106+
32-bit updated CRC-32C as long.
107+
"""
108+
if type(data) != array.array or data.itemsize != 1:
109+
buf = array.array("B", data)
110+
else:
111+
buf = data
112+
crc = crc ^ _MASK
113+
for b in buf:
114+
table_index = (crc ^ b) & 0xff
115+
crc = (CRC_TABLE[table_index] ^ (crc >> 8)) & _MASK
116+
return crc ^ _MASK
117+
118+
119+
def crc_finalize(crc):
120+
"""Finalize CRC-32C checksum.
121+
This function should be called as last step of crc calculation.
122+
Args:
123+
crc: 32-bit checksum as long.
124+
Returns:
125+
finalized 32-bit checksum as long
126+
"""
127+
return crc & _MASK
128+
129+
130+
def crc(data):
131+
"""Compute CRC-32C checksum of the data.
132+
Args:
133+
data: byte array, string or iterable over bytes.
134+
Returns:
135+
32-bit CRC-32C checksum of data as long.
136+
"""
137+
return crc_finalize(crc_update(CRC_INIT, data))
138+
139+
140+
if __name__ == "__main__":
141+
import sys
142+
data = sys.stdin.read()
143+
print(hex(crc(data)))

kafka/record/abc.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,18 @@ def checksum(self):
3636
be the checksum for v0 and v1 and None for v2 and above.
3737
"""
3838

39+
@abc.abstractproperty
40+
def headers(self):
41+
""" If supported by version list of key-value tuples, or empty list if
42+
not supported by format.
43+
"""
44+
3945

4046
class ABCRecordBatchBuilder(object):
4147
__metaclass__ = abc.ABCMeta
4248

4349
@abc.abstractmethod
44-
def append(self, offset, timestamp, key, value):
50+
def append(self, offset, timestamp, key, value, headers=None):
4551
""" Writes record to internal buffer.
4652
4753
Arguments:
@@ -51,6 +57,8 @@ def append(self, offset, timestamp, key, value):
5157
set to current time.
5258
key (bytes or None): Key of the record
5359
value (bytes or None): Value of the record
60+
headers (List[Tuple[str, bytes]]): Headers of the record. Header
61+
keys can not be ``None``.
5462
5563
Returns:
5664
(bytes, int): Checksum of the written record (or None for v2 and

0 commit comments

Comments
 (0)