
Commit 39796ec

Merge branch 'teach_producers_about_compression' into producer_compression
Conflicts:
    servers/0.8.0/kafka-src
    test/test_unit.py

2 parents: b47bf78 + 671b74a

3 files changed: 49 additions, 25 deletions


kafka/producer.py (33 additions, 9 deletions)

@@ -11,7 +11,10 @@
 
 from kafka.common import ProduceRequest, TopicAndPartition
 from kafka.partitioner import HashedPartitioner
-from kafka.protocol import create_message
+from kafka.protocol import (
+    CODEC_NONE, CODEC_GZIP, CODEC_SNAPPY, ALL_CODECS,
+    create_message, create_gzip_message, create_snappy_message,
+)
 
 log = logging.getLogger("kafka")
 
@@ -21,7 +24,7 @@
 STOP_ASYNC_PRODUCER = -1
 
 
-def _send_upstream(queue, client, batch_time, batch_size,
+def _send_upstream(queue, client, codec, batch_time, batch_size,
                    req_acks, ack_timeout):
     """
     Listen on the queue for a specified number of messages or till
@@ -62,7 +65,14 @@ def _send_upstream(queue, client, batch_time, batch_size,
 
         # Send collected requests upstream
         reqs = []
-        for topic_partition, messages in msgset.items():
+        for topic_partition, msg in msgset.items():
+            if codec == CODEC_GZIP:
+                messages = [create_gzip_message(msg)]
+            elif codec == CODEC_SNAPPY:
+                messages = [create_snappy_message(msg)]
+            else:
+                messages = [create_message(m) for m in msg]
+
             req = ProduceRequest(topic_partition.topic,
                                  topic_partition.partition,
                                  messages)
@@ -102,6 +112,7 @@ class Producer(object):
     def __init__(self, client, async=False,
                  req_acks=ACK_AFTER_LOCAL_WRITE,
                  ack_timeout=DEFAULT_ACK_TIMEOUT,
+                 codec=None,
                  batch_send=False,
                  batch_send_every_n=BATCH_SEND_MSG_COUNT,
                  batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL):
@@ -119,11 +130,17 @@ def __init__(self, client, async=False,
         self.req_acks = req_acks
         self.ack_timeout = ack_timeout
 
+        if codec is None:
+            codec = CODEC_NONE
+        assert codec in ALL_CODECS
+        self.codec = codec
+
         if self.async:
             self.queue = Queue()  # Messages are sent through this queue
             self.proc = Process(target=_send_upstream,
                                 args=(self.queue,
                                       self.client.copy(),
+                                      self.codec,
                                       batch_send_every_t,
                                       batch_send_every_n,
                                       self.req_acks,
@@ -139,11 +156,16 @@ def send_messages(self, topic, partition, *msg):
         """
         if self.async:
             for m in msg:
-                self.queue.put((TopicAndPartition(topic, partition),
-                                create_message(m)))
+                self.queue.put((TopicAndPartition(topic, partition), m))
             resp = []
         else:
-            messages = [create_message(m) for m in msg]
+            if self.codec == CODEC_GZIP:
+                messages = [create_gzip_message(msg)]
+            elif self.codec == CODEC_SNAPPY:
+                messages = [create_snappy_message(msg)]
+            else:
+                messages = [create_message(m) for m in msg]
+
             req = ProduceRequest(topic, partition, messages)
             try:
                 resp = self.client.send_produce_request([req], acks=self.req_acks,
@@ -168,7 +190,7 @@ def stop(self, timeout=1):
 
 class SimpleProducer(Producer):
     """
-    A simple, round-robbin producer. Each message goes to exactly one partition
+    A simple, round-robin producer. Each message goes to exactly one partition
 
     Params:
     client - The Kafka client instance to use
@@ -189,14 +211,15 @@ class SimpleProducer(Producer):
     def __init__(self, client, async=False,
                  req_acks=Producer.ACK_AFTER_LOCAL_WRITE,
                  ack_timeout=Producer.DEFAULT_ACK_TIMEOUT,
+                 codec=None,
                  batch_send=False,
                  batch_send_every_n=BATCH_SEND_MSG_COUNT,
                  batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL,
                  random_start=False):
         self.partition_cycles = {}
         self.random_start = random_start
         super(SimpleProducer, self).__init__(client, async, req_acks,
-                                             ack_timeout, batch_send,
+                                             ack_timeout, codec, batch_send,
                                              batch_send_every_n,
                                              batch_send_every_t)
 
@@ -241,6 +264,7 @@ class KeyedProducer(Producer):
     def __init__(self, client, partitioner=None, async=False,
                  req_acks=Producer.ACK_AFTER_LOCAL_WRITE,
                  ack_timeout=Producer.DEFAULT_ACK_TIMEOUT,
+                 codec=None,
                  batch_send=False,
                  batch_send_every_n=BATCH_SEND_MSG_COUNT,
                  batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL):
@@ -250,7 +274,7 @@ def __init__(self, client, partitioner=None, async=False,
         self.partitioners = {}
 
         super(KeyedProducer, self).__init__(client, async, req_acks,
-                                            ack_timeout, batch_send,
+                                            ack_timeout, codec, batch_send,
                                             batch_send_every_n,
                                             batch_send_every_t)
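For context, a minimal usage sketch of the new codec option as this diff defines it. The broker address, topic name, and payloads are placeholders; the KafkaClient construction assumes the 0.8-era (host, port) form and the send_messages(topic, *payloads) call reflects the SimpleProducer API at this point in the tree. Only the codec=... parameter itself comes from this change:

from kafka.client import KafkaClient
from kafka.producer import SimpleProducer
from kafka.protocol import CODEC_GZIP

# Placeholder broker and topic; adjust for your environment.
client = KafkaClient("localhost", 9092)

# codec defaults to CODEC_NONE; with CODEC_GZIP the producer wraps each
# batch of payloads into a single gzip-compressed message via
# create_gzip_message() before building the ProduceRequest.
producer = SimpleProducer(client, codec=CODEC_GZIP)
producer.send_messages("my-topic", "msg 1", "msg 2")

In async mode the same codec is passed to the _send_upstream worker, which now queues raw payloads and compresses each collected batch as a whole.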
kafka/protocol.py (12 additions, 11 deletions)

@@ -18,6 +18,12 @@
 
 log = logging.getLogger("kafka")
 
+ATTRIBUTE_CODEC_MASK = 0x03
+CODEC_NONE = 0x00
+CODEC_GZIP = 0x01
+CODEC_SNAPPY = 0x02
+ALL_CODECS = (CODEC_NONE, CODEC_GZIP, CODEC_SNAPPY)
+
 
 class KafkaProtocol(object):
     """
@@ -32,11 +38,6 @@ class KafkaProtocol(object):
     OFFSET_COMMIT_KEY = 8
     OFFSET_FETCH_KEY = 9
 
-    ATTRIBUTE_CODEC_MASK = 0x03
-    CODEC_NONE = 0x00
-    CODEC_GZIP = 0x01
-    CODEC_SNAPPY = 0x02
-
     ###################
     #   Private API   #
     ###################
@@ -150,17 +151,17 @@ def _decode_message(cls, data, offset):
         (key, cur) = read_int_string(data, cur)
         (value, cur) = read_int_string(data, cur)
 
-        codec = att & KafkaProtocol.ATTRIBUTE_CODEC_MASK
+        codec = att & ATTRIBUTE_CODEC_MASK
 
-        if codec == KafkaProtocol.CODEC_NONE:
+        if codec == CODEC_NONE:
             yield (offset, Message(magic, att, key, value))
 
-        elif codec == KafkaProtocol.CODEC_GZIP:
+        elif codec == CODEC_GZIP:
             gz = gzip_decode(value)
             for (offset, msg) in KafkaProtocol._decode_message_set_iter(gz):
                 yield (offset, msg)
 
-        elif codec == KafkaProtocol.CODEC_SNAPPY:
+        elif codec == CODEC_SNAPPY:
             snp = snappy_decode(value)
             for (offset, msg) in KafkaProtocol._decode_message_set_iter(snp):
                 yield (offset, msg)
@@ -543,7 +544,7 @@ def create_gzip_message(payloads, key=None):
         [create_message(payload) for payload in payloads])
 
     gzipped = gzip_encode(message_set)
-    codec = KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_GZIP
+    codec = ATTRIBUTE_CODEC_MASK & CODEC_GZIP
 
     return Message(0, 0x00 | codec, key, gzipped)
 
@@ -564,6 +565,6 @@ def create_snappy_message(payloads, key=None):
         [create_message(payload) for payload in payloads])
 
     snapped = snappy_encode(message_set)
-    codec = KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_SNAPPY
+    codec = ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY
 
     return Message(0, 0x00 | codec, key, snapped)

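The codec constants move to module level so the producer and the protocol helpers can share them. As a standalone sketch of what the masking above does (codec_of is an illustrative helper, not part of the library), the codec ID lives in the low two bits of a message's attributes byte:

ATTRIBUTE_CODEC_MASK = 0x03  # low two bits of Message.attributes
CODEC_NONE = 0x00
CODEC_GZIP = 0x01
CODEC_SNAPPY = 0x02

def codec_of(attributes):
    # The same masking _decode_message performs to pick a decompression path.
    return attributes & ATTRIBUTE_CODEC_MASK

assert codec_of(0x00 | CODEC_GZIP) == CODEC_GZIP
assert codec_of(0x00 | CODEC_SNAPPY) == CODEC_SNAPPY
assert codec_of(0x00) == CODEC_NONE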
test/test_protocol.py (4 additions, 5 deletions)

@@ -16,7 +16,8 @@
     snappy_encode, snappy_decode
 )
 from kafka.protocol import (
-    create_gzip_message, create_message, create_snappy_message, KafkaProtocol
+    create_gzip_message, create_message, create_snappy_message, KafkaProtocol,
+    ATTRIBUTE_CODEC_MASK, CODEC_GZIP, CODEC_SNAPPY
 )
 
 class TestProtocol(unittest2.TestCase):
@@ -33,8 +34,7 @@ def test_create_gzip(self):
         payloads = ["v1", "v2"]
         msg = create_gzip_message(payloads)
         self.assertEqual(msg.magic, 0)
-        self.assertEqual(msg.attributes, KafkaProtocol.ATTRIBUTE_CODEC_MASK &
-                                         KafkaProtocol.CODEC_GZIP)
+        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_GZIP)
         self.assertEqual(msg.key, None)
         # Need to decode to check since gzipped payload is non-deterministic
         decoded = gzip_decode(msg.value)
@@ -63,8 +63,7 @@ def test_create_snappy(self):
         payloads = ["v1", "v2"]
         msg = create_snappy_message(payloads)
         self.assertEqual(msg.magic, 0)
-        self.assertEqual(msg.attributes, KafkaProtocol.ATTRIBUTE_CODEC_MASK &
-                                         KafkaProtocol.CODEC_SNAPPY)
+        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
         self.assertEqual(msg.key, None)
         decoded = snappy_decode(msg.value)
         expect = "".join([

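A quick standalone round trip in the same spirit as these tests (assuming the gzip helpers are importable from kafka.codec, as the test module's imports suggest):

from kafka.codec import gzip_decode
from kafka.protocol import (
    create_gzip_message, ATTRIBUTE_CODEC_MASK, CODEC_GZIP
)

msg = create_gzip_message(["v1", "v2"])

# The gzip codec bit is set in the message attributes.
assert (msg.attributes & ATTRIBUTE_CODEC_MASK) == CODEC_GZIP

# The compressed value decodes back to a message set that embeds both payloads.
decoded = gzip_decode(msg.value)
assert "v1" in decoded and "v2" in decoded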