@@ -303,7 +303,7 @@ class KafkaConsumer(six.Iterator):
303
303
'sasl_kerberos_service_name' : 'kafka' ,
304
304
'sasl_kerberos_domain_name' : None ,
305
305
'sasl_oauth_token_provider' : None ,
306
- 'legacy_iterator' : True , # experimental feature
306
+ 'legacy_iterator' : False , # enable to revert to < 1.4.7 iterator
307
307
}
308
308
DEFAULT_SESSION_TIMEOUT_MS_0_9 = 30000
309
309
@@ -598,7 +598,7 @@ def partitions_for_topic(self, topic):
598
598
partitions = cluster .partitions_for_topic (topic )
599
599
return partitions
600
600
601
- def poll (self , timeout_ms = 0 , max_records = None ):
601
+ def poll (self , timeout_ms = 0 , max_records = None , update_offsets = True ):
602
602
"""Fetch data from assigned topics / partitions.
603
603
604
604
Records are fetched and returned in batches by topic-partition.
@@ -622,6 +622,12 @@ def poll(self, timeout_ms=0, max_records=None):
622
622
dict: Topic to list of records since the last fetch for the
623
623
subscribed list of topics and partitions.
624
624
"""
625
+ # Note: update_offsets is an internal-use only argument. It is used to
626
+ # support the Python iterator interface, which wraps consumer.poll()
627
+ # and requires that the partition offsets tracked by the fetcher are not
628
+ # updated until the iterator returns each record to the user. As such,
629
+ # the argument is not documented and should not be relied on by library
630
+ # users to not break in the future.
625
631
assert timeout_ms >= 0 , 'Timeout must not be negative'
626
632
if max_records is None :
627
633
max_records = self .config ['max_poll_records' ]
@@ -632,7 +638,7 @@ def poll(self, timeout_ms=0, max_records=None):
632
638
start = time .time ()
633
639
remaining = timeout_ms
634
640
while True :
635
- records = self ._poll_once (remaining , max_records )
641
+ records = self ._poll_once (remaining , max_records , update_offsets = update_offsets )
636
642
if records :
637
643
return records
638
644
@@ -642,7 +648,7 @@ def poll(self, timeout_ms=0, max_records=None):
642
648
if remaining <= 0 :
643
649
return {}
644
650
645
- def _poll_once (self , timeout_ms , max_records ):
651
+ def _poll_once (self , timeout_ms , max_records , update_offsets = True ):
646
652
"""Do one round of polling. In addition to checking for new data, this does
647
653
any needed heart-beating, auto-commits, and offset updates.
648
654
@@ -661,7 +667,7 @@ def _poll_once(self, timeout_ms, max_records):
661
667
662
668
# If data is available already, e.g. from a previous network client
663
669
# poll() call to commit, then just return it immediately
664
- records , partial = self ._fetcher .fetched_records (max_records , update_offsets = bool ( self . _iterator ) )
670
+ records , partial = self ._fetcher .fetched_records (max_records , update_offsets = update_offsets )
665
671
if records :
666
672
# Before returning the fetched records, we can send off the
667
673
# next round of fetches and avoid block waiting for their
@@ -681,7 +687,7 @@ def _poll_once(self, timeout_ms, max_records):
681
687
if self ._coordinator .need_rejoin ():
682
688
return {}
683
689
684
- records , _ = self ._fetcher .fetched_records (max_records , update_offsets = bool ( self . _iterator ) )
690
+ records , _ = self ._fetcher .fetched_records (max_records , update_offsets = update_offsets )
685
691
return records
686
692
687
693
def position (self , partition ):
@@ -1089,7 +1095,7 @@ def _update_fetch_positions(self, partitions):
1089
1095
1090
1096
def _message_generator_v2 (self ):
1091
1097
timeout_ms = 1000 * (self ._consumer_timeout - time .time ())
1092
- record_map = self .poll (timeout_ms = timeout_ms )
1098
+ record_map = self .poll (timeout_ms = timeout_ms , update_offsets = False )
1093
1099
for tp , records in six .iteritems (record_map ):
1094
1100
# Generators are stateful, and it is possible that the tp / records
1095
1101
# here may become stale during iteration -- i.e., we seek to a
0 commit comments