1
1
from .base import Partitioner
2
2
3
- class HashedPartitioner (Partitioner ):
3
+
4
+ class Murmur2Partitioner (Partitioner ):
4
5
"""
6
+ Implements a partitioner which selects the target partition based on
7
+ the hash of the key. Attempts to apply the same hashing
8
+ function as mainline java client.
9
+ """
10
+ def partition (self , key , partitions = None ):
11
+ if not partitions :
12
+ partitions = self .partitions
13
+
14
+ # https://github.com/apache/kafka/blob/0.8.2/clients/src/main/java/org/apache/kafka/clients/producer/internals/Partitioner.java#L69
15
+ idx = (murmur2 (key ) & 0x7fffffff ) % len (partitions )
16
+
17
+ return partitions [idx ]
18
+
19
+
20
+ class LegacyPartitioner (Partitioner ):
21
+ """DEPRECATED -- See Issue 374
22
+
5
23
Implements a partitioner which selects the target partition based on
6
24
the hash of the key
7
25
"""
@@ -12,3 +30,79 @@ def partition(self, key, partitions=None):
12
30
idx = hash (key ) % size
13
31
14
32
return partitions [idx ]
33
+
34
+
35
+ # Default will change to Murmur2 in 0.10 release
36
+ HashedPartitioner = LegacyPartitioner
37
+
38
+
39
+ # https://github.com/apache/kafka/blob/0.8.2/clients/src/main/java/org/apache/kafka/common/utils/Utils.java#L244
40
+ def murmur2 (key ):
41
+ """Pure-python Murmur2 implementation.
42
+
43
+ Based on java client, see org.apache.kafka.common.utils.Utils.murmur2
44
+
45
+ Args:
46
+ key: if not a bytearray, converted via bytearray(str(key))
47
+
48
+ Returns: MurmurHash2 of key bytearray
49
+ """
50
+
51
+ # Convert key to a bytearray
52
+ if not isinstance (key , bytearray ):
53
+ data = bytearray (str (key ))
54
+
55
+ length = len (data )
56
+ seed = 0x9747b28c
57
+ # 'm' and 'r' are mixing constants generated offline.
58
+ # They're not really 'magic', they just happen to work well.
59
+ m = 0x5bd1e995
60
+ r = 24
61
+
62
+ # Initialize the hash to a random value
63
+ h = seed ^ length
64
+ length4 = length / 4
65
+
66
+ for i in range (length4 ):
67
+ i4 = i * 4
68
+ k = ((data [i4 + 0 ] & 0xff ) +
69
+ ((data [i4 + 1 ] & 0xff ) << 8 ) +
70
+ ((data [i4 + 2 ] & 0xff ) << 16 ) +
71
+ ((data [i4 + 3 ] & 0xff ) << 24 ))
72
+ k &= 0xffffffff
73
+ k *= m
74
+ k &= 0xffffffff
75
+ k ^= (k % 0x100000000 ) >> r # k ^= k >>> r
76
+ k &= 0xffffffff
77
+ k *= m
78
+ k &= 0xffffffff
79
+
80
+ h *= m
81
+ h &= 0xffffffff
82
+ h ^= k
83
+ h &= 0xffffffff
84
+
85
+ # Handle the last few bytes of the input array
86
+ extra_bytes = length % 4
87
+ if extra_bytes == 3 :
88
+ h ^= (data [(length & ~ 3 ) + 2 ] & 0xff ) << 16
89
+ h &= 0xffffffff
90
+
91
+ if extra_bytes == 2 :
92
+ h ^= (data [(length & ~ 3 ) + 1 ] & 0xff ) << 8
93
+ h &= 0xffffffff
94
+
95
+ if extra_bytes == 1 :
96
+ h ^= (data [length & ~ 3 ] & 0xff )
97
+ h &= 0xffffffff
98
+ h *= m
99
+ h &= 0xffffffff
100
+
101
+ h ^= (h % 0x100000000 ) >> 13 # h >>> 13;
102
+ h &= 0xffffffff
103
+ h *= m
104
+ h &= 0xffffffff
105
+ h ^= (h % 0x100000000 ) >> 15 # h >>> 15;
106
+ h &= 0xffffffff
107
+
108
+ return h
0 commit comments