Skip to content

Commit c6ac5ac

Browse files
committed
Make the acceptable compression overhead used by MultiOrdinals configurable and default to PackedInts.FASTEST (causing it to byte align).
Closes elastic#3623

Before this commit, this was the output of TermsFacetSearchBenchmark on my MacBook Air:

```
------------------ SUMMARY -------------------------------
name                 took    millis
terms_s              7.3s    36
terms_map_s          28.8s   144
terms_l              15.9s   79
terms_map_l          15.5s   77
terms_sm             1m      319
terms_map_sm         4.9m    1491
terms_lm             2.7m    825
terms_map_lm         2.7m    829
terms_stats_s_l      37.6s   188
terms_stats_s_lm     2.4m    722
terms_stats_sm_l     6.5m    1958
------------------ SUMMARY -------------------------------
```

After the change to FASTEST, we have:

```
------------------ SUMMARY -------------------------------
name                 took    millis
terms_s              6.9s    34
terms_map_s          28.8s   144
terms_l              17.4s   87
terms_map_l          17.6s   88
terms_sm             42s     210
terms_map_sm         4.2m    1287
terms_lm             2.3m    714
terms_map_lm         2.3m    716
terms_stats_s_l      37.5s   187
terms_stats_s_lm     1.6m    482
terms_stats_sm_l     6.1m    1852
------------------ SUMMARY -------------------------------
```
1 parent e33107d commit c6ac5ac

File tree

6 files changed

+603
-59
lines changed

6 files changed

+603
-59
lines changed
Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
package org.apache.lucene.util.packed;
2+
3+
/*
4+
* Licensed to the Apache Software Foundation (ASF) under one or more
5+
* contributor license agreements. See the NOTICE file distributed with
6+
* this work for additional information regarding copyright ownership.
7+
* The ASF licenses this file to You under the Apache License, Version 2.0
8+
* (the "License"); you may not use this file except in compliance with
9+
* the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing, software
14+
* distributed under the License is distributed on an "AS IS" BASIS,
15+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
* See the License for the specific language governing permissions and
17+
* limitations under the License.
18+
*/
19+
20+
import org.apache.lucene.util.ArrayUtil;
21+
import org.apache.lucene.util.RamUsageEstimator;
22+
import org.apache.lucene.util.Version;
23+
import org.elasticsearch.common.lucene.Lucene;
24+
25+
import java.util.Arrays;
26+
27+
import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
28+
29+
/**
30+
* Common functionality shared by {@link AppendingDeltaPackedLongBuffer} and {@link MonotonicAppendingLongBuffer}.
31+
*/
32+
abstract class XAbstractAppendingLongBuffer {
33+
static {
34+
// LUCENE MONITOR: this should be in Lucene 4.5.
35+
assert Lucene.VERSION == Version.LUCENE_44 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
36+
}
37+
38+
39+
static final int MIN_PAGE_SIZE = 64;
40+
// More than 1M doesn't really makes sense with these appending buffers
41+
// since their goal is to try to have small numbers of bits per value
42+
static final int MAX_PAGE_SIZE = 1 << 20;
43+
44+
final int pageShift, pageMask;
45+
PackedInts.Reader[] values;
46+
private long valuesBytes;
47+
int valuesOff;
48+
long[] pending;
49+
int pendingOff;
50+
float acceptableOverheadRatio;
51+
52+
XAbstractAppendingLongBuffer(int initialBlockCount, int pageSize, float acceptableOverheadRatio) {
53+
values = new PackedInts.Reader[initialBlockCount];
54+
pending = new long[pageSize];
55+
pageShift = checkBlockSize(pageSize, MIN_PAGE_SIZE, MAX_PAGE_SIZE);
56+
pageMask = pageSize - 1;
57+
valuesOff = 0;
58+
pendingOff = 0;
59+
this.acceptableOverheadRatio = acceptableOverheadRatio;
60+
}
61+
62+
final int pageSize() {
63+
return pageMask + 1;
64+
}
65+
66+
/**
67+
* Get the number of values that have been added to the buffer.
68+
*/
69+
public final long size() {
70+
long size = pendingOff;
71+
if (valuesOff > 0) {
72+
size += values[valuesOff - 1].size();
73+
}
74+
if (valuesOff > 1) {
75+
size += (long) (valuesOff - 1) * pageSize();
76+
}
77+
return size;
78+
}
79+
80+
/**
81+
* Append a value to this buffer.
82+
*/
83+
public final void add(long l) {
84+
if (pending == null) {
85+
throw new IllegalStateException("This buffer is frozen");
86+
}
87+
if (pendingOff == pending.length) {
88+
// check size
89+
if (values.length == valuesOff) {
90+
final int newLength = ArrayUtil.oversize(valuesOff + 1, 8);
91+
grow(newLength);
92+
}
93+
packPendingValues();
94+
valuesBytes += values[valuesOff].ramBytesUsed();
95+
++valuesOff;
96+
// reset pending buffer
97+
pendingOff = 0;
98+
}
99+
pending[pendingOff++] = l;
100+
}
101+
102+
void grow(int newBlockCount) {
103+
values = Arrays.copyOf(values, newBlockCount);
104+
}
105+
106+
abstract void packPendingValues();
107+
108+
/**
109+
* Get a value from this buffer.
110+
*/
111+
public final long get(long index) {
112+
assert index >= 0 && index < size();
113+
final int block = (int) (index >> pageShift);
114+
final int element = (int) (index & pageMask);
115+
return get(block, element);
116+
}
117+
118+
/**
119+
* Bulk get: read at least one and at most <code>len</code> longs starting
120+
* from <code>index</code> into <code>arr[off:off+len]</code> and return
121+
* the actual number of values that have been read.
122+
*/
123+
public final int get(long index, long[] arr, int off, int len) {
124+
assert len > 0 : "len must be > 0 (got " + len + ")";
125+
assert index >= 0 && index < size();
126+
assert off + len <= arr.length;
127+
128+
int block = (int) (index >> pageShift);
129+
int element = (int) (index & pageMask);
130+
return get(block, element, arr, off, len);
131+
}
132+
133+
134+
abstract long get(int block, int element);
135+
136+
abstract int get(int block, int element, long[] arr, int off, int len);
137+
138+
139+
/**
140+
* Return an iterator over the values of this buffer.
141+
*/
142+
public Iterator iterator() {
143+
return new Iterator();
144+
}
145+
146+
final public class Iterator {
147+
148+
long[] currentValues;
149+
int vOff, pOff;
150+
int currentCount; // number of entries of the current page
151+
152+
Iterator() {
153+
vOff = pOff = 0;
154+
if (valuesOff == 0) {
155+
currentValues = pending;
156+
currentCount = pendingOff;
157+
} else {
158+
currentValues = new long[values[0].size()];
159+
fillValues();
160+
}
161+
}
162+
163+
void fillValues() {
164+
if (vOff == valuesOff) {
165+
currentValues = pending;
166+
currentCount = pendingOff;
167+
} else {
168+
currentCount = values[vOff].size();
169+
for (int k = 0; k < currentCount; ) {
170+
k += get(vOff, k, currentValues, k, currentCount - k);
171+
}
172+
}
173+
}
174+
175+
/**
176+
* Whether or not there are remaining values.
177+
*/
178+
public final boolean hasNext() {
179+
return pOff < currentCount;
180+
}
181+
182+
/**
183+
* Return the next long in the buffer.
184+
*/
185+
public final long next() {
186+
assert hasNext();
187+
long result = currentValues[pOff++];
188+
if (pOff == currentCount) {
189+
vOff += 1;
190+
pOff = 0;
191+
if (vOff <= valuesOff) {
192+
fillValues();
193+
} else {
194+
currentCount = 0;
195+
}
196+
}
197+
return result;
198+
}
199+
200+
}
201+
202+
long baseRamBytesUsed() {
203+
return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
204+
+ 2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF // the 2 arrays
205+
+ 2 * RamUsageEstimator.NUM_BYTES_INT // the 2 offsets
206+
+ 2 * RamUsageEstimator.NUM_BYTES_INT // pageShift, pageMask
207+
+ RamUsageEstimator.NUM_BYTES_FLOAT // acceptable overhead
208+
+ RamUsageEstimator.NUM_BYTES_LONG; // valuesBytes
209+
}
210+
211+
/**
212+
* Return the number of bytes used by this instance.
213+
*/
214+
public long ramBytesUsed() {
215+
// TODO: this is called per-doc-per-norms/dv-field, can we optimize this?
216+
long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed())
217+
+ (pending != null ? RamUsageEstimator.sizeOf(pending) : 0L)
218+
+ RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * values.length); // values
219+
220+
return bytesUsed + valuesBytes;
221+
}
222+
223+
/**
224+
* Pack all pending values in this buffer. Subsequent calls to {@link #add(long)} will fail.
225+
*/
226+
public void freeze() {
227+
if (pendingOff > 0) {
228+
if (values.length == valuesOff) {
229+
grow(valuesOff + 1); // don't oversize!
230+
}
231+
packPendingValues();
232+
valuesBytes += values[valuesOff].ramBytesUsed();
233+
++valuesOff;
234+
pendingOff = 0;
235+
}
236+
pending = null;
237+
}
238+
239+
}
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
package org.apache.lucene.util.packed;
2+
3+
/*
4+
* Licensed to the Apache Software Foundation (ASF) under one or more
5+
* contributor license agreements. See the NOTICE file distributed with
6+
* this work for additional information regarding copyright ownership.
7+
* The ASF licenses this file to You under the Apache License, Version 2.0
8+
* (the "License"); you may not use this file except in compliance with
9+
* the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing, software
14+
* distributed under the License is distributed on an "AS IS" BASIS,
15+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
* See the License for the specific language governing permissions and
17+
* limitations under the License.
18+
*/
19+
20+
21+
import org.apache.lucene.util.Version;
22+
import org.elasticsearch.common.lucene.Lucene;
23+
24+
/**
25+
* Utility class to buffer a list of signed longs in memory. This class only
26+
* supports appending and is optimized for non-negative numbers with a uniform distribution over a fixed (limited) range
27+
*
28+
* @lucene.internal
29+
*/
30+
public final class XAppendingPackedLongBuffer extends XAbstractAppendingLongBuffer {
31+
32+
static {
33+
// LUCENE MONITOR: this should be in Lucene 4.5.
34+
assert Lucene.VERSION == Version.LUCENE_44 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
35+
}
36+
37+
38+
/**
39+
* {@link XAppendingPackedLongBuffer}
40+
*
41+
* @param initialPageCount the initial number of pages
42+
* @param pageSize the size of a single page
43+
* @param acceptableOverheadRatio an acceptable overhead ratio per value
44+
*/
45+
public XAppendingPackedLongBuffer(int initialPageCount, int pageSize, float acceptableOverheadRatio) {
46+
super(initialPageCount, pageSize, acceptableOverheadRatio);
47+
}
48+
49+
/**
50+
* Create an {@link XAppendingPackedLongBuffer} with initialPageCount=16,
51+
* pageSize=1024 and acceptableOverheadRatio={@link PackedInts#DEFAULT}
52+
*/
53+
public XAppendingPackedLongBuffer() {
54+
this(16, 1024, PackedInts.DEFAULT);
55+
}
56+
57+
/**
58+
* Create an {@link XAppendingPackedLongBuffer} with initialPageCount=16,
59+
* pageSize=1024
60+
*/
61+
public XAppendingPackedLongBuffer(float acceptableOverheadRatio) {
62+
this(16, 1024, acceptableOverheadRatio);
63+
}
64+
65+
@Override
66+
long get(int block, int element) {
67+
if (block == valuesOff) {
68+
return pending[element];
69+
} else {
70+
return values[block].get(element);
71+
}
72+
}
73+
74+
@Override
75+
int get(int block, int element, long[] arr, int off, int len) {
76+
if (block == valuesOff) {
77+
int sysCopyToRead = Math.min(len, pendingOff - element);
78+
System.arraycopy(pending, element, arr, off, sysCopyToRead);
79+
return sysCopyToRead;
80+
} else {
81+
/* packed block */
82+
return values[block].get(element, arr, off, len);
83+
}
84+
}
85+
86+
@Override
87+
void packPendingValues() {
88+
// compute max delta
89+
long minValue = pending[0];
90+
long maxValue = pending[0];
91+
for (int i = 1; i < pendingOff; ++i) {
92+
minValue = Math.min(minValue, pending[i]);
93+
maxValue = Math.max(maxValue, pending[i]);
94+
}
95+
96+
97+
// build a new packed reader
98+
final int bitsRequired = minValue < 0 ? 64 : PackedInts.bitsRequired(maxValue);
99+
final PackedInts.Mutable mutable = PackedInts.getMutable(pendingOff, bitsRequired, acceptableOverheadRatio);
100+
for (int i = 0; i < pendingOff; ) {
101+
i += mutable.set(i, pending, i, pendingOff - i);
102+
}
103+
values[valuesOff] = mutable;
104+
105+
}
106+
107+
}

0 commit comments

Comments
 (0)