Skip to content

Commit 9273cdc

Browse files
MINOR: Bytes lexicographic comparator could use compiler builtin (#21073)
The `Arrays.compare` method has a vectorized compiler intrinsic available and should offer better performance than a hand-rolled loop. The JDK implementation is at least able to compare 8 bytes at a time instead of 1 byte at a time, and avoids repeated bounds-checking. This operation is performance-sensitive if you use in-memory stores, since it is used to navigate the TreeMap backing the store. Reviewers: Sean Quah <[email protected]>, Bill Bejeck <[email protected]>
1 parent 25a8ae0 commit 9273cdc

File tree

3 files changed

+141
-9
lines changed

3 files changed

+141
-9
lines changed

clients/src/main/java/org/apache/kafka/common/utils/Bytes.java

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -194,17 +194,9 @@ public int compare(final byte[] buffer1, int offset1, int length1,
194194
return 0;
195195
}
196196

197-
// similar to Arrays.compare() but considers offset and length
198197
int end1 = offset1 + length1;
199198
int end2 = offset2 + length2;
200-
for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
201-
int a = buffer1[i] & 0xff;
202-
int b = buffer2[j] & 0xff;
203-
if (a != b) {
204-
return a - b;
205-
}
206-
}
207-
return length1 - length2;
199+
return Arrays.compareUnsigned(buffer1, offset1, end1, buffer2, offset2, end2);
208200
}
209201
}
210202
}

clients/src/test/java/org/apache/kafka/common/utils/BytesTest.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@
1818

1919
import org.junit.jupiter.api.Test;
2020

21+
import java.nio.charset.StandardCharsets;
2122
import java.util.Comparator;
2223
import java.util.NavigableMap;
2324
import java.util.TreeMap;
2425

2526
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
2627
import static org.junit.jupiter.api.Assertions.assertEquals;
2728
import static org.junit.jupiter.api.Assertions.assertThrows;
29+
import static org.junit.jupiter.api.Assertions.assertTrue;
2830

2931
public class BytesTest {
3032

@@ -81,4 +83,37 @@ public void testIncrementWithSubmap() {
8183

8284
assertEquals(subMapExpected.keySet(), subMapResults.keySet());
8385
}
86+
87+
@Test
88+
public void testBytesLexicographicCases() {
89+
assertEquals(0, cmp("", ""));
90+
assertTrue(cmp("", "aaa") < 0);
91+
assertTrue(cmp("aaa", "") > 0);
92+
93+
assertEquals(0, cmp("aaa", "aaa"));
94+
assertTrue(cmp("aaa", "bbb") < 0);
95+
assertTrue(cmp("bbb", "aaa") > 0);
96+
97+
assertTrue(cmp("aaaaaa", "bbb") < 0);
98+
assertTrue(cmp("aaa", "bbbbbb") < 0);
99+
assertTrue(cmp("bbbbbb", "aaa") > 0);
100+
assertTrue(cmp("bbb", "aaaaaa") > 0);
101+
102+
assertTrue(cmp("common_prefix_aaa", "common_prefix_bbb") < 0);
103+
assertTrue(cmp("common_prefix_bbb", "common_prefix_aaa") > 0);
104+
105+
assertTrue(cmp("common_prefix_aaaaaa", "common_prefix_bbb") < 0);
106+
assertTrue(cmp("common_prefix_aaa", "common_prefix_bbbbbb") < 0);
107+
assertTrue(cmp("common_prefix_bbbbbb", "common_prefix_aaa") > 0);
108+
assertTrue(cmp("common_prefix_bbb", "common_prefix_aaaaaa") > 0);
109+
110+
assertTrue(cmp("common_prefix", "common_prefix_aaa") < 0);
111+
assertTrue(cmp("common_prefix_aaa", "common_prefix") > 0);
112+
}
113+
114+
private int cmp(String l, String r) {
115+
return Bytes.BYTES_LEXICO_COMPARATOR.compare(
116+
l.getBytes(StandardCharsets.UTF_8),
117+
r.getBytes(StandardCharsets.UTF_8));
118+
}
84119
}
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.kafka.jmh.util;
18+
19+
import org.apache.kafka.common.utils.Bytes;
20+
21+
import org.openjdk.jmh.annotations.Benchmark;
22+
import org.openjdk.jmh.annotations.Fork;
23+
import org.openjdk.jmh.annotations.Measurement;
24+
import org.openjdk.jmh.annotations.OutputTimeUnit;
25+
import org.openjdk.jmh.annotations.Param;
26+
import org.openjdk.jmh.annotations.Scope;
27+
import org.openjdk.jmh.annotations.Setup;
28+
import org.openjdk.jmh.annotations.State;
29+
import org.openjdk.jmh.annotations.Warmup;
30+
import org.openjdk.jmh.infra.Blackhole;
31+
32+
import java.util.TreeMap;
33+
import java.util.concurrent.TimeUnit;
34+
35+
@State(Scope.Benchmark)
36+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
37+
@Fork(2)
38+
@Warmup(iterations = 5, time = 1)
39+
@Measurement(iterations = 10, time = 1)
40+
public class BytesCompareBenchmark {
41+
private static final int TREE_SIZE = 10240;
42+
43+
@Param({"8", "16", "32", "128", "1024"})
44+
private int bytes;
45+
46+
private byte[][] tv;
47+
private TreeMap<byte[], Integer> oldMap = new TreeMap<>(new HandwrittenLexicoComparator());
48+
private TreeMap<byte[], Integer> newMap = new TreeMap<>(Bytes.BYTES_LEXICO_COMPARATOR);
49+
50+
@Setup
51+
public void setup() {
52+
tv = new byte[TREE_SIZE][bytes];
53+
for (int i = 0; i < TREE_SIZE; i++) {
54+
tv[i][bytes - 4] = (byte) (i >>> 24);
55+
tv[i][bytes - 3] = (byte) (i >>> 16);
56+
tv[i][bytes - 2] = (byte) (i >>> 8);
57+
tv[i][bytes - 1] = (byte) i;
58+
oldMap.put(tv[i], i);
59+
newMap.put(tv[i], i);
60+
}
61+
}
62+
63+
@Benchmark
64+
public void samePrefixLexicoCustom(Blackhole bh) {
65+
for (int i = 0; i < TREE_SIZE; i++) {
66+
bh.consume(oldMap.get(tv[i]));
67+
}
68+
}
69+
70+
@Benchmark
71+
public void samePrefixLexicoJdk(Blackhole bh) {
72+
for (int i = 0; i < TREE_SIZE; i++) {
73+
bh.consume(newMap.get(tv[i]));
74+
}
75+
}
76+
77+
static class HandwrittenLexicoComparator implements Bytes.ByteArrayComparator {
78+
@Override
79+
public int compare(byte[] buffer1, byte[] buffer2) {
80+
return compare(buffer1, 0, buffer1.length, buffer2, 0, buffer2.length);
81+
}
82+
83+
public int compare(final byte[] buffer1, int offset1, int length1,
84+
final byte[] buffer2, int offset2, int length2) {
85+
86+
// short circuit equal case
87+
if (buffer1 == buffer2 &&
88+
offset1 == offset2 &&
89+
length1 == length2) {
90+
return 0;
91+
}
92+
93+
int end1 = offset1 + length1;
94+
int end2 = offset2 + length2;
95+
for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
96+
int a = buffer1[i] & 0xff;
97+
int b = buffer2[j] & 0xff;
98+
if (a != b) {
99+
return a - b;
100+
}
101+
}
102+
return length1 - length2;
103+
}
104+
}
105+
}

0 commit comments

Comments
 (0)