gf2121 opened a new pull request #2139: URL: https://github.com/apache/lucene-solr/pull/2139
# Description In `decode6()`, `decode7()`, `decode14()`, `decode15()` and `decode24()`, the longs are always `&`-ed with the same mask and then shifted. By printing the generated assembly, I found that the JIT did not optimize these operations with SIMD instructions. But when we extract all the `&` operations and do them first, the JIT uses SIMD to optimize them. # Tests Java Version: > java version "11.0.6" 2020-01-14 LTS > Java(TM) SE Runtime Environment 18.9 (build 11.0.6+8-LTS) > Java HotSpot(TM) 64-Bit Server VM 18.9 (build 11.0.6+8-LTS, mixed mode) Using `decode15` as an example, here is a microbenchmark based on JMH: **code** ``` @Benchmark @BenchmarkMode({Mode.Throughput}) @Fork(1) @Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS) @Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) public void decode15a() { for (int iter = 0, tmpIdx = 0, longsIdx = 30; iter < 2; ++iter, tmpIdx += 15, longsIdx += 1) { long l0 = (TMP[tmpIdx+0] & MASK16_1) << 14; l0 |= (TMP[tmpIdx+1] & MASK16_1) << 13; l0 |= (TMP[tmpIdx+2] & MASK16_1) << 12; l0 |= (TMP[tmpIdx+3] & MASK16_1) << 11; l0 |= (TMP[tmpIdx+4] & MASK16_1) << 10; l0 |= (TMP[tmpIdx+5] & MASK16_1) << 9; l0 |= (TMP[tmpIdx+6] & MASK16_1) << 8; l0 |= (TMP[tmpIdx+7] & MASK16_1) << 7; l0 |= (TMP[tmpIdx+8] & MASK16_1) << 6; l0 |= (TMP[tmpIdx+9] & MASK16_1) << 5; l0 |= (TMP[tmpIdx+10] & MASK16_1) << 4; l0 |= (TMP[tmpIdx+11] & MASK16_1) << 3; l0 |= (TMP[tmpIdx+12] & MASK16_1) << 2; l0 |= (TMP[tmpIdx+13] & MASK16_1) << 1; l0 |= (TMP[tmpIdx+14] & MASK16_1) << 0; ARR[longsIdx+0] = l0; } } @Benchmark @BenchmarkMode({Mode.Throughput}) @Fork(1) @Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS) @Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) public void decode15b() { shiftLongs(TMP, 30, TMP, 0, 0, MASK16_1); for (int iter = 0, tmpIdx = 0, longsIdx = 30; iter < 2; ++iter, tmpIdx += 15, longsIdx += 1) { long l0 = TMP[tmpIdx+0] << 14; l0 |= TMP[tmpIdx+1] << 13; l0 |= TMP[tmpIdx+2] << 12; l0 |= TMP[tmpIdx+3] << 11; l0 |= 
TMP[tmpIdx+4] << 10; l0 |= TMP[tmpIdx+5] << 9; l0 |= TMP[tmpIdx+6] << 8; l0 |= TMP[tmpIdx+7] << 7; l0 |= TMP[tmpIdx+8] << 6; l0 |= TMP[tmpIdx+9] << 5; l0 |= TMP[tmpIdx+10] << 4; l0 |= TMP[tmpIdx+11] << 3; l0 |= TMP[tmpIdx+12] << 2; l0 |= TMP[tmpIdx+13] << 1; l0 |= TMP[tmpIdx+14] << 0; ARR[longsIdx+0] = l0; } } ``` **Result** ``` Benchmark Mode Cnt Score Error Units MyBenchmark.decode15a thrpt 10 65234108.600 ± 1336311.970 ops/s MyBenchmark.decode15b thrpt 10 106840656.363 ± 448026.092 ops/s ``` And an end-to-end test based on _wikimedium1m_ also looks positive overall: ``` Fuzzy1 131.77 (5.4%) 131.75 (4.2%) -0.0% ( -9% - 10%) 0.990 MedPhrase 146.41 (4.5%) 146.44 (4.8%) 0.0% ( -8% - 9%) 0.992 AndHighMed 643.10 (5.4%) 643.95 (5.5%) 0.1% ( -10% - 11%) 0.939 HighSpanNear 125.99 (5.7%) 126.48 (4.9%) 0.4% ( -9% - 11%) 0.818 Respell 164.81 (4.9%) 165.48 (4.5%) 0.4% ( -8% - 10%) 0.783 HighSloppyPhrase 103.20 (6.2%) 103.65 (5.8%) 0.4% ( -10% - 13%) 0.816 IntNRQ 662.80 (5.0%) 665.87 (5.1%) 0.5% ( -9% - 11%) 0.770 Prefix3 882.57 (6.8%) 887.18 (8.6%) 0.5% ( -13% - 17%) 0.832 LowSloppyPhrase 76.17 (5.5%) 76.57 (5.0%) 0.5% ( -9% - 11%) 0.754 AndHighHigh 236.71 (5.8%) 237.99 (5.2%) 0.5% ( -9% - 12%) 0.756 Fuzzy2 100.40 (5.6%) 101.02 (4.7%) 0.6% ( -9% - 11%) 0.708 OrHighHigh 154.05 (5.4%) 155.08 (5.0%) 0.7% ( -9% - 11%) 0.684 LowPhrase 327.86 (4.4%) 330.10 (4.9%) 0.7% ( -8% - 10%) 0.641 BrowseDayOfYearSSDVFacets 120.00 (5.1%) 120.88 (4.5%) 0.7% ( -8% - 10%) 0.627 MedTerm 2239.68 (6.3%) 2256.94 (5.9%) 0.8% ( -10% - 13%) 0.690 LowTerm 2516.56 (6.1%) 2537.04 (6.3%) 0.8% ( -10% - 14%) 0.679 OrHighMed 594.85 (6.7%) 599.76 (5.2%) 0.8% ( -10% - 13%) 0.664 MedSloppyPhrase 256.82 (5.2%) 259.03 (5.1%) 0.9% ( -9% - 11%) 0.601 PKLookup 221.95 (6.2%) 223.88 (5.6%) 0.9% ( -10% - 13%) 0.641 BrowseMonthSSDVFacets 135.72 (5.9%) 136.94 (5.4%) 0.9% ( -9% - 12%) 0.615 LowSpanNear 668.06 (6.4%) 674.95 (5.1%) 1.0% ( -9% - 13%) 0.572 AndHighLow 1603.74 (7.1%) 1621.34 (5.5%) 1.1% ( -10% - 14%) 0.585 
HighTerm 1927.72 (5.4%) 1949.95 (6.6%) 1.2% ( -10% - 13%) 0.547 HighIntervalsOrdered 293.62 (5.8%) 297.01 (5.0%) 1.2% ( -9% - 12%) 0.501 HighPhrase 396.34 (5.4%) 401.03 (5.4%) 1.2% ( -9% - 12%) 0.491 Wildcard 749.60 (7.8%) 759.43 (8.9%) 1.3% ( -14% - 19%) 0.620 MedSpanNear 576.19 (5.8%) 584.48 (5.2%) 1.4% ( -9% - 13%) 0.407 BrowseDayOfYearTaxoFacets 32.34 (7.6%) 32.86 (8.0%) 1.6% ( -12% - 18%) 0.513 BrowseDateTaxoFacets 32.23 (7.7%) 32.76 (8.0%) 1.6% ( -13% - 18%) 0.512 OrHighLow 526.26 (6.7%) 536.54 (6.3%) 2.0% ( -10% - 16%) 0.342 BrowseMonthTaxoFacets 35.48 (9.1%) 36.21 (9.1%) 2.1% ( -14% - 22%) 0.474 HighTermMonthSort 349.19 (12.8%) 364.73 (14.0%) 4.5% ( -19% - 35%) 0.294 HighTermDayOfYearSort 690.75 (11.2%) 724.87 (11.0%) 4.9% ( -15% - 30%) 0.159 ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org