
Commit 71787da

bowenlan-amzn authored and shiv0408 committed
Fix: reset the filter built at segment level for date histogram optimization (opensearch-project#12267)
---------

Signed-off-by: bowenlan-amzn <[email protected]>
Signed-off-by: Shivansh Arora <[email protected]>
1 parent 2cc5e56 commit 71787da
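
The one-line summary above compresses the actual bug: the segment-level buildFastFilter used to write into the shared filters field of the fast-filter context (guarded by an assert that filters are only built once), so filters built for one segment could be carried over to, or block building for, the next segment. The fix, visible in the diff below, makes the segment-level builder return its filters so the caller keeps them local. A rough before/after sketch with placeholder types, not the real LeafReaderContext/Weight signatures:

// Sketch only: hypothetical placeholder types and method names, not the actual OpenSearch classes.
class FilterResetSketch {

    private Object[] sharedFilters; // shard-level filters, legitimately built once per shard

    // Before the fix (roughly): segment-level filters were written into the shared field,
    // so from the second segment on this either tripped the assert or reused filters
    // that were built for a different segment.
    void buildForSegmentOld(int segmentOrd) {
        assert sharedFilters == null : "Filters should only be built once, but they are already built";
        sharedFilters = buildFilters(segmentOrd);
    }

    // After the fix (roughly): segment-level filters are returned to the caller and stay
    // local to that segment's collection, leaving the shared shard-level state alone.
    Object[] buildForSegmentNew(int segmentOrd) {
        return buildFilters(segmentOrd);
    }

    private Object[] buildFilters(int segmentOrd) {
        return new Object[] { "filters for segment " + segmentOrd }; // stand-in for real filter construction
    }
}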

File tree

2 files changed: +117 -12 lines changed

FilterRewriteIT.java (new file)

@@ -0,0 +1,107 @@
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

package org.opensearch.search.aggregations.bucket;

import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;

import org.opensearch.action.index.IndexRequestBuilder;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.time.DateFormatter;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryBuilders;
import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval;
import org.opensearch.search.aggregations.bucket.histogram.Histogram;
import org.opensearch.test.OpenSearchIntegTestCase;
import org.opensearch.test.ParameterizedDynamicSettingsOpenSearchIntegTestCase;

import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING;
import static org.opensearch.search.aggregations.AggregationBuilders.dateHistogram;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;

@OpenSearchIntegTestCase.SuiteScopeTestCase
public class FilterRewriteIT extends ParameterizedDynamicSettingsOpenSearchIntegTestCase {

    // simulate segment level match all
    private static final QueryBuilder QUERY = QueryBuilders.termQuery("match", true);
    private static final Map<String, Long> expected = new HashMap<>();

    public FilterRewriteIT(Settings dynamicSettings) {
        super(dynamicSettings);
    }

    @ParametersFactory
    public static Collection<Object[]> parameters() {
        return Arrays.asList(
            new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() },
            new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() }
        );
    }

    @Override
    protected void setupSuiteScopeCluster() throws Exception {
        assertAcked(client().admin().indices().prepareCreate("idx").get());

        final int segmentCount = randomIntBetween(2, 10);
        final Set<Long> longTerms = new HashSet<>();

        final Map<String, Integer> dateTerms = new HashMap<>();
        for (int i = 0; i < segmentCount; i++) {
            final List<IndexRequestBuilder> indexRequests = new ArrayList<>();

            long longTerm;
            do {
                longTerm = randomInt(segmentCount * 2);
            } while (!longTerms.add(longTerm));
            ZonedDateTime time = ZonedDateTime.of(2024, 1, ((int) longTerm % 20) + 1, 0, 0, 0, 0, ZoneOffset.UTC);
            String dateTerm = DateFormatter.forPattern("yyyy-MM-dd").format(time);

            final int frequency = randomBoolean() ? 1 : randomIntBetween(2, 20);
            for (int j = 0; j < frequency; j++) {
                indexRequests.add(
                    client().prepareIndex("idx")
                        .setSource(jsonBuilder().startObject().field("date", dateTerm).field("match", true).endObject())
                );
            }
            expected.put(dateTerm + "T00:00:00.000Z", (long) frequency);

            indexRandom(true, false, indexRequests);
        }

        ensureSearchable();
    }

    public void testMinDocCountOnDateHistogram() throws Exception {
        final SearchResponse allResponse = client().prepareSearch("idx")
            .setSize(0)
            .setQuery(QUERY)
            .addAggregation(dateHistogram("histo").field("date").dateHistogramInterval(DateHistogramInterval.DAY).minDocCount(0))
            .get();

        final Histogram allHisto = allResponse.getAggregations().get("histo");
        Map<String, Long> results = new HashMap<>();
        allHisto.getBuckets().forEach(bucket -> results.put(bucket.getKeyAsString(), bucket.getDocCount()));

        for (Map.Entry<String, Long> entry : expected.entrySet()) {
            assertEquals(entry.getValue(), results.get(entry.getKey()));
        }
    }
}

server/src/main/java/org/opensearch/search/aggregations/bucket/FastFilterRewriteHelper.java

+10 -12
@@ -269,12 +269,15 @@ public void buildFastFilter() throws IOException {
         }
     }
 
-    public void buildFastFilter(LeafReaderContext leaf) throws IOException {
-        assert filters == null : "Filters should only be built once, but they are already built";
-        this.filters = this.aggregationType.buildFastFilter(leaf, context);
+    /**
+     * Built filters for a segment
+     */
+    public Weight[] buildFastFilter(LeafReaderContext leaf) throws IOException {
+        Weight[] filters = this.aggregationType.buildFastFilter(leaf, context);
         if (filters != null) {
             logger.debug("Fast filter built for shard {} segment {}", context.indexShard().shardId(), leaf.ord);
         }
+        return filters;
     }
 }
 
@@ -340,7 +343,6 @@ public Weight[] buildFastFilter(LeafReaderContext leaf, SearchContext context) throws IOException {
 
     private Weight[] buildFastFilter(SearchContext context, long[] bounds) throws IOException {
         bounds = processHardBounds(bounds);
-        logger.debug("Bounds are {} for shard {} with hard bound", bounds, context.indexShard().shardId());
         if (bounds == null) {
             return null;
         }
@@ -447,8 +449,7 @@ public static boolean tryFastFilterAggregation(
             fastFilterContext.context.indexShard().shardId(),
             ctx.ord
         );
-        fastFilterContext.buildFastFilter(ctx);
-        filters = fastFilterContext.filters;
+        filters = fastFilterContext.buildFastFilter(ctx);
         if (filters == null) {
             return false;
         }
@@ -480,20 +481,17 @@ public static boolean tryFastFilterAggregation(
                 incrementDocCount.accept(bucketKey, counts[i]);
                 s++;
                 if (s > size) {
-                    logger.debug("Fast filter optimization applied to composite aggregation with size {}", size);
-                    return true;
+                    break;
                 }
             }
         }
 
-        logger.debug("Fast filter optimization applied");
+        logger.debug("Fast filter optimization applied to shard {} segment {}", fastFilterContext.context.indexShard().shardId(), ctx.ord);
         return true;
     }
 
     private static boolean segmentMatchAll(SearchContext ctx, LeafReaderContext leafCtx) throws IOException {
         Weight weight = ctx.searcher().createWeight(ctx.query(), ScoreMode.COMPLETE_NO_SCORES, 1f);
-        assert weight != null;
-        int count = weight.count(leafCtx);
-        return count > 0 && count == leafCtx.reader().numDocs();
+        return weight != null && weight.count(leafCtx) == leafCtx.reader().numDocs();
     }
 }
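
The reworked segmentMatchAll above relies on Lucene's Weight#count: when the count it reports for a leaf equals that leaf reader's numDocs(), the query matches every live document in the segment, so the whole segment can be treated as match-all. A standalone sketch of the same check against a throwaway in-memory index; the class name and the match=true documents mirror the integration test but are otherwise illustrative:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.ByteBuffersDirectory;

public class SegmentMatchAllSketch {
    public static void main(String[] args) throws Exception {
        try (ByteBuffersDirectory dir = new ByteBuffersDirectory();
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            // Every document carries match=true, mirroring the integration test above.
            for (int i = 0; i < 5; i++) {
                Document doc = new Document();
                doc.add(new StringField("match", "true", Field.Store.NO));
                writer.addDocument(doc);
            }
            writer.commit();

            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                Query query = new TermQuery(new Term("match", "true"));
                Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);

                for (LeafReaderContext leaf : reader.leaves()) {
                    // Weight#count returns the exact per-segment hit count when it is cheap to
                    // compute, or -1 otherwise; equality with numDocs() means the query matches
                    // every live document in this segment.
                    boolean segmentMatchAll = weight != null && weight.count(leaf) == leaf.reader().numDocs();
                    System.out.println("segment " + leaf.ord + " matchAll=" + segmentMatchAll);
                }
            }
        }
    }
}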
