Skip to content

Commit ba077c5

Browse files
saratvemulapalliakolarkunnu
authored andcommitted
Adding allowlist setting for ingest-useragent and ingest-geoip processors (opensearch-project#15325)
* Adding allowlist setting for user-agent, geo-ip and updated tests for ingest-common. Signed-off-by: Sarat Vemulapalli <[email protected]> * Remove duplicate test in ingest-common Signed-off-by: Sarat Vemulapalli <[email protected]> * Adding changelog Signed-off-by: Sarat Vemulapalli <[email protected]> --------- Signed-off-by: Sarat Vemulapalli <[email protected]>
1 parent 789fe74 commit ba077c5

File tree

5 files changed

+286
-3
lines changed

5 files changed

+286
-3
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
2222
- [Workload Management] QueryGroup resource tracking framework changes ([#13897](https://github.com/opensearch-project/OpenSearch/pull/13897))
2323
- Support filtering on a large list encoded by bitmap ([#14774](https://github.com/opensearch-project/OpenSearch/pull/14774))
2424
- Add slice execution listeners to SearchOperationListener interface ([#15153](https://github.com/opensearch-project/OpenSearch/pull/15153))
25+
- Add allowlist setting for ingest-geoip and ingest-useragent ([#15325](https://github.com/opensearch-project/OpenSearch/pull/15325))
2526
- Adding access to noSubMatches and noOverlappingMatches in Hyphenation ([#13895](https://github.com/opensearch-project/OpenSearch/pull/13895))
2627
- Add support for index level max slice count setting for concurrent segment search ([#15336](https://github.com/opensearch-project/OpenSearch/pull/15336))
2728

modules/ingest-geoip/src/main/java/org/opensearch/ingest/geoip/IngestGeoIpModulePlugin.java

+38-2
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import org.opensearch.common.cache.CacheBuilder;
4545
import org.opensearch.common.io.PathUtils;
4646
import org.opensearch.common.settings.Setting;
47+
import org.opensearch.common.settings.Settings;
4748
import org.opensearch.common.util.io.IOUtils;
4849
import org.opensearch.ingest.Processor;
4950
import org.opensearch.plugins.IngestPlugin;
@@ -62,10 +63,18 @@
6263
import java.util.List;
6364
import java.util.Map;
6465
import java.util.Objects;
66+
import java.util.Set;
6567
import java.util.function.Function;
68+
import java.util.stream.Collectors;
6669
import java.util.stream.Stream;
6770

6871
public class IngestGeoIpModulePlugin extends Plugin implements IngestPlugin, Closeable {
72+
static final Setting<List<String>> PROCESSORS_ALLOWLIST_SETTING = Setting.listSetting(
73+
"ingest.geoip.processors.allowed",
74+
List.of(),
75+
Function.identity(),
76+
Setting.Property.NodeScope
77+
);
6978
public static final Setting<Long> CACHE_SIZE = Setting.longSetting("ingest.geoip.cache_size", 1000, 0, Setting.Property.NodeScope);
7079

7180
static String[] DEFAULT_DATABASE_FILENAMES = new String[] { "GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb" };
@@ -74,7 +83,7 @@ public class IngestGeoIpModulePlugin extends Plugin implements IngestPlugin, Clo
7483

7584
@Override
7685
public List<Setting<?>> getSettings() {
77-
return Arrays.asList(CACHE_SIZE);
86+
return Arrays.asList(CACHE_SIZE, PROCESSORS_ALLOWLIST_SETTING);
7887
}
7988

8089
@Override
@@ -90,7 +99,10 @@ public Map<String, Processor.Factory> getProcessors(Processor.Parameters paramet
9099
} catch (IOException e) {
91100
throw new RuntimeException(e);
92101
}
93-
return Collections.singletonMap(GeoIpProcessor.TYPE, new GeoIpProcessor.Factory(databaseReaders, new GeoIpCache(cacheSize)));
102+
return filterForAllowlistSetting(
103+
parameters.env.settings(),
104+
Map.of(GeoIpProcessor.TYPE, new GeoIpProcessor.Factory(databaseReaders, new GeoIpCache(cacheSize)))
105+
);
94106
}
95107

96108
/*
@@ -175,6 +187,30 @@ public void close() throws IOException {
175187
}
176188
}
177189

190+
private Map<String, Processor.Factory> filterForAllowlistSetting(Settings settings, Map<String, Processor.Factory> map) {
191+
if (PROCESSORS_ALLOWLIST_SETTING.exists(settings) == false) {
192+
return Map.copyOf(map);
193+
}
194+
final Set<String> allowlist = Set.copyOf(PROCESSORS_ALLOWLIST_SETTING.get(settings));
195+
// Assert that no unknown processors are defined in the allowlist
196+
final Set<String> unknownAllowlistProcessors = allowlist.stream()
197+
.filter(p -> map.containsKey(p) == false)
198+
.collect(Collectors.toUnmodifiableSet());
199+
if (unknownAllowlistProcessors.isEmpty() == false) {
200+
throw new IllegalArgumentException(
201+
"Processor(s) "
202+
+ unknownAllowlistProcessors
203+
+ " were defined in ["
204+
+ PROCESSORS_ALLOWLIST_SETTING.getKey()
205+
+ "] but do not exist"
206+
);
207+
}
208+
return map.entrySet()
209+
.stream()
210+
.filter(e -> allowlist.contains(e.getKey()))
211+
.collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, Map.Entry::getValue));
212+
}
213+
178214
/**
179215
* The in-memory cache for the geoip data. There should only be 1 instance of this class..
180216
* This cache differs from the maxmind's {@link NodeCache} such that this cache stores the deserialized Json objects to avoid the

modules/ingest-geoip/src/test/java/org/opensearch/ingest/geoip/IngestGeoIpModulePluginTests.java

+95
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,20 @@
3535
import com.maxmind.geoip2.model.AbstractResponse;
3636

3737
import org.opensearch.common.network.InetAddresses;
38+
import org.opensearch.common.settings.Settings;
39+
import org.opensearch.env.TestEnvironment;
40+
import org.opensearch.ingest.Processor;
3841
import org.opensearch.ingest.geoip.IngestGeoIpModulePlugin.GeoIpCache;
3942
import org.opensearch.test.OpenSearchTestCase;
43+
import org.opensearch.test.StreamsUtils;
44+
45+
import java.io.ByteArrayInputStream;
46+
import java.io.IOException;
47+
import java.io.UncheckedIOException;
48+
import java.nio.file.Files;
49+
import java.nio.file.Path;
50+
import java.util.List;
51+
import java.util.Set;
4052

4153
import static org.mockito.Mockito.mock;
4254

@@ -77,4 +89,87 @@ public void testInvalidInit() {
7789
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> new GeoIpCache(-1));
7890
assertEquals("geoip max cache size must be 0 or greater", ex.getMessage());
7991
}
92+
93+
public void testAllowList() throws IOException {
94+
runAllowListTest(List.of());
95+
runAllowListTest(List.of("geoip"));
96+
}
97+
98+
public void testInvalidAllowList() throws IOException {
99+
List<String> invalidAllowList = List.of("set");
100+
Settings.Builder settingsBuilder = Settings.builder()
101+
.putList(IngestGeoIpModulePlugin.PROCESSORS_ALLOWLIST_SETTING.getKey(), invalidAllowList);
102+
createDb(settingsBuilder);
103+
try (IngestGeoIpModulePlugin plugin = new IngestGeoIpModulePlugin()) {
104+
IllegalArgumentException e = expectThrows(
105+
IllegalArgumentException.class,
106+
() -> plugin.getProcessors(createParameters(settingsBuilder.build()))
107+
);
108+
assertEquals(
109+
"Processor(s) "
110+
+ invalidAllowList
111+
+ " were defined in ["
112+
+ IngestGeoIpModulePlugin.PROCESSORS_ALLOWLIST_SETTING.getKey()
113+
+ "] but do not exist",
114+
e.getMessage()
115+
);
116+
}
117+
}
118+
119+
public void testAllowListNotSpecified() throws IOException {
120+
Settings.Builder settingsBuilder = Settings.builder();
121+
settingsBuilder.remove(IngestGeoIpModulePlugin.PROCESSORS_ALLOWLIST_SETTING.getKey());
122+
createDb(settingsBuilder);
123+
try (IngestGeoIpModulePlugin plugin = new IngestGeoIpModulePlugin()) {
124+
final Set<String> expected = Set.of("geoip");
125+
assertEquals(expected, plugin.getProcessors(createParameters(settingsBuilder.build())).keySet());
126+
}
127+
}
128+
129+
private void runAllowListTest(List<String> allowList) throws IOException {
130+
Settings.Builder settingsBuilder = Settings.builder();
131+
createDb(settingsBuilder);
132+
final Settings settings = settingsBuilder.putList(IngestGeoIpModulePlugin.PROCESSORS_ALLOWLIST_SETTING.getKey(), allowList).build();
133+
try (IngestGeoIpModulePlugin plugin = new IngestGeoIpModulePlugin()) {
134+
assertEquals(Set.copyOf(allowList), plugin.getProcessors(createParameters(settings)).keySet());
135+
}
136+
}
137+
138+
private void createDb(Settings.Builder settingsBuilder) throws IOException {
139+
Path configDir = createTempDir();
140+
Path userAgentConfigDir = configDir.resolve("ingest-geoip");
141+
Files.createDirectories(userAgentConfigDir);
142+
settingsBuilder.put("ingest.geoip.database_path", configDir).put("path.home", configDir);
143+
try {
144+
Files.copy(
145+
new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-City.mmdb")),
146+
configDir.resolve("GeoLite2-City.mmdb")
147+
);
148+
Files.copy(
149+
new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-Country.mmdb")),
150+
configDir.resolve("GeoLite2-Country.mmdb")
151+
);
152+
Files.copy(
153+
new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-ASN.mmdb")),
154+
configDir.resolve("GeoLite2-ASN.mmdb")
155+
);
156+
} catch (IOException e) {
157+
throw new UncheckedIOException(e);
158+
}
159+
}
160+
161+
private static Processor.Parameters createParameters(Settings settings) {
162+
return new Processor.Parameters(
163+
TestEnvironment.newEnvironment(settings),
164+
null,
165+
null,
166+
null,
167+
() -> 0L,
168+
(a, b) -> null,
169+
null,
170+
null,
171+
$ -> {},
172+
null
173+
);
174+
}
80175
}

modules/ingest-user-agent/src/main/java/org/opensearch/ingest/useragent/IngestUserAgentModulePlugin.java

+38-1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
package org.opensearch.ingest.useragent;
3434

3535
import org.opensearch.common.settings.Setting;
36+
import org.opensearch.common.settings.Settings;
3637
import org.opensearch.ingest.Processor;
3738
import org.opensearch.plugins.IngestPlugin;
3839
import org.opensearch.plugins.Plugin;
@@ -47,10 +48,19 @@
4748
import java.util.HashMap;
4849
import java.util.List;
4950
import java.util.Map;
51+
import java.util.Set;
52+
import java.util.function.Function;
53+
import java.util.stream.Collectors;
5054
import java.util.stream.Stream;
5155

5256
public class IngestUserAgentModulePlugin extends Plugin implements IngestPlugin {
5357

58+
static final Setting<List<String>> PROCESSORS_ALLOWLIST_SETTING = Setting.listSetting(
59+
"ingest.useragent.processors.allowed",
60+
List.of(),
61+
Function.identity(),
62+
Setting.Property.NodeScope
63+
);
5464
private final Setting<Long> CACHE_SIZE_SETTING = Setting.longSetting(
5565
"ingest.user_agent.cache_size",
5666
1000,
@@ -77,7 +87,34 @@ public Map<String, Processor.Factory> getProcessors(Processor.Parameters paramet
7787
} catch (IOException e) {
7888
throw new RuntimeException(e);
7989
}
80-
return Collections.singletonMap(UserAgentProcessor.TYPE, new UserAgentProcessor.Factory(userAgentParsers));
90+
return filterForAllowlistSetting(
91+
parameters.env.settings(),
92+
Collections.singletonMap(UserAgentProcessor.TYPE, new UserAgentProcessor.Factory(userAgentParsers))
93+
);
94+
}
95+
96+
private Map<String, Processor.Factory> filterForAllowlistSetting(Settings settings, Map<String, Processor.Factory> map) {
97+
if (PROCESSORS_ALLOWLIST_SETTING.exists(settings) == false) {
98+
return Map.copyOf(map);
99+
}
100+
final Set<String> allowlist = Set.copyOf(PROCESSORS_ALLOWLIST_SETTING.get(settings));
101+
// Assert that no unknown processors are defined in the allowlist
102+
final Set<String> unknownAllowlistProcessors = allowlist.stream()
103+
.filter(p -> map.containsKey(p) == false)
104+
.collect(Collectors.toUnmodifiableSet());
105+
if (unknownAllowlistProcessors.isEmpty() == false) {
106+
throw new IllegalArgumentException(
107+
"Processor(s) "
108+
+ unknownAllowlistProcessors
109+
+ " were defined in ["
110+
+ PROCESSORS_ALLOWLIST_SETTING.getKey()
111+
+ "] but do not exist"
112+
);
113+
}
114+
return map.entrySet()
115+
.stream()
116+
.filter(e -> allowlist.contains(e.getKey()))
117+
.collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, Map.Entry::getValue));
81118
}
82119

83120
static Map<String, UserAgentParser> createUserAgentParsers(Path userAgentConfigDirectory, UserAgentCache cache) throws IOException {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.ingest.useragent;
10+
11+
import org.opensearch.common.settings.Settings;
12+
import org.opensearch.env.TestEnvironment;
13+
import org.opensearch.ingest.Processor;
14+
import org.opensearch.test.OpenSearchTestCase;
15+
import org.junit.Before;
16+
17+
import java.io.BufferedReader;
18+
import java.io.BufferedWriter;
19+
import java.io.IOException;
20+
import java.io.InputStreamReader;
21+
import java.nio.charset.StandardCharsets;
22+
import java.nio.file.Files;
23+
import java.nio.file.Path;
24+
import java.util.List;
25+
import java.util.Set;
26+
27+
public class IngestUserAgentModulePluginTests extends OpenSearchTestCase {
28+
private Settings.Builder settingsBuilder;
29+
30+
@Before
31+
public void setup() throws IOException {
32+
Path configDir = createTempDir();
33+
Path userAgentConfigDir = configDir.resolve("ingest-user-agent");
34+
Files.createDirectories(userAgentConfigDir);
35+
settingsBuilder = Settings.builder().put("ingest-user-agent", configDir).put("path.home", configDir);
36+
37+
// Copy file, leaving out the device parsers at the end
38+
String regexWithoutDevicesFilename = "regexes_without_devices.yml";
39+
try (
40+
BufferedReader reader = new BufferedReader(
41+
new InputStreamReader(UserAgentProcessor.class.getResourceAsStream("/regexes.yml"), StandardCharsets.UTF_8)
42+
);
43+
BufferedWriter writer = Files.newBufferedWriter(userAgentConfigDir.resolve(regexWithoutDevicesFilename));
44+
) {
45+
String line;
46+
while ((line = reader.readLine()) != null) {
47+
if (line.startsWith("device_parsers:")) {
48+
break;
49+
}
50+
51+
writer.write(line);
52+
writer.newLine();
53+
}
54+
}
55+
}
56+
57+
public void testAllowList() throws IOException {
58+
runAllowListTest(List.of());
59+
runAllowListTest(List.of("user_agent"));
60+
}
61+
62+
public void testInvalidAllowList() throws IOException {
63+
List<String> invalidAllowList = List.of("set");
64+
final Settings settings = settingsBuilder.putList(
65+
IngestUserAgentModulePlugin.PROCESSORS_ALLOWLIST_SETTING.getKey(),
66+
invalidAllowList
67+
).build();
68+
try (IngestUserAgentModulePlugin plugin = new IngestUserAgentModulePlugin()) {
69+
IllegalArgumentException e = expectThrows(
70+
IllegalArgumentException.class,
71+
() -> plugin.getProcessors(createParameters(settings))
72+
);
73+
assertEquals(
74+
"Processor(s) "
75+
+ invalidAllowList
76+
+ " were defined in ["
77+
+ IngestUserAgentModulePlugin.PROCESSORS_ALLOWLIST_SETTING.getKey()
78+
+ "] but do not exist",
79+
e.getMessage()
80+
);
81+
}
82+
}
83+
84+
public void testAllowListNotSpecified() throws IOException {
85+
settingsBuilder.remove(IngestUserAgentModulePlugin.PROCESSORS_ALLOWLIST_SETTING.getKey());
86+
try (IngestUserAgentModulePlugin plugin = new IngestUserAgentModulePlugin()) {
87+
final Set<String> expected = Set.of("user_agent");
88+
assertEquals(expected, plugin.getProcessors(createParameters(settingsBuilder.build())).keySet());
89+
}
90+
}
91+
92+
private void runAllowListTest(List<String> allowList) throws IOException {
93+
final Settings settings = settingsBuilder.putList(IngestUserAgentModulePlugin.PROCESSORS_ALLOWLIST_SETTING.getKey(), allowList)
94+
.build();
95+
try (IngestUserAgentModulePlugin plugin = new IngestUserAgentModulePlugin()) {
96+
assertEquals(Set.copyOf(allowList), plugin.getProcessors(createParameters(settings)).keySet());
97+
}
98+
}
99+
100+
private static Processor.Parameters createParameters(Settings settings) {
101+
return new Processor.Parameters(
102+
TestEnvironment.newEnvironment(settings),
103+
null,
104+
null,
105+
null,
106+
() -> 0L,
107+
(a, b) -> null,
108+
null,
109+
null,
110+
$ -> {},
111+
null
112+
);
113+
}
114+
}

0 commit comments

Comments
 (0)