Skip to content

Commit 4e5c680

Browse files
authored
Improve performance of grok pattern cycle detection (#111947) (#112219)
1 parent eed3754 commit 4e5c680

File tree

3 files changed

+267
-61
lines changed

3 files changed

+267
-61
lines changed

docs/changelog/111947.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 111947
2+
summary: Improve performance of grok pattern cycle detection
3+
area: Ingest Node
4+
type: bug
5+
issues: []

libs/grok/src/main/java/org/elasticsearch/grok/PatternBank.java

Lines changed: 98 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,17 @@
88

99
package org.elasticsearch.grok;
1010

11+
import java.util.ArrayDeque;
1112
import java.util.ArrayList;
1213
import java.util.Collections;
14+
import java.util.Deque;
15+
import java.util.HashSet;
1316
import java.util.LinkedHashMap;
17+
import java.util.LinkedHashSet;
1418
import java.util.List;
1519
import java.util.Map;
1620
import java.util.Objects;
21+
import java.util.Set;
1722

1823
public class PatternBank {
1924

@@ -57,52 +62,102 @@ public PatternBank extendWith(Map<String, String> extraPatterns) {
5762
}
5863

5964
/**
60-
* Checks whether patterns reference each other in a circular manner and if so fail with an exception.
65+
* Checks whether patterns reference each other in a circular manner and if so fail with an IllegalArgumentException. It will also
66+
* fail if any pattern value contains a pattern name that does not exist in the bank.
6167
* <p>
6268
* In a pattern, anything between <code>%{</code> and <code>}</code> or <code>:</code> is considered
6369
* a reference to another named pattern. This method will navigate to all these named patterns and
6470
* check for a circular reference.
6571
*/
6672
static void forbidCircularReferences(Map<String, String> bank) {
67-
// first ensure that the pattern bank contains no simple circular references (i.e., any pattern
68-
// containing an immediate reference to itself) as those can cause the remainder of this algorithm
69-
// to recurse infinitely
70-
for (Map.Entry<String, String> entry : bank.entrySet()) {
71-
if (patternReferencesItself(entry.getValue(), entry.getKey())) {
72-
throw new IllegalArgumentException("circular reference in pattern [" + entry.getKey() + "][" + entry.getValue() + "]");
73+
Set<String> allVisitedNodes = new HashSet<>();
74+
Set<String> nodesVisitedMoreThanOnceInAPath = new HashSet<>();
75+
// Walk the full path starting at each node in the graph:
76+
for (String traversalStartNode : bank.keySet()) {
77+
if (nodesVisitedMoreThanOnceInAPath.contains(traversalStartNode) == false && allVisitedNodes.contains(traversalStartNode)) {
78+
// If we have seen this node before in a path, and it only appeared once in that path, there is no need to check it again
79+
continue;
7380
}
74-
}
75-
76-
// next, recursively check any other pattern names referenced in each pattern
77-
for (Map.Entry<String, String> entry : bank.entrySet()) {
78-
String name = entry.getKey();
79-
String pattern = entry.getValue();
80-
innerForbidCircularReferences(bank, name, new ArrayList<>(), pattern);
81+
Set<String> visitedFromThisStartNode = new LinkedHashSet<>();
82+
/*
83+
* This stack records where we are in the graph. Each String[] in the stack represents a collection of neighbors to the first
84+
* non-null node in the layer below it. Null means that the path from that location has been fully traversed. Once all nodes
85+
* at a layer have been set to null, the layer is popped. So for example say we have the graph
86+
* ( 1 -> (2 -> (4, 5, 8), 3 -> (6, 7))) then when we are at 6 via 1 -> 3 -> 6, the stack looks like this:
87+
* [6, 7]
88+
* [null, 3]
89+
* [1]
90+
*/
91+
Deque<String[]> stack = new ArrayDeque<>();
92+
stack.push(new String[] { traversalStartNode });
93+
// This is used so that we know that we're unwinding the stack and know not to get the current node's neighbors again.
94+
boolean unwinding = false;
95+
while (stack.isEmpty() == false) {
96+
String[] currentLevel = stack.peek();
97+
int firstNonNullIndex = findFirstNonNull(currentLevel);
98+
String node = currentLevel[firstNonNullIndex];
99+
boolean endOfThisPath = false;
100+
if (unwinding) {
101+
// We have completed all of this node's neighbors and have popped back to the node
102+
endOfThisPath = true;
103+
} else if (traversalStartNode.equals(node) && stack.size() > 1) {
104+
Deque<String> reversedPath = new ArrayDeque<>();
105+
for (String[] level : stack) {
106+
reversedPath.push(level[findFirstNonNull(level)]);
107+
}
108+
throw new IllegalArgumentException("circular reference detected: " + String.join("->", reversedPath));
109+
} else if (visitedFromThisStartNode.contains(node)) {
110+
/*
111+
* We are only looking for a cycle starting and ending at traversalStartNode right now. But this node has been
112+
* visited more than once in the path rooted at traversalStartNode. This could be because it is a cycle, or could be
113+
* because two nodes in the path both point to it. We add it to nodesVisitedMoreThanOnceInAPath so that we make sure
114+
* to check the path rooted at this node later.
115+
*/
116+
nodesVisitedMoreThanOnceInAPath.add(node);
117+
endOfThisPath = true;
118+
} else {
119+
visitedFromThisStartNode.add(node);
120+
String[] neighbors = getPatternNamesForPattern(bank, node);
121+
if (neighbors.length == 0) {
122+
endOfThisPath = true;
123+
} else {
124+
stack.push(neighbors);
125+
}
126+
}
127+
if (endOfThisPath) {
128+
if (firstNonNullIndex == currentLevel.length - 1) {
129+
// We have handled all the neighbors at this level -- there are no more non-null ones
130+
stack.pop();
131+
unwinding = true;
132+
} else {
133+
currentLevel[firstNonNullIndex] = null;
134+
unwinding = false;
135+
}
136+
} else {
137+
unwinding = false;
138+
}
139+
}
140+
allVisitedNodes.addAll(visitedFromThisStartNode);
81141
}
82142
}
83143

84-
private static void innerForbidCircularReferences(Map<String, String> bank, String patternName, List<String> path, String pattern) {
85-
if (patternReferencesItself(pattern, patternName)) {
86-
String message;
87-
if (path.isEmpty()) {
88-
message = "circular reference in pattern [" + patternName + "][" + pattern + "]";
89-
} else {
90-
message = "circular reference in pattern ["
91-
+ path.remove(path.size() - 1)
92-
+ "]["
93-
+ pattern
94-
+ "] back to pattern ["
95-
+ patternName
96-
+ "]";
97-
// add rest of the path:
98-
if (path.isEmpty() == false) {
99-
message += " via patterns [" + String.join("=>", path) + "]";
100-
}
144+
private static int findFirstNonNull(String[] level) {
145+
for (int i = 0; i < level.length; i++) {
146+
if (level[i] != null) {
147+
return i;
101148
}
102-
throw new IllegalArgumentException(message);
103149
}
150+
return -1;
151+
}
104152

105-
// next check any other pattern names found in the pattern
153+
/**
154+
* This method returns the array of pattern names (if any) found in the bank for the pattern named patternName. If no pattern names
155+
* are found, an empty array is returned. If any of the list of pattern names to be returned does not exist in the bank, an exception
156+
* is thrown.
157+
*/
158+
private static String[] getPatternNamesForPattern(Map<String, String> bank, String patternName) {
159+
String pattern = bank.get(patternName);
160+
List<String> patternReferences = new ArrayList<>();
106161
for (int i = pattern.indexOf("%{"); i != -1; i = pattern.indexOf("%{", i + 1)) {
107162
int begin = i + 2;
108163
int bracketIndex = pattern.indexOf('}', begin);
@@ -112,25 +167,22 @@ private static void innerForbidCircularReferences(Map<String, String> bank, Stri
112167
end = bracketIndex;
113168
} else if (columnIndex != -1 && bracketIndex == -1) {
114169
end = columnIndex;
115-
} else if (bracketIndex != -1 && columnIndex != -1) {
170+
} else if (bracketIndex != -1) {
116171
end = Math.min(bracketIndex, columnIndex);
117172
} else {
118173
throw new IllegalArgumentException("pattern [" + pattern + "] has an invalid syntax");
119174
}
120175
String otherPatternName = pattern.substring(begin, end);
121-
path.add(otherPatternName);
122-
String otherPattern = bank.get(otherPatternName);
123-
if (otherPattern == null) {
124-
throw new IllegalArgumentException(
125-
"pattern [" + patternName + "] is referencing a non-existent pattern [" + otherPatternName + "]"
126-
);
176+
if (patternReferences.contains(otherPatternName) == false) {
177+
patternReferences.add(otherPatternName);
178+
String otherPattern = bank.get(otherPatternName);
179+
if (otherPattern == null) {
180+
throw new IllegalArgumentException(
181+
"pattern [" + patternName + "] is referencing a non-existent pattern [" + otherPatternName + "]"
182+
);
183+
}
127184
}
128-
129-
innerForbidCircularReferences(bank, patternName, path, otherPattern);
130185
}
131-
}
132-
133-
private static boolean patternReferencesItself(String pattern, String patternName) {
134-
return pattern.contains("%{" + patternName + "}") || pattern.contains("%{" + patternName + ":");
186+
return patternReferences.toArray(new String[0]);
135187
}
136188
}

0 commit comments

Comments
 (0)