8
8
9
9
package org .elasticsearch .grok ;
10
10
11
+ import java .util .ArrayDeque ;
11
12
import java .util .ArrayList ;
12
13
import java .util .Collections ;
14
+ import java .util .Deque ;
15
+ import java .util .HashSet ;
13
16
import java .util .LinkedHashMap ;
17
+ import java .util .LinkedHashSet ;
14
18
import java .util .List ;
15
19
import java .util .Map ;
16
20
import java .util .Objects ;
21
+ import java .util .Set ;
17
22
18
23
public class PatternBank {
19
24
@@ -57,52 +62,102 @@ public PatternBank extendWith(Map<String, String> extraPatterns) {
57
62
}
58
63
59
64
/**
60
- * Checks whether patterns reference each other in a circular manner and if so fail with an exception.
65
+ * Checks whether patterns reference each other in a circular manner and if so fail with an IllegalArgumentException. It will also
66
+ * fail if any pattern value contains a pattern name that does not exist in the bank.
61
67
* <p>
62
68
* In a pattern, anything between <code>%{</code> and <code>}</code> or <code>:</code> is considered
63
69
* a reference to another named pattern. This method will navigate to all these named patterns and
64
70
* check for a circular reference.
65
71
*/
66
72
static void forbidCircularReferences (Map <String , String > bank ) {
67
- // first ensure that the pattern bank contains no simple circular references (i.e., any pattern
68
- // containing an immediate reference to itself) as those can cause the remainder of this algorithm
69
- // to recurse infinitely
70
- for (Map .Entry <String , String > entry : bank .entrySet ()) {
71
- if (patternReferencesItself (entry .getValue (), entry .getKey ())) {
72
- throw new IllegalArgumentException ("circular reference in pattern [" + entry .getKey () + "][" + entry .getValue () + "]" );
73
+ Set <String > allVisitedNodes = new HashSet <>();
74
+ Set <String > nodesVisitedMoreThanOnceInAPath = new HashSet <>();
75
+ // Walk the full path starting at each node in the graph:
76
+ for (String traversalStartNode : bank .keySet ()) {
77
+ if (nodesVisitedMoreThanOnceInAPath .contains (traversalStartNode ) == false && allVisitedNodes .contains (traversalStartNode )) {
78
+ // If we have seen this node before in a path, and it only appeared once in that path, there is no need to check it again
79
+ continue ;
73
80
}
74
- }
75
-
76
- // next, recursively check any other pattern names referenced in each pattern
77
- for (Map .Entry <String , String > entry : bank .entrySet ()) {
78
- String name = entry .getKey ();
79
- String pattern = entry .getValue ();
80
- innerForbidCircularReferences (bank , name , new ArrayList <>(), pattern );
81
+ Set <String > visitedFromThisStartNode = new LinkedHashSet <>();
82
+ /*
83
+ * This stack records where we are in the graph. Each String[] in the stack represents a collection of neighbors to the first
84
+ * non-null node in the layer below it. Null means that the path from that location has been fully traversed. Once all nodes
85
+ * at a layer have been set to null, the layer is popped. So for example say we have the graph
86
+ * ( 1 -> (2 -> (4, 5, 8), 3 -> (6, 7))) then when we are at 6 via 1 -> 3 -> 6, the stack looks like this:
87
+ * [6, 7]
88
+ * [null, 3]
89
+ * [1]
90
+ */
91
+ Deque <String []> stack = new ArrayDeque <>();
92
+ stack .push (new String [] { traversalStartNode });
93
+ // This is used so that we know that we're unwinding the stack and know not to get the current node's neighbors again.
94
+ boolean unwinding = false ;
95
+ while (stack .isEmpty () == false ) {
96
+ String [] currentLevel = stack .peek ();
97
+ int firstNonNullIndex = findFirstNonNull (currentLevel );
98
+ String node = currentLevel [firstNonNullIndex ];
99
+ boolean endOfThisPath = false ;
100
+ if (unwinding ) {
101
+ // We have completed all of this node's neighbors and have popped back to the node
102
+ endOfThisPath = true ;
103
+ } else if (traversalStartNode .equals (node ) && stack .size () > 1 ) {
104
+ Deque <String > reversedPath = new ArrayDeque <>();
105
+ for (String [] level : stack ) {
106
+ reversedPath .push (level [findFirstNonNull (level )]);
107
+ }
108
+ throw new IllegalArgumentException ("circular reference detected: " + String .join ("->" , reversedPath ));
109
+ } else if (visitedFromThisStartNode .contains (node )) {
110
+ /*
111
+ * We are only looking for a cycle starting and ending at traversalStartNode right now. But this node has been
112
+ * visited more than once in the path rooted at traversalStartNode. This could be because it is a cycle, or could be
113
+ * because two nodes in the path both point to it. We add it to nodesVisitedMoreThanOnceInAPath so that we make sure
114
+ * to check the path rooted at this node later.
115
+ */
116
+ nodesVisitedMoreThanOnceInAPath .add (node );
117
+ endOfThisPath = true ;
118
+ } else {
119
+ visitedFromThisStartNode .add (node );
120
+ String [] neighbors = getPatternNamesForPattern (bank , node );
121
+ if (neighbors .length == 0 ) {
122
+ endOfThisPath = true ;
123
+ } else {
124
+ stack .push (neighbors );
125
+ }
126
+ }
127
+ if (endOfThisPath ) {
128
+ if (firstNonNullIndex == currentLevel .length - 1 ) {
129
+ // We have handled all the neighbors at this level -- there are no more non-null ones
130
+ stack .pop ();
131
+ unwinding = true ;
132
+ } else {
133
+ currentLevel [firstNonNullIndex ] = null ;
134
+ unwinding = false ;
135
+ }
136
+ } else {
137
+ unwinding = false ;
138
+ }
139
+ }
140
+ allVisitedNodes .addAll (visitedFromThisStartNode );
81
141
}
82
142
}
83
143
84
- private static void innerForbidCircularReferences (Map <String , String > bank , String patternName , List <String > path , String pattern ) {
85
- if (patternReferencesItself (pattern , patternName )) {
86
- String message ;
87
- if (path .isEmpty ()) {
88
- message = "circular reference in pattern [" + patternName + "][" + pattern + "]" ;
89
- } else {
90
- message = "circular reference in pattern ["
91
- + path .remove (path .size () - 1 )
92
- + "]["
93
- + pattern
94
- + "] back to pattern ["
95
- + patternName
96
- + "]" ;
97
- // add rest of the path:
98
- if (path .isEmpty () == false ) {
99
- message += " via patterns [" + String .join ("=>" , path ) + "]" ;
100
- }
144
+ private static int findFirstNonNull (String [] level ) {
145
+ for (int i = 0 ; i < level .length ; i ++) {
146
+ if (level [i ] != null ) {
147
+ return i ;
101
148
}
102
- throw new IllegalArgumentException (message );
103
149
}
150
+ return -1 ;
151
+ }
104
152
105
- // next check any other pattern names found in the pattern
153
+ /**
154
+ * This method returns the array of pattern names (if any) found in the bank for the pattern named patternName. If no pattern names
155
+ * are found, an empty array is returned. If any of the list of pattern names to be returned does not exist in the bank, an exception
156
+ * is thrown.
157
+ */
158
+ private static String [] getPatternNamesForPattern (Map <String , String > bank , String patternName ) {
159
+ String pattern = bank .get (patternName );
160
+ List <String > patternReferences = new ArrayList <>();
106
161
for (int i = pattern .indexOf ("%{" ); i != -1 ; i = pattern .indexOf ("%{" , i + 1 )) {
107
162
int begin = i + 2 ;
108
163
int bracketIndex = pattern .indexOf ('}' , begin );
@@ -112,25 +167,22 @@ private static void innerForbidCircularReferences(Map<String, String> bank, Stri
112
167
end = bracketIndex ;
113
168
} else if (columnIndex != -1 && bracketIndex == -1 ) {
114
169
end = columnIndex ;
115
- } else if (bracketIndex != -1 && columnIndex != - 1 ) {
170
+ } else if (bracketIndex != -1 ) {
116
171
end = Math .min (bracketIndex , columnIndex );
117
172
} else {
118
173
throw new IllegalArgumentException ("pattern [" + pattern + "] has an invalid syntax" );
119
174
}
120
175
String otherPatternName = pattern .substring (begin , end );
121
- path .add (otherPatternName );
122
- String otherPattern = bank .get (otherPatternName );
123
- if (otherPattern == null ) {
124
- throw new IllegalArgumentException (
125
- "pattern [" + patternName + "] is referencing a non-existent pattern [" + otherPatternName + "]"
126
- );
176
+ if (patternReferences .contains (otherPatternName ) == false ) {
177
+ patternReferences .add (otherPatternName );
178
+ String otherPattern = bank .get (otherPatternName );
179
+ if (otherPattern == null ) {
180
+ throw new IllegalArgumentException (
181
+ "pattern [" + patternName + "] is referencing a non-existent pattern [" + otherPatternName + "]"
182
+ );
183
+ }
127
184
}
128
-
129
- innerForbidCircularReferences (bank , patternName , path , otherPattern );
130
185
}
131
- }
132
-
133
- private static boolean patternReferencesItself (String pattern , String patternName ) {
134
- return pattern .contains ("%{" + patternName + "}" ) || pattern .contains ("%{" + patternName + ":" );
186
+ return patternReferences .toArray (new String [0 ]);
135
187
}
136
188
}
0 commit comments