Skip to content

Commit 122a78a

Browse files
Issue #6473 - canonicalPath refactor & fix alias check in PathResource (#6474)
Reduce multiple canonicalPath calls with single alias check in PathResource Revert to decoding and the normalizing URLs so that subsequent canonicalPath calls are noops. Co-authored-by: Lachlan Roberts <[email protected]>
1 parent a02ade7 commit 122a78a

File tree

29 files changed

+602
-382
lines changed

29 files changed

+602
-382
lines changed

jetty-http/src/main/java/org/eclipse/jetty/http/HttpCompliance.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
package org.eclipse.jetty.http;
2020

21+
import java.util.EnumMap;
2122
import java.util.EnumSet;
2223
import java.util.HashMap;
2324
import java.util.Map;
@@ -214,4 +215,46 @@ public EnumSet<HttpComplianceSection> sections()
214215
{
215216
return _sections;
216217
}
218+
219+
private static final EnumMap<HttpURI.Violation, HttpComplianceSection> __uriViolations = new EnumMap<>(HttpURI.Violation.class);
220+
static
221+
{
222+
// create a map from Violation to compliance in a loop, so that any new violations added are detected with ISE
223+
for (HttpURI.Violation violation : HttpURI.Violation.values())
224+
{
225+
switch (violation)
226+
{
227+
case SEPARATOR:
228+
__uriViolations.put(violation, HttpComplianceSection.NO_AMBIGUOUS_PATH_SEPARATORS);
229+
break;
230+
case SEGMENT:
231+
__uriViolations.put(violation, HttpComplianceSection.NO_AMBIGUOUS_PATH_SEGMENTS);
232+
break;
233+
case PARAM:
234+
__uriViolations.put(violation, HttpComplianceSection.NO_AMBIGUOUS_PATH_PARAMETERS);
235+
break;
236+
case ENCODING:
237+
__uriViolations.put(violation, HttpComplianceSection.NO_AMBIGUOUS_PATH_ENCODING);
238+
break;
239+
case EMPTY:
240+
__uriViolations.put(violation, HttpComplianceSection.NO_AMBIGUOUS_EMPTY_SEGMENT);
241+
break;
242+
case UTF16:
243+
__uriViolations.put(violation, HttpComplianceSection.NO_UTF16_ENCODINGS);
244+
break;
245+
default:
246+
throw new IllegalStateException();
247+
}
248+
}
249+
}
250+
251+
public static String checkUriCompliance(HttpCompliance compliance, HttpURI uri)
252+
{
253+
for (HttpURI.Violation violation : HttpURI.Violation.values())
254+
{
255+
if (uri.hasViolation(violation) && (compliance == null || compliance.sections().contains(__uriViolations.get(violation))))
256+
return violation.getMessage();
257+
}
258+
return null;
259+
}
217260
}

jetty-http/src/main/java/org/eclipse/jetty/http/HttpURI.java

Lines changed: 105 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -36,22 +36,45 @@
3636
/**
3737
* Http URI.
3838
* Parse an HTTP URI from a string or byte array. Given a URI
39-
* <code>http://user@host:port/path/info;param?query#fragment</code>
40-
* this class will split it into the following undecoded optional elements:<ul>
39+
* <code>http://user@host:port/path;param1/%2e/info;param2?query#fragment</code>
40+
* this class will split it into the following optional elements:<ul>
4141
* <li>{@link #getScheme()} - http:</li>
4242
* <li>{@link #getAuthority()} - //name@host:port</li>
4343
* <li>{@link #getHost()} - host</li>
4444
* <li>{@link #getPort()} - port</li>
45-
* <li>{@link #getPath()} - /path/info</li>
46-
* <li>{@link #getParam()} - param</li>
45+
* <li>{@link #getPath()} - /path;param1/%2e/info;param2</li>
46+
* <li>{@link #getDecodedPath()} - /path/info</li>
47+
* <li>{@link #getParam()} - param2</li>
4748
* <li>{@link #getQuery()} - query</li>
4849
* <li>{@link #getFragment()} - fragment</li>
4950
* </ul>
5051
*
51-
* <p>Any parameters will be returned from {@link #getPath()}, but are excluded from the
52-
* return value of {@link #getDecodedPath()}. If there are multiple parameters, the
53-
* {@link #getParam()} method returns only the last one.
54-
*/
52+
* <p>The path part of the URI is provided in both raw form ({@link #getPath()}) and
53+
* decoded form ({@link #getDecodedPath}), which has: path parameters removed,
54+
* percent encoded characters expanded and relative segments resolved. This approach
55+
* is somewhat contrary to <a href="https://tools.ietf.org/html/rfc3986#section-3.3">RFC3986</a>
56+
* which no longer defines path parameters (removed after
57+
* <a href="https://tools.ietf.org/html/rfc2396#section-3.3">RFC2396</a>) and specifies
58+
* that relative segment normalization should take place before percent encoded character
59+
* expansion. A literal interpretation of the RFC can result in URI paths with ambiguities
60+
* when viewed as strings. For example, a URI of {@code /foo%2f..%2fbar} is technically a single
61+
* segment of "/foo/../bar", but could easily be misinterpreted as 3 segments resolving to "/bar"
62+
* by a file system.
63+
* </p>
64+
* <p>
65+
* Thus this class avoid and/or detects such ambiguities. Furthermore, by decoding characters and
66+
* removing parameters before relative path normalization, ambiguous paths will be resolved in such
67+
* a way to be non-standard-but-non-ambiguous to down stream interpretation of the decoded path string.
68+
* The violations are recorded and available by API such as {@link #hasViolation(Violation)} so that requests
69+
* containing them may be rejected in case the non-standard-but-non-ambiguous interpretations
70+
* are not satisfactory for a given compliance configuration. Implementations that wish to
71+
* process ambiguous URI paths must configure the compliance modes to accept them and then perform
72+
* their own decoding of {@link #getPath()}.
73+
* </p>
74+
* <p>
75+
* If there are multiple path parameters, only the last one is returned by {@link #getParam()}.
76+
* </p>
77+
**/
5578
public class HttpURI
5679
{
5780
private enum State
@@ -69,28 +92,49 @@ private enum State
6992
ASTERISK
7093
}
7194

72-
enum Violation
95+
/**
96+
* Violations of safe URI interpretations
97+
*/
98+
public enum Violation
7399
{
74-
SEGMENT,
75-
SEPARATOR,
76-
PARAM,
77-
ENCODING,
78-
EMPTY,
79-
UTF16
100+
/**
101+
* Ambiguous path segments e.g. <code>/foo/%2E%2E/bar</code>
102+
*/
103+
SEGMENT("Ambiguous path segments"),
104+
/**
105+
* Ambiguous path separator within a URI segment e.g. <code>/foo%2Fbar</code>
106+
*/
107+
SEPARATOR("Ambiguous path separator"),
108+
/**
109+
* Ambiguous path parameters within a URI segment e.g. <code>/foo/..;/bar</code>
110+
*/
111+
PARAM("Ambiguous path parameters"),
112+
/**
113+
* Ambiguous double encoding within a URI segment e.g. <code>/%2557EB-INF</code>
114+
*/
115+
ENCODING("Ambiguous double encoding"),
116+
/**
117+
* Ambiguous empty segments e.g. <code>/foo//bar</code>
118+
*/
119+
EMPTY("Ambiguous empty segments"),
120+
/**
121+
* Non standard UTF-16 encoding eg <code>/foo%u2192bar</code>.
122+
*/
123+
UTF16("Non standard UTF-16 encoding");
124+
125+
private final String _message;
126+
127+
Violation(String message)
128+
{
129+
_message = message;
130+
}
131+
132+
String getMessage()
133+
{
134+
return _message;
135+
}
80136
}
81137

82-
/**
83-
* The concept of URI path parameters was originally specified in
84-
* <a href="https://tools.ietf.org/html/rfc2396#section-3.3">RFC2396</a>, but that was
85-
* obsoleted by
86-
* <a href="https://tools.ietf.org/html/rfc3986#section-3.3">RFC3986</a> which removed
87-
* a normative definition of path parameters. Specifically it excluded them from the
88-
* <a href="https://tools.ietf.org/html/rfc3986#section-5.2.4">Remove Dot Segments</a>
89-
* algorithm. This results in some ambiguity as dot segments can result from later
90-
* parameter removal or % encoding expansion, that are not removed from the URI
91-
* by {@link URIUtil#canonicalPath(String)}. Thus this class flags such ambiguous
92-
* path segments, so that they may be rejected by the server if so configured.
93-
*/
94138
private static final Trie<Boolean> __ambiguousSegments = new ArrayTrie<>();
95139

96140
static
@@ -179,6 +223,22 @@ public HttpURI(HttpURI uri)
179223
_emptySegment = false;
180224
}
181225

226+
public HttpURI(HttpURI schemeHostPort, HttpURI uri)
227+
{
228+
_scheme = schemeHostPort._scheme;
229+
_user = schemeHostPort._user;
230+
_host = schemeHostPort._host;
231+
_port = schemeHostPort._port;
232+
_path = uri._path;
233+
_param = uri._param;
234+
_query = uri._query;
235+
_fragment = uri._fragment;
236+
_uri = uri._uri;
237+
_decodedPath = uri._decodedPath;
238+
_violations.addAll(uri._violations);
239+
_emptySegment = false;
240+
}
241+
182242
public HttpURI(String uri)
183243
{
184244
_port = -1;
@@ -506,6 +566,8 @@ else if (c == '/')
506566
{
507567
switch (encodedValue)
508568
{
569+
case 0:
570+
throw new IllegalArgumentException("Illegal character in path");
509571
case '/':
510572
_violations.add(Violation.SEPARATOR);
511573
break;
@@ -677,10 +739,12 @@ else if (c == '/')
677739
}
678740
else if (_path != null)
679741
{
680-
String canonical = URIUtil.canonicalPath(_path);
681-
if (canonical == null)
682-
throw new BadMessageException("Bad URI");
683-
_decodedPath = URIUtil.decodePath(canonical);
742+
// The RFC requires this to be canonical before decoding, but this can leave dot segments and dot dot segments
743+
// which are not canonicalized and could be used in an attempt to bypass security checks.
744+
String decodeNonCanonical = URIUtil.decodePath(_path);
745+
_decodedPath = URIUtil.canonicalPath(decodeNonCanonical);
746+
if (_decodedPath == null)
747+
throw new IllegalArgumentException("Bad URI");
684748
}
685749
}
686750

@@ -794,6 +858,11 @@ public boolean hasViolations()
794858
return !_violations.isEmpty();
795859
}
796860

861+
public boolean hasViolation(Violation violation)
862+
{
863+
return _violations.contains(violation);
864+
}
865+
797866
/**
798867
* @return True if the URI encodes UTF-16 characters with '%u'.
799868
*/
@@ -839,6 +908,11 @@ public String getDecodedPath()
839908
return _decodedPath;
840909
}
841910

911+
/**
912+
* Get a URI path parameter. Multiple and in segment parameters are ignored and only
913+
* the last trailing parameter is returned.
914+
* @return The last path parameter or null
915+
*/
842916
public String getParam()
843917
{
844918
return _param;

0 commit comments

Comments
 (0)