diff options
author | Allan Sandfeld Jensen <[email protected]> | 2015-09-28 18:37:14 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <[email protected]> | 2015-09-29 07:47:06 +0000 |
commit | 0e8ff63a407fe323e215bb1a2c423c09a4747c8a (patch) | |
tree | e27e357e125d2d705bd504e1e3c8a3da1ed20f1d /chromium/url/url_canon_path.cc | |
parent | c3d0bb5bb15d008606b18b865841e19cd9bb5847 (diff) |
BASELINE: Update chromium to 45.0.2454.101upstream-45
Also adds web_cache component
Change-Id: I51238ceea8ee99854cc4989ae70a4fc2fc6bedcb
Reviewed-by: Kai Koehne <[email protected]>
Diffstat (limited to 'chromium/url/url_canon_path.cc')
-rw-r--r-- | chromium/url/url_canon_path.cc | 118 |
1 files changed, 100 insertions, 18 deletions
diff --git a/chromium/url/url_canon_path.cc b/chromium/url/url_canon_path.cc index ceff6896319..d155dfbf4dc 100644 --- a/chromium/url/url_canon_path.cc +++ b/chromium/url/url_canon_path.cc @@ -162,6 +162,76 @@ void BackUpToPreviousSlash(int path_begin_in_output, output->set_length(i + 1); } +// Looks for problematic nested escape sequences and escapes the output as +// needed to ensure they can't be misinterpreted. +// +// Our concern is that in input escape sequence that's invalid because it +// contains nested escape sequences might look valid once those are unescaped. +// For example, "%%300" is not a valid escape sequence, but after unescaping the +// inner "%30" this becomes "%00" which is valid. Leaving this in the output +// string can result in callers re-canonicalizing the string and unescaping this +// sequence, thus resulting in something fundamentally different than the +// original input here. This can cause a variety of problems. +// +// This function is called after we've just unescaped a sequence that's within +// two output characters of a previous '%' that we know didn't begin a valid +// escape sequence in the input string. We look for whether the output is going +// to turn into a valid escape sequence, and if so, convert the initial '%' into +// an escaped "%25" so the output can't be misinterpreted. +// +// |spec| is the input string we're canonicalizing. +// |next_input_index| is the index of the next unprocessed character in |spec|. +// |input_len| is the length of |spec|. +// |last_invalid_percent_index| is the index in |output| of a previously-seen +// '%' character. The caller knows this '%' character isn't followed by a valid +// escape sequence in the input string. +// |output| is the canonicalized output thus far. The caller guarantees this +// ends with a '%' followed by one or two characters, and the '%' is the one +// pointed to by |last_invalid_percent_index|. The last character in the string +// was just unescaped. +template<typename CHAR> +void CheckForNestedEscapes(const CHAR* spec, + int next_input_index, + int input_len, + int last_invalid_percent_index, + CanonOutput* output) { + const int length = output->length(); + const char last_unescaped_char = output->at(length - 1); + + // If |output| currently looks like "%c", we need to try appending the next + // input character to see if this will result in a problematic escape + // sequence. Note that this won't trigger on the first nested escape of a + // two-escape sequence like "%%30%30" -- we'll allow the conversion to + // "%0%30" -- but the second nested escape will be caught by this function + // when it's called again in that case. + const bool append_next_char = last_invalid_percent_index == length - 2; + if (append_next_char) { + // If the input doesn't contain a 7-bit character next, this case won't be a + // problem. + if ((next_input_index == input_len) || (spec[next_input_index] >= 0x80)) + return; + output->push_back(static_cast<char>(spec[next_input_index])); + } + + // Now output ends like "%cc". Try to unescape this. + int begin = last_invalid_percent_index; + unsigned char temp; + if (DecodeEscaped(output->data(), &begin, output->length(), &temp)) { + // New escape sequence found. Overwrite the characters following the '%' + // with "25", and push_back() the one or two characters that were following + // the '%' when we were called. + if (!append_next_char) + output->push_back(output->at(last_invalid_percent_index + 1)); + output->set(last_invalid_percent_index + 1, '2'); + output->set(last_invalid_percent_index + 2, '5'); + output->push_back(last_unescaped_char); + } else if (append_next_char) { + // Not a valid escape sequence, but we still need to undo appending the next + // source character so the caller can process it normally. + output->set_length(length); + } +} + // Appends the given path to the output. It assumes that if the input path // starts with a slash, it should be copied to the output. If no path has // already been appended to the output (the case when not resolving @@ -182,10 +252,15 @@ bool DoPartialPath(const CHAR* spec, CanonOutput* output) { int end = path.end(); + // We use this variable to minimize the amount of work done when unescaping -- + // we'll only call CheckForNestedEscapes() when this points at one of the last + // couple of characters in |output|. + int last_invalid_percent_index = INT_MIN; + bool success = true; for (int i = path.begin; i < end; i++) { UCHAR uch = static_cast<UCHAR>(spec[i]); - if (sizeof(CHAR) > sizeof(char) && uch >= 0x80) { + if (sizeof(CHAR) > 1 && uch >= 0x80) { // We only need to test wide input for having non-ASCII characters. For // narrow input, we'll always just use the lookup table. We don't try to // do anything tricky with decoding/validating UTF-8. This function will @@ -245,33 +320,40 @@ bool DoPartialPath(const CHAR* spec, unsigned char unescaped_value; if (DecodeEscaped(spec, &i, end, &unescaped_value)) { // Valid escape sequence, see if we keep, reject, or unescape it. + // Note that at this point DecodeEscape() will have advanced |i| to + // the last character of the escape sequence. char unescaped_flags = kPathCharLookup[unescaped_value]; if (unescaped_flags & UNESCAPE) { - // This escaped value shouldn't be escaped, copy it. + // This escaped value shouldn't be escaped. Try to copy it. output->push_back(unescaped_value); - } else if (unescaped_flags & INVALID_BIT) { - // Invalid escaped character, copy it and remember the error. - output->push_back('%'); - output->push_back(static_cast<char>(spec[i - 1])); - output->push_back(static_cast<char>(spec[i])); - success = false; + // If we just unescaped a value within 2 output characters of the + // '%' from a previously-detected invalid escape sequence, we + // might have an input string with problematic nested escape + // sequences; detect and fix them. + if (last_invalid_percent_index >= (output->length() - 3)) { + CheckForNestedEscapes(spec, i + 1, end, + last_invalid_percent_index, output); + } } else { - // Valid escaped character but we should keep it escaped. We - // don't want to change the case of any hex letters in case - // the server is sensitive to that, so we just copy the two - // characters without checking (DecodeEscape will have advanced - // to the last character of the pair). + // Either this is an invalid escaped character, or it's a valid + // escaped character we should keep escaped. In the first case we + // should just copy it exactly and remember the error. In the + // second we also copy exactly in case the server is sensitive to + // changing the case of any hex letters. output->push_back('%'); output->push_back(static_cast<char>(spec[i - 1])); output->push_back(static_cast<char>(spec[i])); + if (unescaped_flags & INVALID_BIT) + success = false; } } else { - // Invalid escape sequence. IE7 rejects any URLs with such - // sequences, while Firefox, IE6, and Safari all pass it through - // unchanged. We are more permissive unlike IE7. I don't think this - // can cause significant problems, if it does, we should change - // to be more like IE7. + // Invalid escape sequence. IE7+ rejects any URLs with such + // sequences, while other browsers pass them through unchanged. We + // use the permissive behavior. + // TODO(brettw): Consider testing IE's strict behavior, which would + // allow removing the code to handle nested escapes above. + last_invalid_percent_index = output->length(); output->push_back('%'); } |