diff --git a/twisted/web/http.py b/twisted/web/http.py index ee8f005..562b95c 100644 --- a/twisted/web/http.py +++ b/twisted/web/http.py @@ -311,14 +311,42 @@ def toChunk(data): """ return ("%x\r\n" % len(data), data, "\r\n") + +def _ishexdigits(b): + """ + Is the string case-insensitively hexadecimal? + It must be composed of one or more characters in the ranges a-f, A-F + and 0-9. + """ + for c in b: + if c not in b"0123456789abcdefABCDEF": + return False + return b != b"" + + +def _hexint(b): + """ + Decode a hexadecimal integer. + Unlike L{int(b, 16)}, this raises L{ValueError} when the integer has + a prefix like C{b'0x'}, C{b'+'}, or C{b'-'}, which is desirable when + parsing network protocols. + """ + if not _ishexdigits(b): + raise ValueError(b) + return int(b, 16) + + def fromChunk(data): """ Convert chunk to string. + Note that this function is not specification compliant: it doesn't handle + chunk extensions. + @returns: tuple (result, remaining), may raise ValueError. """ prefix, rest = data.split('\r\n', 1) - length = int(prefix, 16) + length = _hexint(prefix) if length < 0: raise ValueError("Chunk length must be >= 0, not %d" % (length,)) if not rest[length:length + 2] == '\r\n': @@ -1295,6 +1323,16 @@ class PotentialDataLoss(Exception): +class _MalformedChunkedDataError(Exception): + """ + C{_ChunkedTransferDecoder} raises L{_MalformedChunkedDataError} from its + C{dataReceived} method when it encounters malformed data. This exception + indicates a client-side error. If this exception is raised, the connection + should be dropped with a 400 error. + """ + + + class _IdentityTransferDecoder(object): """ Protocol for accumulating bytes up to a specified length. 
This handles the @@ -1369,6 +1407,38 @@ class _IdentityTransferDecoder(object): raise _DataLoss() +_chunkExtChars = ( + b"\t !\"#$%&'()*+,-./0123456789:;<=>?@" + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`" + b"abcdefghijklmnopqrstuvwxyz{|}~" + b"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + b"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" + b"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" + b"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" + b"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" + b"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" + b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" + b"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" +) +""" +Characters that are valid in a chunk extension. +See RFC 7230 section 4.1.1:: + chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) + chunk-ext-name = token + chunk-ext-val = token / quoted-string +And section 3.2.6:: + token = 1*tchar + tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" + / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" + / DIGIT / ALPHA + ; any VCHAR, except delimiters + quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE + qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text + obs-text = %x80-FF +We don't check if chunk extensions are well-formed beyond validating that they +don't contain characters outside this range. 
+""" + class _ChunkedTransferDecoder(object): """ @@ -1431,7 +1501,16 @@ class _ChunkedTransferDecoder(object): if '\r\n' in data: line, rest = data.split('\r\n', 1) parts = line.split(';') - self.length = int(parts[0], 16) + try: + self.length = _hexint(parts[0]) + except ValueError: + raise _MalformedChunkedDataError("Chunk-size must be an integer.") + + if len(parts) > 1 and parts[1].translate(None, _chunkExtChars) != "": + raise _MalformedChunkedDataError( + "Invalid characters in chunk extensions: {!r}.".format(parts[1]) + ) + if self.length == 0: self.state = 'trailer' self.finish = True @@ -1552,7 +1631,7 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin): else: self.setRawMode() elif line[0] in ' \t': - self.__header = self.__header+'\n'+line + self.__header += " " + line.lstrip(" \t") else: if self.__header: self.headerReceived(self.__header) @@ -1577,6 +1656,9 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin): # Can this header determine the length? if header == b'content-length': + if not data.isdigit(): + fail() + return False try: length = int(data) except ValueError: @@ -1620,7 +1702,7 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin): """ header, data = line.split(':', 1) header = header.lower() - data = data.strip() + data = data.strip(" \t") if not self._maybeChooseTransferDecoder(header, data): return False