# -*- coding: utf-8 -*-
"""
hpack/hpack
~~~~~~~~~~~

Implements the HPACK header compression algorithm as detailed by the IETF.
"""
import collections
import logging

from .compat import to_byte
from .huffman import HuffmanDecoder, HuffmanEncoder
from .huffman_constants import (
    REQUEST_CODES, REQUEST_CODES_LENGTH
)

log = logging.getLogger(__name__)


def encode_integer(integer, prefix_bits):
    """
    This encodes an integer according to the wacky integer encoding rules
    defined in the HPACK spec.
    """
    log.debug("Encoding %d with %d bits", integer, prefix_bits)

    max_number = (2 ** prefix_bits) - 1

    if integer < max_number:
        return bytearray([integer])  # Seriously?
    else:
        elements = [max_number]
        integer = integer - max_number

        while integer >= 128:
            elements.append((integer % 128) + 128)
            integer = integer // 128  # We need integer division

        elements.append(integer)

        return bytearray(elements)
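
# Illustrative sketch: worked examples of the prefix-integer encoding above,
# using the values from RFC 7541, Appendix C.1.
#
#     encode_integer(10, 5)    -> bytearray([0x0a])
#     encode_integer(1337, 5)  -> bytearray([0x1f, 0x9a, 0x0a])
#
# With a 5-bit prefix the largest value that fits in the prefix is 31, so
# 1337 spills into two continuation bytes (1337 - 31 = 1306 = 26 + 10 * 128).
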
def decode_integer(data, prefix_bits):
    """
    This decodes an integer according to the wacky integer encoding rules
    defined in the HPACK spec. Returns a tuple of the decoded integer and the
    number of bytes that were consumed from ``data`` in order to get that
    integer.
    """
    multiple = lambda index: 128 ** (index - 1)
    max_number = (2 ** prefix_bits) - 1
    mask = 0xFF >> (8 - prefix_bits)
    index = 0

    number = to_byte(data[index]) & mask

    if number == max_number:

        while True:
            index += 1
            next_byte = to_byte(data[index])

            if next_byte >= 128:
                number += (next_byte - 128) * multiple(index)
            else:
                number += next_byte * multiple(index)
                break

    log.debug("Decoded %d, consumed %d bytes", number, index + 1)

    return (number, index + 1)
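
# Illustrative sketch: decoding reverses the example above (RFC 7541, C.1.2).
#
#     decode_integer(b'\x1f\x9a\x0a', 5)  -> (1337, 3)
#
# The prefix byte is saturated (31), so two continuation bytes follow; the
# second value in the tuple is the number of bytes consumed.
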
def _to_bytes(string):
    """
    Convert string to bytes.
    """
    if not isinstance(string, (str, bytes)):  # pragma: no cover
        string = str(string)

    return string if isinstance(string, bytes) else string.encode('utf-8')


def header_table_size(table):
    """
    Calculates the 'size' of the header table as defined by the HTTP/2
    specification.
    """
    # It's phenomenally frustrating that the specification feels it is able to
    # tell me how large the header table is, considering that its calculations
    # assume a very particular layout that most implementations will not have.
    # I appreciate it's an attempt to prevent DoS attacks by sending lots of
    # large headers in the header table, but it seems like a better approach
    # would be to limit the size of headers. Ah well.
    return sum(32 + len(name) + len(value) for name, value in table)
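
# Illustrative sketch: each table entry is charged its name length, its value
# length, and a fixed 32-byte overhead, so for example:
#
#     header_table_size([(b':method', b'GET')])  -> 32 + 7 + 3 == 42
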
class Encoder(object):
    """
    An HPACK encoder object. This object takes HTTP headers and emits encoded
    HTTP/2 header blocks.
    """
    # This is the static table of header fields.
    static_table = [
        (b':authority', b''),
        (b':method', b'GET'),
        (b':method', b'POST'),
        (b':path', b'/'),
        (b':path', b'/index.html'),
        (b':scheme', b'http'),
        (b':scheme', b'https'),
        (b':status', b'200'),
        (b':status', b'204'),
        (b':status', b'206'),
        (b':status', b'304'),
        (b':status', b'400'),
        (b':status', b'404'),
        (b':status', b'500'),
        (b'accept-charset', b''),
        (b'accept-encoding', b'gzip, deflate'),
        (b'accept-language', b''),
        (b'accept-ranges', b''),
        (b'accept', b''),
        (b'access-control-allow-origin', b''),
        (b'age', b''),
        (b'allow', b''),
        (b'authorization', b''),
        (b'cache-control', b''),
        (b'content-disposition', b''),
        (b'content-encoding', b''),
        (b'content-language', b''),
        (b'content-length', b''),
        (b'content-location', b''),
        (b'content-range', b''),
        (b'content-type', b''),
        (b'cookie', b''),
        (b'date', b''),
        (b'etag', b''),
        (b'expect', b''),
        (b'expires', b''),
        (b'from', b''),
        (b'host', b''),
        (b'if-match', b''),
        (b'if-modified-since', b''),
        (b'if-none-match', b''),
        (b'if-range', b''),
        (b'if-unmodified-since', b''),
        (b'last-modified', b''),
        (b'link', b''),
        (b'location', b''),
        (b'max-forwards', b''),
        (b'proxy-authenticate', b''),
        (b'proxy-authorization', b''),
        (b'range', b''),
        (b'referer', b''),
        (b'refresh', b''),
        (b'retry-after', b''),
        (b'server', b''),
        (b'set-cookie', b''),
        (b'strict-transport-security', b''),
        (b'transfer-encoding', b''),
        (b'user-agent', b''),
        (b'vary', b''),
        (b'via', b''),
        (b'www-authenticate', b''),
    ]

    def __init__(self):
        self.header_table = collections.deque()
        self._header_table_size = 4096  # This value is set by the standard.
        self.huffman_coder = HuffmanEncoder(
            REQUEST_CODES, REQUEST_CODES_LENGTH
        )

        # We need to keep track of whether the header table size has been
        # changed since we last encoded anything. If it has, we need to signal
        # that change in the HPACK block.
        self._table_size_changed = False

    @property
    def header_table_size(self):
        return self._header_table_size

    @header_table_size.setter
    def header_table_size(self, value):
        log.debug(
            "Setting header table size to %d from %d",
            value,
            self._header_table_size
        )

        # If the new value is larger than the current one, no worries!
        # Otherwise, we may need to shrink the header table.
        if value < self._header_table_size:
            current_size = header_table_size(self.header_table)

            while value < current_size:
                header = self.header_table.pop()
                n, v = header
                current_size -= (
                    32 + len(n) + len(v)
                )

                log.debug(
                    "Removed %s: %s from the encoder header table", n, v
                )

        if value != self._header_table_size:
            self._table_size_changed = True

        self._header_table_size = value

    def encode(self, headers, huffman=True):
        """
        Takes a set of headers and encodes them into an HPACK-encoded header
        block.

        Transforming the headers into a header block is a procedure that can
        be modeled as a chain or pipe. First, the headers are encoded. This
        encoding can be done a number of ways. If the header name-value pair
        is already in the header table, we can represent it using the indexed
        representation: the same is true if it is in the static table.
        Otherwise, a literal representation will be used.
        """
        log.debug("HPACK encoding %s", headers)
        header_block = []

        # Turn the headers into a list of tuples if possible. This is the
        # natural way to interact with them in HPACK.
        if isinstance(headers, dict):
            headers = headers.items()

        # Next, walk across the headers and turn them all into bytestrings.
        headers = [(_to_bytes(n), _to_bytes(v)) for n, v in headers]

        # Before we begin, if the header table size has been changed we need
        # to signal that appropriately.
        if self._table_size_changed:
            header_block.append(self._encode_table_size_change())
            self._table_size_changed = False

        # We can now encode each header in the block.
        header_block.extend(
            (self.add(header, huffman) for header in headers)
        )

        header_block = b''.join(header_block)

        log.debug("Encoded header block to %s", header_block)

        return header_block
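
    # Illustrative sketch of the encode() pipeline: a header that fully
    # matches a static table entry collapses to a single indexed byte.
    #
    #     e = Encoder()
    #     e.encode({':method': 'GET'}, huffman=False)  -> b'\x82'
    #
    # (':method', 'GET') is entry 2 in the static table, and the indexed
    # representation is that index with the high bit set.
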
    def add(self, to_add, huffman=False):
        """
        This function takes a header key-value tuple and serializes it.
        """
        log.debug("Adding %s to the header table", to_add)

        name, value = to_add

        # Search for a matching header in the header table.
        match = self.matching_header(name, value)

        if match is None:
            # Not in the header table. Encode using the literal syntax,
            # and add it to the header table.
            encoded = self._encode_literal(name, value, True, huffman)
            self._add_to_header_table(to_add)
            return encoded

        # The header is in the table, break out the values. If we matched
        # perfectly, we can use the indexed representation: otherwise we
        # can use the indexed literal.
        index, perfect = match

        if perfect:
            # Indexed representation.
            encoded = self._encode_indexed(index)
        else:
            # Indexed literal. We are going to add the header to the
            # header table unconditionally. It is a future todo to
            # filter out headers which are known to be ineffective for
            # indexing, since they just take space in the table and
            # push out other valuable headers.
            encoded = self._encode_indexed_literal(index, value, huffman)
            self._add_to_header_table(to_add)

        return encoded

    def matching_header(self, name, value):
        """
        Scans the header table and the static table. Returns a tuple, where
        the first value is the index of the match, and the second is the
        matched header itself for a full match or ``None`` for a partial
        (name-only) match. Prefers full matches to partial ones.

        Upsettingly, the header table is one-indexed, not zero-indexed.
        """
        partial_match = None
        static_table_len = len(Encoder.static_table)

        for (i, (n, v)) in enumerate(Encoder.static_table):
            if n == name:
                if v == value:
                    return (i + 1, Encoder.static_table[i])
                elif partial_match is None:
                    partial_match = (i + 1, None)

        for (i, (n, v)) in enumerate(self.header_table):
            if n == name:
                if v == value:
                    return (i + static_table_len + 1, self.header_table[i])
                elif partial_match is None:
                    partial_match = (i + static_table_len + 1, None)

        return partial_match
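
    # Illustrative sketch of the return convention, assuming an empty dynamic
    # table:
    #
    #     Encoder().matching_header(b':method', b'GET')
    #         -> (2, (b':method', b'GET'))
    #     Encoder().matching_header(b':method', b'PUT')
    #         -> (2, None)
    #
    # A full match carries the matched header in the second slot; a name-only
    # match carries None there, so callers can treat it as a boolean.
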
    def _add_to_header_table(self, header):
        """
        Adds a header to the header table, evicting old ones if necessary.
        """
        # Be optimistic: add the header straight away.
        self.header_table.appendleft(header)

        # Now, work out how big the header table is.
        actual_size = header_table_size(self.header_table)

        # Loop and remove whatever we need to.
        while actual_size > self.header_table_size:
            header = self.header_table.pop()
            n, v = header
            actual_size -= (
                32 + len(n) + len(v)
            )

            log.debug("Evicted %s: %s from the header table", n, v)

    def _encode_indexed(self, index):
        """
        Encodes a header using the indexed representation.
        """
        field = encode_integer(index, 7)
        field[0] = field[0] | 0x80  # we set the top bit
        return bytes(field)
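
    # Illustrative sketch: the index is emitted as a 7-bit prefix integer
    # with the top bit set, so small indices fit in a single byte.
    #
    #     _encode_indexed(2)   -> b'\x82'
    #     _encode_indexed(62)  -> b'\xbe'
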
    def _encode_literal(self, name, value, indexing, huffman=False):
        """
        Encodes a header with a literal name and literal value. If
        ``indexing`` is True, the header will be added to the header table:
        otherwise it will not.
        """
        prefix = b'\x40' if indexing else b'\x00'

        if huffman:
            name = self.huffman_coder.encode(name)
            value = self.huffman_coder.encode(value)

        name_len = encode_integer(len(name), 7)
        value_len = encode_integer(len(value), 7)

        if huffman:
            name_len[0] |= 0x80
            value_len[0] |= 0x80

        return b''.join(
            [prefix, bytes(name_len), name, bytes(value_len), value]
        )
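
    # Illustrative sketch, matching the worked example in RFC 7541, C.2.1
    # ("custom-key: custom-header", incremental indexing, no Huffman coding):
    #
    #     _encode_literal(b'custom-key', b'custom-header', True)
    #         -> b'\x40\x0acustom-key\x0dcustom-header'
    #
    # The 0x40 prefix marks incremental indexing, and each string is preceded
    # by its length as a 7-bit prefix integer.
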
    def _encode_indexed_literal(self, index, value, huffman=False):
        """
        Encodes a header with an indexed name and a literal value and performs
        incremental indexing.
        """
        prefix = encode_integer(index, 6)
        prefix[0] |= 0x40

        if huffman:
            value = self.huffman_coder.encode(value)

        value_len = encode_integer(len(value), 7)

        if huffman:
            value_len[0] |= 0x80

        return b''.join([bytes(prefix), bytes(value_len), value])

    def _encode_table_size_change(self):
        """
        Produces the encoded form of a header table size change context
        update.
        """
        size_bytes = encode_integer(self.header_table_size, 5)
        size_bytes[0] |= 0x20
        return bytes(size_bytes)


class Decoder(object):
    """
    An HPACK decoder object.
    """
    static_table = [
        (b':authority', b''),
        (b':method', b'GET'),
        (b':method', b'POST'),
        (b':path', b'/'),
        (b':path', b'/index.html'),
        (b':scheme', b'http'),
        (b':scheme', b'https'),
        (b':status', b'200'),
        (b':status', b'204'),
        (b':status', b'206'),
        (b':status', b'304'),
        (b':status', b'400'),
        (b':status', b'404'),
        (b':status', b'500'),
        (b'accept-charset', b''),
        (b'accept-encoding', b'gzip, deflate'),
        (b'accept-language', b''),
        (b'accept-ranges', b''),
        (b'accept', b''),
        (b'access-control-allow-origin', b''),
        (b'age', b''),
        (b'allow', b''),
        (b'authorization', b''),
        (b'cache-control', b''),
        (b'content-disposition', b''),
        (b'content-encoding', b''),
        (b'content-language', b''),
        (b'content-length', b''),
        (b'content-location', b''),
        (b'content-range', b''),
        (b'content-type', b''),
        (b'cookie', b''),
        (b'date', b''),
        (b'etag', b''),
        (b'expect', b''),
        (b'expires', b''),
        (b'from', b''),
        (b'host', b''),
        (b'if-match', b''),
        (b'if-modified-since', b''),
        (b'if-none-match', b''),
        (b'if-range', b''),
        (b'if-unmodified-since', b''),
        (b'last-modified', b''),
        (b'link', b''),
        (b'location', b''),
        (b'max-forwards', b''),
        (b'proxy-authenticate', b''),
        (b'proxy-authorization', b''),
        (b'range', b''),
        (b'referer', b''),
        (b'refresh', b''),
        (b'retry-after', b''),
        (b'server', b''),
        (b'set-cookie', b''),
        (b'strict-transport-security', b''),
        (b'transfer-encoding', b''),
        (b'user-agent', b''),
        (b'vary', b''),
        (b'via', b''),
        (b'www-authenticate', b''),
    ]

    def __init__(self):
        self.header_table = collections.deque()
        self._header_table_size = 4096  # This value is set by the standard.
        self.huffman_coder = HuffmanDecoder(
            REQUEST_CODES, REQUEST_CODES_LENGTH
        )

    @property
    def header_table_size(self):
        return self._header_table_size

    @header_table_size.setter
    def header_table_size(self, value):
        log.debug(
            "Resizing decoder header table to %d from %d",
            value,
            self._header_table_size
        )

        # If the new value is larger than the current one, no worries!
        # Otherwise, we may need to shrink the header table.
        if value < self._header_table_size:
            current_size = header_table_size(self.header_table)

            while value < current_size:
                header = self.header_table.pop()
                n, v = header
                current_size -= (
                    32 + len(n) + len(v)
                )

                log.debug("Evicting %s: %s from the header table", n, v)

        self._header_table_size = value

    def decode(self, data):
        """
        Takes an HPACK-encoded header block and decodes it into a header set.
        """
        log.debug("Decoding %s", data)

        headers = []
        data_len = len(data)
        current_index = 0

        while current_index < data_len:
            # Work out what kind of header we're decoding.
            # If the high bit is 1, it's an indexed field.
            current = to_byte(data[current_index])
            indexed = bool(current & 0x80)

            # Otherwise, if the second-highest bit is 1 it's a field that
            # does alter the header table.
            literal_index = bool(current & 0x40)

            # Otherwise, if the third-highest bit is 1 it's an encoding
            # context update.
            encoding_update = bool(current & 0x20)

            if indexed:
                header, consumed = self._decode_indexed(data[current_index:])
            elif literal_index:
                # It's a literal header that does affect the header table.
                header, consumed = self._decode_literal_index(
                    data[current_index:]
                )
            elif encoding_update:
                # It's an update to the encoding context. Decode it from the
                # current position, not from the start of the block.
                consumed = self._update_encoding_context(
                    data[current_index:]
                )
                header = None
            else:
                # It's a literal header that does not affect the header
                # table.
                header, consumed = self._decode_literal_no_index(
                    data[current_index:]
                )

            if header:
                headers.append(header)

            current_index += consumed

        return [(n.decode('utf-8'), v.decode('utf-8')) for n, v in headers]
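
    # Illustrative sketch: the first three bytes of the request example in
    # RFC 7541, C.3.1 are all single-byte indexed fields.
    #
    #     d = Decoder()
    #     d.decode(b'\x82\x86\x84')
    #         -> [(':method', 'GET'), (':scheme', 'http'), (':path', '/')]
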
    def _add_to_header_table(self, new_header):
        """
        Adds a header to the header table, evicting old ones if necessary.
        """
        # Be optimistic: add the header straight away.
        self.header_table.appendleft(new_header)

        # Now, work out how big the header table is.
        actual_size = header_table_size(self.header_table)

        # Loop and remove whatever we need to.
        while actual_size > self.header_table_size:
            header = self.header_table.pop()
            n, v = header
            actual_size -= (
                32 + len(n) + len(v)
            )

            log.debug("Evicting %s: %s from the header table", n, v)

    def _update_encoding_context(self, data):
        """
        Handles a byte that updates the encoding context.
        """
        # We've been asked to resize the header table.
        new_size, consumed = decode_integer(data, 5)
        self.header_table_size = new_size
        return consumed

    def _decode_indexed(self, data):
        """
        Decodes a header represented using the indexed representation.
        """
        index, consumed = decode_integer(data, 7)
        index -= 1  # Because this idiot table is 1-indexed. Ugh.

        if index >= len(Decoder.static_table):
            index -= len(Decoder.static_table)
            header = self.header_table[index]
        else:
            header = Decoder.static_table[index]

        log.debug("Decoded %s, consumed %d", header, consumed)
        return header, consumed
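
    # Illustrative sketch: an indexed field is a 7-bit prefix integer with
    # the high bit set, looked up in the static or dynamic table.
    #
    #     _decode_indexed(b'\x82')  -> ((b':method', b'GET'), 1)
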
    def _decode_literal_no_index(self, data):
        return self._decode_literal(data, False)

    def _decode_literal_index(self, data):
        return self._decode_literal(data, True)

    def _decode_literal(self, data, should_index):
        """
        Decodes a header represented with a literal.
        """
        total_consumed = 0

        # When should_index is true, if the low six bits of the first byte are
        # nonzero, the header name is indexed.
        # When should_index is false, if the low four bits of the first byte
        # are nonzero the header name is indexed.
        if should_index:
            indexed_name = to_byte(data[0]) & 0x3F
            name_len = 6
        else:
            indexed_name = to_byte(data[0]) & 0x0F
            name_len = 4

        if indexed_name:
            # Indexed header name.
            index, consumed = decode_integer(data, name_len)
            index -= 1

            if index >= len(Decoder.static_table):
                index -= len(Decoder.static_table)
                name = self.header_table[index][0]
            else:
                name = Decoder.static_table[index][0]

            total_consumed = consumed
            length = 0
        else:
            # Literal header name. The first byte was consumed, so we need to
            # move forward.
            data = data[1:]

            length, consumed = decode_integer(data, 7)
            name = data[consumed:consumed + length]

            if to_byte(data[0]) & 0x80:
                name = self.huffman_coder.decode(name)
            total_consumed = consumed + length + 1  # Since we moved forward 1.

        data = data[consumed + length:]

        # The header value is definitely length-based.
        length, consumed = decode_integer(data, 7)
        value = data[consumed:consumed + length]

        if to_byte(data[0]) & 0x80:
            value = self.huffman_coder.decode(value)

        # Update the total consumed length.
        total_consumed += length + consumed

        # If we've been asked to index this, add it to the header table.
        header = (name, value)
        if should_index:
            self._add_to_header_table(header)

        log.debug(
            "Decoded %s, total consumed %d bytes, indexed %s",
            header,
            total_consumed,
            should_index
        )

        return header, total_consumed
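

# A minimal, self-contained round-trip sketch using only the public API
# defined above. It is illustrative rather than part of the library: the
# literal header 'custom-key: custom-header' follows the worked example in
# RFC 7541, C.2.1.
if __name__ == '__main__':  # pragma: no cover
    encoder = Encoder()
    decoder = Decoder()

    # Encode one fully-indexed header and one literal header, without Huffman
    # coding so the bytes are easy to read.
    block = encoder.encode(
        [(':method', 'GET'), ('custom-key', 'custom-header')], huffman=False
    )
    print(block)  # b'\x82@\ncustom-key\rcustom-header'

    # Decoding the block recovers the original header set.
    print(decoder.decode(block))
    # [(':method', 'GET'), ('custom-key', 'custom-header')]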