# -*- coding: utf-8 -*-
"""
hpack/hpack
~~~~~~~~~~~

Implements the HPACK header compression algorithm as detailed by the IETF.
"""
import collections
import logging

from .compat import to_byte
from .huffman import HuffmanDecoder, HuffmanEncoder
from .huffman_constants import (
    REQUEST_CODES, REQUEST_CODES_LENGTH
)

log = logging.getLogger(__name__)


def encode_integer(integer, prefix_bits):
    """
    This encodes an integer according to the wacky integer encoding rules
    defined in the HPACK spec.
    """
    log.debug("Encoding %d with %d bits", integer, prefix_bits)

    max_number = (2 ** prefix_bits) - 1

    if integer < max_number:
        return bytearray([integer])  # Seriously?
    else:
        elements = [max_number]
        integer = integer - max_number

        while integer >= 128:
            elements.append((integer % 128) + 128)
            integer = integer // 128  # We need integer division

        elements.append(integer)

        return bytearray(elements)
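
# Illustrative sketch: worked examples of the prefix-integer encoding above,
# using the values from RFC 7541, Appendix C.1.
#
#     encode_integer(10, 5)    -> bytearray([0x0a])
#     encode_integer(1337, 5)  -> bytearray([0x1f, 0x9a, 0x0a])
#
# With a 5-bit prefix the largest value that fits in the prefix is 31, so
# 1337 spills into two continuation bytes (1337 - 31 = 1306 = 26 + 10 * 128).
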
def decode_integer(data, prefix_bits):
    """
    This decodes an integer according to the wacky integer encoding rules
    defined in the HPACK spec. Returns a tuple of the decoded integer and the
    number of bytes that were consumed from ``data`` in order to get that
    integer.
    """
    multiple = lambda index: 128 ** (index - 1)
    max_number = (2 ** prefix_bits) - 1
    mask = 0xFF >> (8 - prefix_bits)
    index = 0

    number = to_byte(data[index]) & mask

    if number == max_number:

        while True:
            index += 1
            next_byte = to_byte(data[index])

            if next_byte >= 128:
                number += (next_byte - 128) * multiple(index)
            else:
                number += next_byte * multiple(index)
                break

    log.debug("Decoded %d, consumed %d bytes", number, index + 1)

    return (number, index + 1)
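
# Illustrative sketch: decoding reverses the example above (RFC 7541, C.1.2).
#
#     decode_integer(b'\x1f\x9a\x0a', 5)  -> (1337, 3)
#
# The prefix byte is saturated (31), so two continuation bytes follow; the
# second value in the tuple is the number of bytes consumed.
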
def _to_bytes(string):
    """
    Convert string to bytes.
    """
    if not isinstance(string, (str, bytes)):  # pragma: no cover
        string = str(string)

    return string if isinstance(string, bytes) else string.encode('utf-8')


def header_table_size(table):
    """
    Calculates the 'size' of the header table as defined by the HTTP/2
    specification.
    """
    # It's phenomenally frustrating that the specification feels it is able to
    # tell me how large the header table is, considering that its calculations
    # assume a very particular layout that most implementations will not have.
    # I appreciate it's an attempt to prevent DoS attacks by sending lots of
    # large headers in the header table, but it seems like a better approach
    # would be to limit the size of headers. Ah well.
    return sum(32 + len(name) + len(value) for name, value in table)
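
# Illustrative sketch: each table entry is charged its name length, its value
# length, and a fixed 32-byte overhead, so for example:
#
#     header_table_size([(b':method', b'GET')])  -> 32 + 7 + 3 == 42
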
class Encoder(object):
    """
    An HPACK encoder object. This object takes HTTP headers and emits encoded
    HTTP/2 header blocks.
    """
    # This is the static table of header fields.
    static_table = [
        (b':authority', b''),
        (b':method', b'GET'),
        (b':method', b'POST'),
        (b':path', b'/'),
        (b':path', b'/index.html'),
        (b':scheme', b'http'),
        (b':scheme', b'https'),
        (b':status', b'200'),
        (b':status', b'204'),
        (b':status', b'206'),
        (b':status', b'304'),
        (b':status', b'400'),
        (b':status', b'404'),
        (b':status', b'500'),
        (b'accept-charset', b''),
        (b'accept-encoding', b'gzip, deflate'),
        (b'accept-language', b''),
        (b'accept-ranges', b''),
        (b'accept', b''),
        (b'access-control-allow-origin', b''),
        (b'age', b''),
        (b'allow', b''),
        (b'authorization', b''),
        (b'cache-control', b''),
        (b'content-disposition', b''),
        (b'content-encoding', b''),
        (b'content-language', b''),
        (b'content-length', b''),
        (b'content-location', b''),
        (b'content-range', b''),
        (b'content-type', b''),
        (b'cookie', b''),
        (b'date', b''),
        (b'etag', b''),
        (b'expect', b''),
        (b'expires', b''),
        (b'from', b''),
        (b'host', b''),
        (b'if-match', b''),
        (b'if-modified-since', b''),
        (b'if-none-match', b''),
        (b'if-range', b''),
        (b'if-unmodified-since', b''),
        (b'last-modified', b''),
        (b'link', b''),
        (b'location', b''),
        (b'max-forwards', b''),
        (b'proxy-authenticate', b''),
        (b'proxy-authorization', b''),
        (b'range', b''),
        (b'referer', b''),
        (b'refresh', b''),
        (b'retry-after', b''),
        (b'server', b''),
        (b'set-cookie', b''),
        (b'strict-transport-security', b''),
        (b'transfer-encoding', b''),
        (b'user-agent', b''),
        (b'vary', b''),
        (b'via', b''),
        (b'www-authenticate', b''),
    ]

    def __init__(self):
        self.header_table = collections.deque()
        self._header_table_size = 4096  # This value is set by the standard.
        self.huffman_coder = HuffmanEncoder(
            REQUEST_CODES, REQUEST_CODES_LENGTH
        )

        # We need to keep track of whether the header table size has been
        # changed since we last encoded anything. If it has, we need to signal
        # that change in the HPACK block.
        self._table_size_changed = False

    @property
    def header_table_size(self):
        return self._header_table_size

    @header_table_size.setter
    def header_table_size(self, value):
        log.debug(
            "Setting header table size to %d from %d",
            value,
            self._header_table_size
        )

        # If the new value is larger than the current one, no worries!
        # Otherwise, we may need to shrink the header table.
        if value < self._header_table_size:
            current_size = header_table_size(self.header_table)

            while value < current_size:
                header = self.header_table.pop()
                n, v = header
                current_size -= (
                    32 + len(n) + len(v)
                )

                log.debug(
                    "Removed %s: %s from the encoder header table", n, v
                )

        if value != self._header_table_size:
            self._table_size_changed = True

        self._header_table_size = value

    def encode(self, headers, huffman=True):
        """
        Takes a set of headers and encodes them into an HPACK-encoded header
        block.

        Transforming the headers into a header block is a procedure that can
        be modeled as a chain or pipe. First, the headers are encoded. This
        encoding can be done a number of ways. If the header name-value pair
        is already in the header table, we can represent it using the indexed
        representation: the same is true if it is in the static table.
        Otherwise, a literal representation will be used.
        """
        log.debug("HPACK encoding %s", headers)
        header_block = []

        # Turn the headers into a list of tuples if possible. This is the
        # natural way to interact with them in HPACK.
        if isinstance(headers, dict):
            headers = headers.items()

        # Next, walk across the headers and turn them all into bytestrings.
        headers = [(_to_bytes(n), _to_bytes(v)) for n, v in headers]

        # Before we begin, if the header table size has been changed we need
        # to signal that appropriately.
        if self._table_size_changed:
            header_block.append(self._encode_table_size_change())
            self._table_size_changed = False

        # We can now encode each header in the block.
        header_block.extend(
            (self.add(header, huffman) for header in headers)
        )

        header_block = b''.join(header_block)

        log.debug("Encoded header block to %s", header_block)

        return header_block
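
    # Illustrative sketch of the encode() pipeline: a header that fully
    # matches a static table entry collapses to a single indexed byte.
    #
    #     e = Encoder()
    #     e.encode({':method': 'GET'}, huffman=False)  -> b'\x82'
    #
    # (':method', 'GET') is entry 2 in the static table, and the indexed
    # representation is that index with the high bit set.
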
    def add(self, to_add, huffman=False):
        """
        This function takes a header key-value tuple and serializes it.
        """
        log.debug("Adding %s to the header table", to_add)

        name, value = to_add

        # Search for a matching header in the header table.
        match = self.matching_header(name, value)

        if match is None:
            # Not in the header table. Encode using the literal syntax,
            # and add it to the header table.
            encoded = self._encode_literal(name, value, True, huffman)
            self._add_to_header_table(to_add)
            return encoded

        # The header is in the table, break out the values. If we matched
        # perfectly, we can use the indexed representation: otherwise we
        # can use the indexed literal.
        index, perfect = match

        if perfect:
            # Indexed representation.
            encoded = self._encode_indexed(index)
        else:
            # Indexed literal. We are going to add the header to the
            # header table unconditionally. It is a future todo to
            # filter out headers which are known to be ineffective for
            # indexing, since they just take space in the table and
            # push out other valuable headers.
            encoded = self._encode_indexed_literal(index, value, huffman)
            self._add_to_header_table(to_add)

        return encoded

    def matching_header(self, name, value):
        """
        Scans the header table and the static table. Returns a tuple, where
        the first value is the index of the match, and the second is the
        matched header itself for a full match or ``None`` for a partial
        (name-only) match. Prefers full matches to partial ones.

        Upsettingly, the header table is one-indexed, not zero-indexed.
        """
        partial_match = None
        static_table_len = len(Encoder.static_table)

        for (i, (n, v)) in enumerate(Encoder.static_table):
            if n == name:
                if v == value:
                    return (i + 1, Encoder.static_table[i])
                elif partial_match is None:
                    partial_match = (i + 1, None)

        for (i, (n, v)) in enumerate(self.header_table):
            if n == name:
                if v == value:
                    return (i + static_table_len + 1, self.header_table[i])
                elif partial_match is None:
                    partial_match = (i + static_table_len + 1, None)

        return partial_match
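
    # Illustrative sketch of the return convention, assuming an empty dynamic
    # table:
    #
    #     Encoder().matching_header(b':method', b'GET')
    #         -> (2, (b':method', b'GET'))
    #     Encoder().matching_header(b':method', b'PUT')
    #         -> (2, None)
    #
    # A full match carries the matched header in the second slot; a name-only
    # match carries None there, so callers can treat it as a boolean.
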
    def _add_to_header_table(self, header):
        """
        Adds a header to the header table, evicting old ones if necessary.
        """
        # Be optimistic: add the header straight away.
        self.header_table.appendleft(header)

        # Now, work out how big the header table is.
        actual_size = header_table_size(self.header_table)

        # Loop and remove whatever we need to.
        while actual_size > self.header_table_size:
            header = self.header_table.pop()
            n, v = header
            actual_size -= (
                32 + len(n) + len(v)
            )

            log.debug("Evicted %s: %s from the header table", n, v)

    def _encode_indexed(self, index):
        """
        Encodes a header using the indexed representation.
        """
        field = encode_integer(index, 7)
        field[0] = field[0] | 0x80  # we set the top bit
        return bytes(field)
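
    # Illustrative sketch: the index is emitted as a 7-bit prefix integer
    # with the top bit set, so small indices fit in a single byte.
    #
    #     _encode_indexed(2)   -> b'\x82'
    #     _encode_indexed(62)  -> b'\xbe'
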
    def _encode_literal(self, name, value, indexing, huffman=False):
        """
        Encodes a header with a literal name and literal value. If
        ``indexing`` is True, the header will be added to the header table:
        otherwise it will not.
        """
        prefix = b'\x40' if indexing else b'\x00'

        if huffman:
            name = self.huffman_coder.encode(name)
            value = self.huffman_coder.encode(value)

        name_len = encode_integer(len(name), 7)
        value_len = encode_integer(len(value), 7)

        if huffman:
            name_len[0] |= 0x80
            value_len[0] |= 0x80

        return b''.join(
            [prefix, bytes(name_len), name, bytes(value_len), value]
        )
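
    # Illustrative sketch, matching the worked example in RFC 7541, C.2.1
    # ("custom-key: custom-header", incremental indexing, no Huffman coding):
    #
    #     _encode_literal(b'custom-key', b'custom-header', True)
    #         -> b'\x40\x0acustom-key\x0dcustom-header'
    #
    # The 0x40 prefix marks incremental indexing, and each string is preceded
    # by its length as a 7-bit prefix integer.
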
    def _encode_indexed_literal(self, index, value, huffman=False):
        """
        Encodes a header with an indexed name and a literal value and performs
        incremental indexing.
        """
        prefix = encode_integer(index, 6)
        prefix[0] |= 0x40

        if huffman:
            value = self.huffman_coder.encode(value)

        value_len = encode_integer(len(value), 7)

        if huffman:
            value_len[0] |= 0x80

        return b''.join([bytes(prefix), bytes(value_len), value])

    def _encode_table_size_change(self):
        """
        Produces the encoded form of a header table size change context
        update.
        """
        size_bytes = encode_integer(self.header_table_size, 5)
        size_bytes[0] |= 0x20
        return bytes(size_bytes)


class Decoder(object):
    """
    An HPACK decoder object.
    """
    static_table = [
        (b':authority', b''),
        (b':method', b'GET'),
        (b':method', b'POST'),
        (b':path', b'/'),
        (b':path', b'/index.html'),
        (b':scheme', b'http'),
        (b':scheme', b'https'),
        (b':status', b'200'),
        (b':status', b'204'),
        (b':status', b'206'),
        (b':status', b'304'),
        (b':status', b'400'),
        (b':status', b'404'),
        (b':status', b'500'),
        (b'accept-charset', b''),
        (b'accept-encoding', b'gzip, deflate'),
        (b'accept-language', b''),
        (b'accept-ranges', b''),
        (b'accept', b''),
        (b'access-control-allow-origin', b''),
        (b'age', b''),
        (b'allow', b''),
        (b'authorization', b''),
        (b'cache-control', b''),
        (b'content-disposition', b''),
        (b'content-encoding', b''),
        (b'content-language', b''),
        (b'content-length', b''),
        (b'content-location', b''),
        (b'content-range', b''),
        (b'content-type', b''),
        (b'cookie', b''),
        (b'date', b''),
        (b'etag', b''),
        (b'expect', b''),
        (b'expires', b''),
        (b'from', b''),
        (b'host', b''),
        (b'if-match', b''),
        (b'if-modified-since', b''),
        (b'if-none-match', b''),
        (b'if-range', b''),
        (b'if-unmodified-since', b''),
        (b'last-modified', b''),
        (b'link', b''),
        (b'location', b''),
        (b'max-forwards', b''),
        (b'proxy-authenticate', b''),
        (b'proxy-authorization', b''),
        (b'range', b''),
        (b'referer', b''),
        (b'refresh', b''),
        (b'retry-after', b''),
        (b'server', b''),
        (b'set-cookie', b''),
        (b'strict-transport-security', b''),
        (b'transfer-encoding', b''),
        (b'user-agent', b''),
        (b'vary', b''),
        (b'via', b''),
        (b'www-authenticate', b''),
    ]

    def __init__(self):
        self.header_table = collections.deque()
        self._header_table_size = 4096  # This value is set by the standard.
        self.huffman_coder = HuffmanDecoder(
            REQUEST_CODES, REQUEST_CODES_LENGTH
        )

    @property
    def header_table_size(self):
        return self._header_table_size

    @header_table_size.setter
    def header_table_size(self, value):
        log.debug(
            "Resizing decoder header table to %d from %d",
            value,
            self._header_table_size
        )

        # If the new value is larger than the current one, no worries!
        # Otherwise, we may need to shrink the header table.
        if value < self._header_table_size:
            current_size = header_table_size(self.header_table)

            while value < current_size:
                header = self.header_table.pop()
                n, v = header
                current_size -= (
                    32 + len(n) + len(v)
                )

                log.debug("Evicting %s: %s from the header table", n, v)

        self._header_table_size = value

    def decode(self, data):
        """
        Takes an HPACK-encoded header block and decodes it into a header set.
        """
        log.debug("Decoding %s", data)

        headers = []
        data_len = len(data)
        current_index = 0

        while current_index < data_len:
            # Work out what kind of header we're decoding.
            # If the high bit is 1, it's an indexed field.
            current = to_byte(data[current_index])
            indexed = bool(current & 0x80)

            # Otherwise, if the second-highest bit is 1 it's a field that
            # does alter the header table.
            literal_index = bool(current & 0x40)

            # Otherwise, if the third-highest bit is 1 it's an encoding
            # context update.
            encoding_update = bool(current & 0x20)

            if indexed:
                header, consumed = self._decode_indexed(data[current_index:])
            elif literal_index:
                # It's a literal header that does affect the header table.
                header, consumed = self._decode_literal_index(
                    data[current_index:]
                )
            elif encoding_update:
                # It's an update to the encoding context. Decode it from the
                # current position, not from the start of the block.
                consumed = self._update_encoding_context(
                    data[current_index:]
                )
                header = None
            else:
                # It's a literal header that does not affect the header
                # table.
                header, consumed = self._decode_literal_no_index(
                    data[current_index:]
                )

            if header:
                headers.append(header)

            current_index += consumed

        return [(n.decode('utf-8'), v.decode('utf-8')) for n, v in headers]
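
    # Illustrative sketch: the first three bytes of the request example in
    # RFC 7541, C.3.1 are all single-byte indexed fields.
    #
    #     d = Decoder()
    #     d.decode(b'\x82\x86\x84')
    #         -> [(':method', 'GET'), (':scheme', 'http'), (':path', '/')]
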
    def _add_to_header_table(self, new_header):
        """
        Adds a header to the header table, evicting old ones if necessary.
        """
        # Be optimistic: add the header straight away.
        self.header_table.appendleft(new_header)

        # Now, work out how big the header table is.
        actual_size = header_table_size(self.header_table)

        # Loop and remove whatever we need to.
        while actual_size > self.header_table_size:
            header = self.header_table.pop()
            n, v = header
            actual_size -= (
                32 + len(n) + len(v)
            )

            log.debug("Evicting %s: %s from the header table", n, v)

    def _update_encoding_context(self, data):
        """
        Handles a byte that updates the encoding context.
        """
        # We've been asked to resize the header table.
        new_size, consumed = decode_integer(data, 5)
        self.header_table_size = new_size
        return consumed

    def _decode_indexed(self, data):
        """
        Decodes a header represented using the indexed representation.
        """
        index, consumed = decode_integer(data, 7)
        index -= 1  # Because this idiot table is 1-indexed. Ugh.

        if index >= len(Decoder.static_table):
            index -= len(Decoder.static_table)
            header = self.header_table[index]
        else:
            header = Decoder.static_table[index]

        log.debug("Decoded %s, consumed %d", header, consumed)
        return header, consumed
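
    # Illustrative sketch: an indexed field is a 7-bit prefix integer with
    # the high bit set, looked up in the static or dynamic table.
    #
    #     _decode_indexed(b'\x82')  -> ((b':method', b'GET'), 1)
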
    def _decode_literal_no_index(self, data):
        return self._decode_literal(data, False)

    def _decode_literal_index(self, data):
        return self._decode_literal(data, True)

    def _decode_literal(self, data, should_index):
        """
        Decodes a header represented with a literal.
        """
        total_consumed = 0

        # When should_index is true, if the low six bits of the first byte are
        # nonzero, the header name is indexed.
        # When should_index is false, if the low four bits of the first byte
        # are nonzero the header name is indexed.
        if should_index:
            indexed_name = to_byte(data[0]) & 0x3F
            name_len = 6
        else:
            indexed_name = to_byte(data[0]) & 0x0F
            name_len = 4

        if indexed_name:
            # Indexed header name.
            index, consumed = decode_integer(data, name_len)
            index -= 1

            if index >= len(Decoder.static_table):
                index -= len(Decoder.static_table)
                name = self.header_table[index][0]
            else:
                name = Decoder.static_table[index][0]

            total_consumed = consumed
            length = 0
        else:
            # Literal header name. The first byte was consumed, so we need to
            # move forward.
            data = data[1:]

            length, consumed = decode_integer(data, 7)
            name = data[consumed:consumed + length]

            if to_byte(data[0]) & 0x80:
                name = self.huffman_coder.decode(name)
            total_consumed = consumed + length + 1  # Since we moved forward 1.

        data = data[consumed + length:]

        # The header value is definitely length-based.
        length, consumed = decode_integer(data, 7)
        value = data[consumed:consumed + length]

        if to_byte(data[0]) & 0x80:
            value = self.huffman_coder.decode(value)

        # Update the total consumed length.
        total_consumed += length + consumed

        # If we've been asked to index this, add it to the header table.
        header = (name, value)
        if should_index:
            self._add_to_header_table(header)

        log.debug(
            "Decoded %s, total consumed %d bytes, indexed %s",
            header,
            total_consumed,
            should_index
        )

        return header, total_consumed
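

# A minimal, self-contained round-trip sketch using only the public API
# defined above. It is illustrative rather than part of the library: the
# literal header 'custom-key: custom-header' follows the worked example in
# RFC 7541, C.2.1.
if __name__ == '__main__':  # pragma: no cover
    encoder = Encoder()
    decoder = Decoder()

    # Encode one fully-indexed header and one literal header, without Huffman
    # coding so the bytes are easy to read.
    block = encoder.encode(
        [(':method', 'GET'), ('custom-key', 'custom-header')], huffman=False
    )
    print(block)  # b'\x82@\ncustom-key\rcustom-header'

    # Decoding the block recovers the original header set.
    print(decoder.decode(block))
    # [(':method', 'GET'), ('custom-key', 'custom-header')]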