105 lines
4.5 KiB
Python
105 lines
4.5 KiB
Python
|
from typing import List, Optional
|
||
|
|
||
|
|
||
|
class Block:
|
||
|
def __init__(self, bits=None, index=None, offset=None, chain=None):
|
||
|
self.chain = chain
|
||
|
self.bits = bits
|
||
|
self.index = index
|
||
|
self.offset = offset
|
||
|
|
||
|
|
||
|
INITIAL_OFFSET = 1
|
||
|
MAX_SCALE = 50
|
||
|
|
||
|
|
||
|
def offset_ceiling(index, offset_limit):
|
||
|
return offset_limit if index > offset_limit else (INITIAL_OFFSET if index < INITIAL_OFFSET else index)
|
||
|
|
||
|
|
||
|
def elias_gamma_needed_bits(value):
|
||
|
bits = 1
|
||
|
while value > 1:
|
||
|
value >>= 1
|
||
|
bits += 2
|
||
|
return bits
|
||
|
|
||
|
|
||
|
def optimize(input_data, skip, offset_limit):
|
||
|
# The algorithm has a floating window of size window_size describing the previous chain of matches
|
||
|
input_size = len(input_data)
|
||
|
window_size = offset_ceiling(input_size - 1, offset_limit) + 1
|
||
|
|
||
|
last_literal: List[Optional[Block]] = [None] * window_size
|
||
|
last_match: List[Optional[Block]] = [None] * window_size
|
||
|
match_length = [0] * window_size
|
||
|
|
||
|
# The algorithm is looking for the best match for each index of the input data
|
||
|
optimal = [None] * input_size
|
||
|
best_length = [0] * input_size
|
||
|
|
||
|
if input_size > 2:
|
||
|
best_length[2] = 2
|
||
|
|
||
|
# Kickstart the algorithm by assigning a fake block
|
||
|
last_match[INITIAL_OFFSET] = Block(-1, skip - 1, INITIAL_OFFSET, None)
|
||
|
|
||
|
# The algorithm is checking for the best match for each index of the input data (skipping the skip part)
|
||
|
for index in range(skip, input_size):
|
||
|
best_length_size = 2 # It's useless to check for a match of length 1
|
||
|
max_offset = offset_ceiling(index, offset_limit)
|
||
|
|
||
|
for offset in range(1, max_offset + 1):
|
||
|
# Checking for a match in the previous part of the input data, backwards
|
||
|
if index != skip and index >= offset and input_data[index] == input_data[index - offset]:
|
||
|
current_literal = last_literal[offset]
|
||
|
if current_literal is not None:
|
||
|
length = index - current_literal.index
|
||
|
bits = current_literal.bits + 1 + elias_gamma_needed_bits(length)
|
||
|
|
||
|
# Chain the current match to the previous literal
|
||
|
last_match[offset] = Block(bits, index, offset, current_literal)
|
||
|
|
||
|
# Update the best match
|
||
|
if not optimal[index] or optimal[index].bits > bits:
|
||
|
optimal[index] = last_match[offset]
|
||
|
|
||
|
match_length[offset] += 1
|
||
|
|
||
|
if match_length[offset] > 1:
|
||
|
if best_length_size < match_length[offset]:
|
||
|
bits = (optimal[index - best_length[best_length_size]].bits +
|
||
|
elias_gamma_needed_bits(best_length[best_length_size] - 1))
|
||
|
|
||
|
while True:
|
||
|
best_length_size += 1
|
||
|
bits2 = (optimal[index - best_length_size].bits +
|
||
|
elias_gamma_needed_bits(best_length_size - 1))
|
||
|
if bits2 <= bits:
|
||
|
best_length[best_length_size] = best_length_size
|
||
|
bits = bits2
|
||
|
else:
|
||
|
best_length[best_length_size] = best_length[best_length_size - 1]
|
||
|
|
||
|
if best_length_size >= match_length[offset]:
|
||
|
break
|
||
|
|
||
|
length = best_length[match_length[offset]]
|
||
|
bits = (optimal[index - length].bits + 8 +
|
||
|
elias_gamma_needed_bits((offset - 1) // 128 + 1) +
|
||
|
elias_gamma_needed_bits(length - 1))
|
||
|
if not last_match[offset] or last_match[offset].index != index or last_match[offset].bits > bits:
|
||
|
last_match[offset] = Block(bits, index, offset, optimal[index - length])
|
||
|
if not optimal[index] or optimal[index].bits > bits:
|
||
|
optimal[index] = last_match[offset]
|
||
|
else:
|
||
|
match_length[offset] = 0 # Resetting the match length
|
||
|
if last_match[offset]:
|
||
|
length = index - last_match[offset].index
|
||
|
bits = last_match[offset].bits + 1 + elias_gamma_needed_bits(length) + length * 8
|
||
|
last_literal[offset] = Block(bits, index, 0, last_match[offset])
|
||
|
if not optimal[index] or optimal[index].bits > bits:
|
||
|
optimal[index] = last_literal[offset]
|
||
|
|
||
|
return optimal[input_size - 1]
|