Files
a0_basic_app
a1_vehicle
a2_async_sim
ab_glyph
ab_glyph_rasterizer
adler
adler32
agents
aho_corasick
anyhow
approx
aquamarine
ash
atty
bitflags
bytemuck
byteorder
cache_padded
cfg_if
chrono
color_quant
crc32fast
crossbeam_channel
crossbeam_deque
crossbeam_epoch
crossbeam_utils
deflate
draw2d
either
flexi_logger
generic_array
gif
glfw
glfw_sys
glob
image
indoc
itertools
jpeg_decoder
lazy_static
libc
libloading
log
matrixmultiply
memchr
memoffset
miniz_oxide
nalgebra
base
geometry
linalg
third_party
num_complex
num_cpus
num_integer
num_iter
num_rational
num_traits
owned_ttf_parser
paste
png
proc_macro2
proc_macro_error
proc_macro_error_attr
quote
raw_window_handle
rawpointer
rayon
rayon_core
regex
regex_syntax
scoped_threadpool
scopeguard
semver
semver_parser
serde
serde_derive
simba
smawk
spin_sleep
syn
terminal_size
textwrap
thiserror
thiserror_impl
tiff
time
triple_buffer
ttf_parser
typenum
unicode_width
unicode_xid
unindent
vk_sys
weezl
yansi
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
use std::u16;

use crate::huffman_table::{
    get_distance_code, get_length_code, END_OF_BLOCK_POSITION, NUM_DISTANCE_CODES,
    NUM_LITERALS_AND_LENGTHS,
};
use crate::lzvalue::LZValue;

/// The type used for representing how many times a literal, length or distance code has been output
/// to the current buffer.
/// As we are limiting the blocks to be at most 2^16 bytes long, we can represent frequencies using
/// 16-bit values.
pub type FrequencyType = u16;

/// The maximum number of literals/lengths in the buffer, which in practice also means the maximum
/// number of literals/lengths output before a new block is started.
/// This should not be larger than the maximum value `FrequencyType` can represent to prevent
/// overflowing (which would degrade, or in the worst case break compression).
pub const MAX_BUFFER_LENGTH: usize = 1024 * 31;

#[derive(Debug, PartialEq)]
pub enum BufferStatus {
    NotFull,
    Full,
}

/// Struct that buffers lz77 data and keeps track of the usage of different codes
pub struct DynamicWriter {
    buffer: Vec<LZValue>,
    // The two last length codes are not actually used, but only participates in code construction
    // Therefore, we ignore them to get the correct number of lengths
    frequencies: [FrequencyType; NUM_LITERALS_AND_LENGTHS],
    distance_frequencies: [FrequencyType; NUM_DISTANCE_CODES],
}

impl DynamicWriter {
    #[inline]
    pub fn check_buffer_length(&self) -> BufferStatus {
        if self.buffer.len() >= MAX_BUFFER_LENGTH {
            BufferStatus::Full
        } else {
            BufferStatus::NotFull
        }
    }

    #[inline]
    pub fn write_literal(&mut self, literal: u8) -> BufferStatus {
        debug_assert!(self.buffer.len() < MAX_BUFFER_LENGTH);
        self.buffer.push(LZValue::literal(literal));
        self.frequencies[usize::from(literal)] += 1;
        self.check_buffer_length()
    }

    #[inline]
    pub fn write_length_distance(&mut self, length: u16, distance: u16) -> BufferStatus {
        self.buffer.push(LZValue::length_distance(length, distance));
        let l_code_num = get_length_code(length);
        // As we limit the buffer to 2^16 values, this should be safe from overflowing.
        self.frequencies[l_code_num] += 1;

        let d_code_num = get_distance_code(distance);
        // The compiler seems to be able to evade the bounds check here somehow.
        self.distance_frequencies[usize::from(d_code_num)] += 1;
        self.check_buffer_length()
    }

    pub fn buffer_length(&self) -> usize {
        self.buffer.len()
    }

    pub fn get_buffer(&self) -> &[LZValue] {
        &self.buffer
    }

    pub fn new() -> DynamicWriter {
        let mut w = DynamicWriter {
            buffer: Vec::with_capacity(MAX_BUFFER_LENGTH),
            frequencies: [0; NUM_LITERALS_AND_LENGTHS],
            distance_frequencies: [0; NUM_DISTANCE_CODES],
        };
        // This will always be 1,
        // since there will always only be one end of block marker in each block
        w.frequencies[END_OF_BLOCK_POSITION] = 1;
        w
    }

    /// Special output function used with RLE compression
    /// that avoids bothering to lookup a distance code.
    #[inline]
    pub fn write_length_rle(&mut self, length: u16) -> BufferStatus {
        self.buffer.push(LZValue::length_distance(length, 1));
        let l_code_num = get_length_code(length);
        // As we limit the buffer to 2^16 values, this should be safe from overflowing.
        self.frequencies[l_code_num] += 1;

        self.distance_frequencies[0] += 1;
        self.check_buffer_length()
    }

    pub fn get_frequencies(&self) -> (&[u16], &[u16]) {
        (&self.frequencies, &self.distance_frequencies)
    }

    pub fn clear_frequencies(&mut self) {
        self.frequencies = [0; NUM_LITERALS_AND_LENGTHS];
        self.distance_frequencies = [0; NUM_DISTANCE_CODES];
        self.frequencies[END_OF_BLOCK_POSITION] = 1;
    }

    pub fn clear_data(&mut self) {
        self.buffer.clear()
    }

    pub fn clear(&mut self) {
        self.clear_frequencies();
        self.clear_data();
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::huffman_table::{get_distance_code, get_length_code};
    #[test]
    /// Ensure that these function won't produce values that would overflow the output_writer
    /// tables since we use some unsafe indexing.
    fn array_bounds() {
        let w = DynamicWriter::new();

        for i in 0..u16::max_value() {
            assert!(get_length_code(i) < w.frequencies.len());
        }

        for i in 0..u16::max_value() {
            assert!(get_distance_code(i) < w.distance_frequencies.len() as u8);
        }
    }
}