Skip to content

Commit b1a798a

Browse files
committed
Reduce unnecessary allocations and indirections
* Changed the literal_probs array from a Vec<Vec<u16>> to a Vec2D backed by a single contiguous allocation.
* BitTrees in LenDecoder and DecoderState are now stored inline. The actual BitTree data still lives in a Vec, but one level of indirection is removed.
* Don't bother filling the stack-allocated DecoderState arrays on reset; just recreate the arrays, dropping the existing ones.
1 parent 6e1f0d7 commit b1a798a

File tree

5 files changed

+227
-42
lines changed

5 files changed

+227
-42
lines changed

src/decode/lzma.rs

Lines changed: 40 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
use crate::decode::lzbuffer::{LzBuffer, LzCircularBuffer};
2-
use crate::decode::rangecoder;
3-
use crate::decode::rangecoder::RangeDecoder;
4-
use crate::decompress::Options;
5-
use crate::decompress::UnpackedSize;
2+
use crate::decode::rangecoder::{BitTree, LenDecoder, RangeDecoder};
3+
use crate::decompress::{Options, UnpackedSize};
64
use crate::error;
5+
use crate::util::vec2d::Vec2D;
76
use byteorder::{LittleEndian, ReadBytesExt};
87
use std::io;
98

@@ -167,9 +166,9 @@ pub(crate) struct DecoderState {
167166
partial_input_buf: std::io::Cursor<[u8; MAX_REQUIRED_INPUT]>,
168167
pub(crate) lzma_props: LzmaProperties,
169168
unpacked_size: Option<u64>,
170-
literal_probs: Vec<Vec<u16>>,
171-
pos_slot_decoder: Vec<rangecoder::BitTree>,
172-
align_decoder: rangecoder::BitTree,
169+
literal_probs: Vec2D<u16>,
170+
pos_slot_decoder: [BitTree; 4],
171+
align_decoder: BitTree,
173172
pos_decoders: [u16; 115],
174173
is_match: [u16; 192], // true = LZ, false = literal
175174
is_rep: [u16; 12],
@@ -179,8 +178,8 @@ pub(crate) struct DecoderState {
179178
is_rep_0long: [u16; 192],
180179
state: usize,
181180
rep: [usize; 4],
182-
len_decoder: rangecoder::LenDecoder,
183-
rep_len_decoder: rangecoder::LenDecoder,
181+
len_decoder: LenDecoder,
182+
rep_len_decoder: LenDecoder,
184183
}
185184

186185
impl DecoderState {
@@ -190,9 +189,14 @@ impl DecoderState {
190189
partial_input_buf: std::io::Cursor::new([0; MAX_REQUIRED_INPUT]),
191190
lzma_props,
192191
unpacked_size,
193-
literal_probs: vec![vec![0x400; 0x300]; 1 << (lzma_props.lc + lzma_props.lp)],
194-
pos_slot_decoder: vec![rangecoder::BitTree::new(6); 4],
195-
align_decoder: rangecoder::BitTree::new(4),
192+
literal_probs: Vec2D::init(0x400, (1 << (lzma_props.lc + lzma_props.lp), 0x300)),
193+
pos_slot_decoder: [
194+
BitTree::new(6),
195+
BitTree::new(6),
196+
BitTree::new(6),
197+
BitTree::new(6),
198+
],
199+
align_decoder: BitTree::new(4),
196200
pos_decoders: [0x400; 115],
197201
is_match: [0x400; 192],
198202
is_rep: [0x400; 12],
@@ -202,33 +206,34 @@ impl DecoderState {
202206
is_rep_0long: [0x400; 192],
203207
state: 0,
204208
rep: [0; 4],
205-
len_decoder: rangecoder::LenDecoder::new(),
206-
rep_len_decoder: rangecoder::LenDecoder::new(),
209+
len_decoder: LenDecoder::new(),
210+
rep_len_decoder: LenDecoder::new(),
207211
}
208212
}
209213

210214
pub fn reset_state(&mut self, new_props: LzmaProperties) {
211215
new_props.validate();
212216
if self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp {
213217
// We can reset here by filling the existing buffer with 0x400.
214-
self.literal_probs.iter_mut().for_each(|v| v.fill(0x400))
218+
self.literal_probs.fill(0x400);
215219
} else {
216220
// We need to reallocate because of the new size of `lc+lp`.
217-
self.literal_probs = vec![vec![0x400; 0x300]; 1 << (new_props.lc + new_props.lp)];
221+
self.literal_probs = Vec2D::init(0x400, (1 << (new_props.lc + new_props.lp), 0x300));
218222
}
219223

220224
self.lzma_props = new_props;
221225
self.pos_slot_decoder.iter_mut().for_each(|t| t.reset());
222226
self.align_decoder.reset();
223-
self.pos_decoders.fill(0x400);
224-
self.is_match.fill(0x400);
225-
self.is_rep.fill(0x400);
226-
self.is_rep_g0.fill(0x400);
227-
self.is_rep_g1.fill(0x400);
228-
self.is_rep_g2.fill(0x400);
229-
self.is_rep_0long.fill(0x400);
227+
self.pos_decoders = [0x400; 115];
228+
self.is_match = [0x400; 192];
229+
self.is_rep = [0x400; 12];
230+
self.is_rep_g0 = [0x400; 12];
231+
self.is_rep_g1 = [0x400; 12];
232+
self.is_rep_g2 = [0x400; 12];
233+
self.is_rep_0long = [0x400; 192];
234+
self.state = 0;
235+
self.rep = [0; 4];
230236
self.state = 0;
231-
self.rep.fill(0);
232237
self.len_decoder.reset();
233238
self.rep_len_decoder.reset();
234239
}
@@ -240,7 +245,7 @@ impl DecoderState {
240245
pub fn process<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
241246
&mut self,
242247
output: &mut LZB,
243-
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
248+
rangecoder: &mut RangeDecoder<'a, R>,
244249
) -> error::Result<()> {
245250
self.process_mode(output, rangecoder, ProcessingMode::Finish)
246251
}
@@ -249,7 +254,7 @@ impl DecoderState {
249254
pub fn process_stream<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
250255
&mut self,
251256
output: &mut LZB,
252-
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
257+
rangecoder: &mut RangeDecoder<'a, R>,
253258
) -> error::Result<()> {
254259
self.process_mode(output, rangecoder, ProcessingMode::Partial)
255260
}
@@ -263,7 +268,7 @@ impl DecoderState {
263268
fn process_next_inner<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
264269
&mut self,
265270
output: &mut LZB,
266-
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
271+
rangecoder: &mut RangeDecoder<'a, R>,
267272
update: bool,
268273
) -> error::Result<ProcessingStatus> {
269274
let pos_state = output.len() & ((1 << self.lzma_props.pb) - 1);
@@ -380,7 +385,7 @@ impl DecoderState {
380385
fn process_next<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
381386
&mut self,
382387
output: &mut LZB,
383-
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
388+
rangecoder: &mut RangeDecoder<'a, R>,
384389
) -> error::Result<ProcessingStatus> {
385390
self.process_next_inner(output, rangecoder, true)
386391
}
@@ -398,15 +403,15 @@ impl DecoderState {
398403
code: u32,
399404
) -> error::Result<()> {
400405
let mut temp = std::io::Cursor::new(buf);
401-
let mut rangecoder = rangecoder::RangeDecoder::from_parts(&mut temp, range, code);
406+
let mut rangecoder = RangeDecoder::from_parts(&mut temp, range, code);
402407
let _ = self.process_next_inner(output, &mut rangecoder, false)?;
403408
Ok(())
404409
}
405410

406411
/// Utility function to read data into the partial input buffer.
407412
fn read_partial_input_buf<'a, R: io::BufRead>(
408413
&mut self,
409-
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
414+
rangecoder: &mut RangeDecoder<'a, R>,
410415
) -> error::Result<()> {
411416
// Fill as much of the tmp buffer as possible
412417
let start = self.partial_input_buf.position() as usize;
@@ -420,7 +425,7 @@ impl DecoderState {
420425
fn process_mode<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
421426
&mut self,
422427
output: &mut LZB,
423-
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
428+
rangecoder: &mut RangeDecoder<'a, R>,
424429
mode: ProcessingMode,
425430
) -> error::Result<()> {
426431
loop {
@@ -461,11 +466,8 @@ impl DecoderState {
461466
// Run the decompressor on the tmp buffer
462467
let mut tmp_reader =
463468
io::Cursor::new(&tmp[..self.partial_input_buf.position() as usize]);
464-
let mut tmp_rangecoder = rangecoder::RangeDecoder::from_parts(
465-
&mut tmp_reader,
466-
rangecoder.range,
467-
rangecoder.code,
468-
);
469+
let mut tmp_rangecoder =
470+
RangeDecoder::from_parts(&mut tmp_reader, rangecoder.range, rangecoder.code);
469471
let res = self.process_next(output, &mut tmp_rangecoder)?;
470472

471473
// Update the actual rangecoder
@@ -514,7 +516,7 @@ impl DecoderState {
514516
fn decode_literal<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
515517
&mut self,
516518
output: &mut LZB,
517-
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
519+
rangecoder: &mut RangeDecoder<'a, R>,
518520
update: bool,
519521
) -> error::Result<u8> {
520522
let def_prev_byte = 0u8;
@@ -550,7 +552,7 @@ impl DecoderState {
550552

551553
fn decode_distance<'a, R: io::BufRead>(
552554
&mut self,
553-
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
555+
rangecoder: &mut RangeDecoder<'a, R>,
554556
length: usize,
555557
update: bool,
556558
) -> error::Result<usize> {

src/decode/rangecoder.rs

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,8 @@ impl BitTree {
190190
pub struct LenDecoder {
191191
choice: u16,
192192
choice2: u16,
193-
low_coder: Vec<BitTree>,
194-
mid_coder: Vec<BitTree>,
193+
low_coder: [BitTree; 16],
194+
mid_coder: [BitTree; 16],
195195
high_coder: BitTree,
196196
}
197197

@@ -200,8 +200,42 @@ impl LenDecoder {
200200
LenDecoder {
201201
choice: 0x400,
202202
choice2: 0x400,
203-
low_coder: vec![BitTree::new(3); 16],
204-
mid_coder: vec![BitTree::new(3); 16],
203+
low_coder: [
204+
BitTree::new(3),
205+
BitTree::new(3),
206+
BitTree::new(3),
207+
BitTree::new(3),
208+
BitTree::new(3),
209+
BitTree::new(3),
210+
BitTree::new(3),
211+
BitTree::new(3),
212+
BitTree::new(3),
213+
BitTree::new(3),
214+
BitTree::new(3),
215+
BitTree::new(3),
216+
BitTree::new(3),
217+
BitTree::new(3),
218+
BitTree::new(3),
219+
BitTree::new(3),
220+
],
221+
mid_coder: [
222+
BitTree::new(3),
223+
BitTree::new(3),
224+
BitTree::new(3),
225+
BitTree::new(3),
226+
BitTree::new(3),
227+
BitTree::new(3),
228+
BitTree::new(3),
229+
BitTree::new(3),
230+
BitTree::new(3),
231+
BitTree::new(3),
232+
BitTree::new(3),
233+
BitTree::new(3),
234+
BitTree::new(3),
235+
BitTree::new(3),
236+
BitTree::new(3),
237+
BitTree::new(3),
238+
],
205239
high_coder: BitTree::new(8),
206240
}
207241
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ mod encode;
1212

1313
pub mod error;
1414

15+
mod util;
1516
mod xz;
1617

1718
use std::io;

src/util/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pub mod vec2d;

0 commit comments

Comments
 (0)