From c6735f92c1b7e24a434d3882d5e2dd716cbbb0d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Wed, 26 Mar 2025 07:12:00 +0100 Subject: [PATCH 1/5] Add index to the dep graph format and encode via `MemEncoder` --- .../src/dep_graph/serialized.rs | 122 ++++++++++++------ compiler/rustc_serialize/src/opaque.rs | 2 + .../rustc_serialize/src/opaque/mem_encoder.rs | 121 +++++++++++++++++ 3 files changed, 206 insertions(+), 39 deletions(-) create mode 100644 compiler/rustc_serialize/src/opaque/mem_encoder.rs diff --git a/compiler/rustc_query_system/src/dep_graph/serialized.rs b/compiler/rustc_query_system/src/dep_graph/serialized.rs index ac24628447d82..cfea43d62e51c 100644 --- a/compiler/rustc_query_system/src/dep_graph/serialized.rs +++ b/compiler/rustc_query_system/src/dep_graph/serialized.rs @@ -46,6 +46,7 @@ use rustc_data_structures::profiling::SelfProfilerRef; use rustc_data_structures::sync::Lock; use rustc_data_structures::unhash::UnhashMap; use rustc_index::{Idx, IndexVec}; +use rustc_serialize::opaque::mem_encoder::MemEncoder; use rustc_serialize::opaque::{FileEncodeResult, FileEncoder, IntEncodedWithFixedSize, MemDecoder}; use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; use tracing::{debug, instrument}; @@ -105,18 +106,11 @@ impl SerializedDepGraph { ) -> impl Iterator + Clone { let header = self.edge_list_indices[source]; let mut raw = &self.edge_list_data[header.start()..]; - // Figure out where the edge list for `source` ends by getting the start index of the next - // edge list, or the end of the array if this is the last edge. - let end = self - .edge_list_indices - .get(source + 1) - .map(|h| h.start()) - .unwrap_or_else(|| self.edge_list_data.len() - DEP_NODE_PAD); // The number of edges for this node is implicitly stored in the combination of the byte // width and the length. let bytes_per_index = header.bytes_per_index(); - let len = (end - header.start()) / bytes_per_index; + let len = header.edges; // LLVM doesn't hoist EdgeHeader::mask so we do it ourselves. let mask = header.mask(); @@ -163,6 +157,7 @@ impl SerializedDepGraph { #[derive(Debug, Clone, Copy)] struct EdgeHeader { repr: usize, + edges: u32, } impl EdgeHeader { @@ -205,9 +200,17 @@ impl SerializedDepGraph { let graph_bytes = d.len() - (2 * IntEncodedWithFixedSize::ENCODED_SIZE) - d.position(); - let mut nodes = IndexVec::with_capacity(node_count); - let mut fingerprints = IndexVec::with_capacity(node_count); - let mut edge_list_indices = IndexVec::with_capacity(node_count); + let mut nodes: IndexVec = (0..node_count) + .map(|_| DepNode { + kind: D::DEP_KIND_NULL, + hash: PackedFingerprint::from(Fingerprint::ZERO), + }) + .collect(); + let mut fingerprints: IndexVec = + (0..node_count).map(|_| Fingerprint::ZERO).collect(); + let mut edge_list_indices: IndexVec = + (0..node_count).map(|_| EdgeHeader { repr: 0, edges: 0 }).collect(); + // This estimation assumes that all of the encoded bytes are for the edge lists or for the // fixed-size node headers. But that's not necessarily true; if any edge list has a length // that spills out of the size we can bit-pack into SerializedNodeHeader then some of the @@ -226,11 +229,10 @@ impl SerializedDepGraph { let node_header = SerializedNodeHeader:: { bytes: d.read_array(), _marker: PhantomData }; - let _i: SerializedDepNodeIndex = nodes.push(node_header.node()); - debug_assert_eq!(_i.index(), _index); + let index = node_header.index(); - let _i: SerializedDepNodeIndex = fingerprints.push(node_header.fingerprint()); - debug_assert_eq!(_i.index(), _index); + nodes[index] = node_header.node(); + fingerprints[index] = node_header.fingerprint(); // If the length of this node's edge list is small, the length is stored in the header. // If it is not, we fall back to another decoder call. @@ -242,12 +244,11 @@ impl SerializedDepGraph { let edges_len_bytes = node_header.bytes_per_index() * (num_edges as usize); // The in-memory structure for the edges list stores the byte width of the edges on // this node with the offset into the global edge data array. - let edges_header = node_header.edges_header(&edge_list_data); + let edges_header = node_header.edges_header(&edge_list_data, num_edges); edge_list_data.extend(d.read_raw_bytes(edges_len_bytes)); - let _i: SerializedDepNodeIndex = edge_list_indices.push(edges_header); - debug_assert_eq!(_i.index(), _index); + edge_list_indices[index] = edges_header; } // When we access the edge list data, we do a fixed-size read from the edge list data then @@ -298,9 +299,10 @@ impl SerializedDepGraph { /// * In whatever bits remain, the length of the edge list for this node, if it fits struct SerializedNodeHeader { // 2 bytes for the DepNode + // 4 bytes for the index // 16 for Fingerprint in DepNode // 16 for Fingerprint in NodeInfo - bytes: [u8; 34], + bytes: [u8; 38], _marker: PhantomData, } @@ -310,6 +312,7 @@ struct Unpacked { len: Option, bytes_per_index: usize, kind: DepKind, + index: SerializedDepNodeIndex, hash: PackedFingerprint, fingerprint: Fingerprint, } @@ -331,6 +334,7 @@ impl SerializedNodeHeader { #[inline] fn new( node: DepNode, + index: DepNodeIndex, fingerprint: Fingerprint, edge_max_index: u32, edge_count: usize, @@ -352,10 +356,11 @@ impl SerializedNodeHeader { let hash: Fingerprint = node.hash.into(); // Using half-open ranges ensures an unconditional panic if we get the magic numbers wrong. - let mut bytes = [0u8; 34]; + let mut bytes = [0u8; 38]; bytes[..2].copy_from_slice(&head.to_le_bytes()); - bytes[2..18].copy_from_slice(&hash.to_le_bytes()); - bytes[18..].copy_from_slice(&fingerprint.to_le_bytes()); + bytes[2..6].copy_from_slice(&index.as_u32().to_le_bytes()); + bytes[6..22].copy_from_slice(&hash.to_le_bytes()); + bytes[22..].copy_from_slice(&fingerprint.to_le_bytes()); #[cfg(debug_assertions)] { @@ -372,8 +377,9 @@ impl SerializedNodeHeader { #[inline] fn unpack(&self) -> Unpacked { let head = u16::from_le_bytes(self.bytes[..2].try_into().unwrap()); - let hash = self.bytes[2..18].try_into().unwrap(); - let fingerprint = self.bytes[18..].try_into().unwrap(); + let index = u32::from_le_bytes(self.bytes[2..6].try_into().unwrap()); + let hash = self.bytes[6..22].try_into().unwrap(); + let fingerprint = self.bytes[22..].try_into().unwrap(); let kind = head & mask(Self::KIND_BITS) as u16; let bytes_per_index = (head >> Self::KIND_BITS) & mask(Self::WIDTH_BITS) as u16; @@ -383,6 +389,7 @@ impl SerializedNodeHeader { len: len.checked_sub(1), bytes_per_index: bytes_per_index as usize + 1, kind: DepKind::new(kind), + index: SerializedDepNodeIndex::from_u32(index), hash: Fingerprint::from_le_bytes(hash).into(), fingerprint: Fingerprint::from_le_bytes(fingerprint), } @@ -398,6 +405,11 @@ impl SerializedNodeHeader { self.unpack().bytes_per_index } + #[inline] + fn index(&self) -> SerializedDepNodeIndex { + self.unpack().index + } + #[inline] fn fingerprint(&self) -> Fingerprint { self.unpack().fingerprint @@ -410,9 +422,10 @@ impl SerializedNodeHeader { } #[inline] - fn edges_header(&self, edge_list_data: &[u8]) -> EdgeHeader { + fn edges_header(&self, edge_list_data: &[u8], edges: u32) -> EdgeHeader { EdgeHeader { repr: (edge_list_data.len() << DEP_NODE_WIDTH_BITS) | (self.bytes_per_index() - 1), + edges, } } } @@ -425,10 +438,15 @@ struct NodeInfo { } impl NodeInfo { - fn encode(&self, e: &mut FileEncoder) { + fn encode(&self, e: &mut MemEncoder, index: DepNodeIndex) { let NodeInfo { node, fingerprint, ref edges } = *self; - let header = - SerializedNodeHeader::::new(node, fingerprint, edges.max_index(), edges.len()); + let header = SerializedNodeHeader::::new( + node, + index, + fingerprint, + edges.max_index(), + edges.len(), + ); e.write_array(header.bytes); if header.len().is_none() { @@ -450,8 +468,9 @@ impl NodeInfo { /// This avoids the overhead of constructing `EdgesVec`, which would be needed to call `encode`. #[inline] fn encode_promoted( - e: &mut FileEncoder, + e: &mut MemEncoder, node: DepNode, + index: DepNodeIndex, fingerprint: Fingerprint, prev_index: SerializedDepNodeIndex, colors: &DepNodeColorMap, @@ -464,7 +483,7 @@ impl NodeInfo { let edge_max = edges.clone().map(|i| colors.current(i).unwrap().as_u32()).max().unwrap_or(0); - let header = SerializedNodeHeader::::new(node, fingerprint, edge_max, edge_count); + let header = SerializedNodeHeader::::new(node, index, fingerprint, edge_max, edge_count); e.write_array(header.bytes); if header.len().is_none() { @@ -498,6 +517,8 @@ struct EncoderState { total_edge_count: usize, stats: Option>, + mem_encoder: MemEncoder, + /// Stores the number of times we've encoded each dep kind. kind_stats: Vec, marker: PhantomData, @@ -511,22 +532,28 @@ impl EncoderState { total_edge_count: 0, total_node_count: 0, stats: record_stats.then(FxHashMap::default), + mem_encoder: MemEncoder::new(), kind_stats: iter::repeat(0).take(D::DEP_KIND_MAX as usize + 1).collect(), marker: PhantomData, } } + #[inline] + fn alloc_index(&mut self) -> DepNodeIndex { + let index = DepNodeIndex::new(self.total_node_count); + self.total_node_count += 1; + index + } + #[inline] fn record( &mut self, node: DepNode, + index: DepNodeIndex, edge_count: usize, edges: impl FnOnce(&mut Self) -> Vec, record_graph: &Option>, ) -> DepNodeIndex { - let index = DepNodeIndex::new(self.total_node_count); - - self.total_node_count += 1; self.kind_stats[node.kind.as_usize()] += 1; self.total_edge_count += edge_count; @@ -558,14 +585,25 @@ impl EncoderState { index } + #[inline] + fn flush_mem_encoder(&mut self) { + let data = &mut self.mem_encoder.data; + if data.len() > 64 * 1024 { + self.encoder.emit_raw_bytes(&data[..]); + data.clear(); + } + } + /// Encodes a node to the current graph. fn encode_node( &mut self, node: &NodeInfo, record_graph: &Option>, ) -> DepNodeIndex { - node.encode::(&mut self.encoder); - self.record(node.node, node.edges.len(), |_| node.edges[..].to_vec(), record_graph) + let index = self.alloc_index(); + node.encode::(&mut self.mem_encoder, index); + self.flush_mem_encoder(); + self.record(node.node, index, node.edges.len(), |_| node.edges[..].to_vec(), record_graph) } /// Encodes a node that was promoted from the previous graph. It reads the information directly from @@ -581,20 +619,22 @@ impl EncoderState { record_graph: &Option>, colors: &DepNodeColorMap, ) -> DepNodeIndex { + let index = self.alloc_index(); let node = self.previous.index_to_node(prev_index); - let fingerprint = self.previous.fingerprint_by_index(prev_index); let edge_count = NodeInfo::encode_promoted::( - &mut self.encoder, + &mut self.mem_encoder, node, + index, fingerprint, prev_index, colors, &self.previous, ); - + self.flush_mem_encoder(); self.record( node, + index, edge_count, |this| { this.previous @@ -603,12 +643,14 @@ impl EncoderState { .collect() }, record_graph, - ) + ); + index } fn finish(self, profiler: &SelfProfilerRef) -> FileEncodeResult { let Self { mut encoder, + mem_encoder, total_node_count, total_edge_count, stats: _, @@ -617,6 +659,8 @@ impl EncoderState { previous, } = self; + encoder.emit_raw_bytes(&mem_encoder.data); + let node_count = total_node_count.try_into().unwrap(); let edge_count = total_edge_count.try_into().unwrap(); diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs index 0a55504a556d2..00bad8e70cf2f 100644 --- a/compiler/rustc_serialize/src/opaque.rs +++ b/compiler/rustc_serialize/src/opaque.rs @@ -10,6 +10,8 @@ use crate::int_overflow::DebugStrictAdd; use crate::leb128; use crate::serialize::{Decodable, Decoder, Encodable, Encoder}; +pub mod mem_encoder; + // ----------------------------------------------------------------------------- // Encoder // ----------------------------------------------------------------------------- diff --git a/compiler/rustc_serialize/src/opaque/mem_encoder.rs b/compiler/rustc_serialize/src/opaque/mem_encoder.rs new file mode 100644 index 0000000000000..5104df0421572 --- /dev/null +++ b/compiler/rustc_serialize/src/opaque/mem_encoder.rs @@ -0,0 +1,121 @@ +use super::IntEncodedWithFixedSize; +use crate::{Encodable, Encoder, leb128}; + +pub struct MemEncoder { + pub data: Vec, +} + +impl MemEncoder { + pub fn new() -> MemEncoder { + MemEncoder { data: vec![] } + } + + #[inline] + pub fn position(&self) -> usize { + self.data.len() + } + + pub fn finish(self) -> Vec { + self.data + } + + /// Write up to `N` bytes to this encoder. + /// + /// This function can be used to avoid the overhead of calling memcpy for writes that + /// have runtime-variable length, but are small and have a small fixed upper bound. + /// + /// This can be used to do in-place encoding as is done for leb128 (without this function + /// we would need to write to a temporary buffer then memcpy into the encoder), and it can + /// also be used to implement the varint scheme we use for rmeta and dep graph encoding, + /// where we only want to encode the first few bytes of an integer. Note that common + /// architectures support fixed-size writes up to 8 bytes with one instruction, so while this + /// does in some sense do wasted work, we come out ahead. + #[inline] + pub fn write_with(&mut self, visitor: impl FnOnce(&mut [u8; N]) -> usize) { + self.data.reserve(N); + + let old_len = self.data.len(); + // SAFETY: fix + let buf = unsafe { &mut *(self.data.as_mut_ptr().add(old_len) as *mut [u8; N]) }; + let written = visitor(buf); + if written > N { + Self::panic_invalid_write::(written); + } + unsafe { self.data.set_len(old_len + written) }; + } + + #[cold] + #[inline(never)] + fn panic_invalid_write(written: usize) { + panic!("MemEncoder::write_with::<{N}> cannot be used to write {written} bytes"); + } + + /// Helper for calls where [`MemEncoder::write_with`] always writes the whole array. + #[inline] + pub fn write_array(&mut self, buf: [u8; N]) { + self.write_with(|dest| { + *dest = buf; + N + }) + } +} + +macro_rules! write_leb128 { + ($this_fn:ident, $int_ty:ty, $write_leb_fn:ident) => { + #[inline] + fn $this_fn(&mut self, v: $int_ty) { + self.write_with(|buf| leb128::$write_leb_fn(buf, v)) + } + }; +} + +impl Encoder for MemEncoder { + write_leb128!(emit_usize, usize, write_usize_leb128); + write_leb128!(emit_u128, u128, write_u128_leb128); + write_leb128!(emit_u64, u64, write_u64_leb128); + write_leb128!(emit_u32, u32, write_u32_leb128); + + #[inline] + fn emit_u16(&mut self, v: u16) { + self.write_array(v.to_le_bytes()); + } + + #[inline] + fn emit_u8(&mut self, v: u8) { + self.write_array([v]); + } + + write_leb128!(emit_isize, isize, write_isize_leb128); + write_leb128!(emit_i128, i128, write_i128_leb128); + write_leb128!(emit_i64, i64, write_i64_leb128); + write_leb128!(emit_i32, i32, write_i32_leb128); + + #[inline] + fn emit_i16(&mut self, v: i16) { + self.write_array(v.to_le_bytes()); + } + + #[inline] + fn emit_raw_bytes(&mut self, s: &[u8]) { + self.data.extend_from_slice(s); + } +} + +// Specialize encoding byte slices. This specialization also applies to encoding `Vec`s, etc., +// since the default implementations call `encode` on their slices internally. +impl Encodable for [u8] { + fn encode(&self, e: &mut MemEncoder) { + Encoder::emit_usize(e, self.len()); + e.emit_raw_bytes(self); + } +} + +impl Encodable for IntEncodedWithFixedSize { + #[inline] + fn encode(&self, e: &mut MemEncoder) { + let start_pos = e.position(); + e.write_array(self.0.to_le_bytes()); + let end_pos = e.position(); + debug_assert_eq!((end_pos - start_pos), IntEncodedWithFixedSize::ENCODED_SIZE); + } +} From 4ec74df81f9a56e2eef5defea6edaa9eecd4fd10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Tue, 22 Apr 2025 03:43:10 +0200 Subject: [PATCH 2/5] Use `IndexVec::from_elem_n` --- .../src/dep_graph/serialized.rs | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/compiler/rustc_query_system/src/dep_graph/serialized.rs b/compiler/rustc_query_system/src/dep_graph/serialized.rs index cfea43d62e51c..9dabb26e8b281 100644 --- a/compiler/rustc_query_system/src/dep_graph/serialized.rs +++ b/compiler/rustc_query_system/src/dep_graph/serialized.rs @@ -200,16 +200,13 @@ impl SerializedDepGraph { let graph_bytes = d.len() - (2 * IntEncodedWithFixedSize::ENCODED_SIZE) - d.position(); - let mut nodes: IndexVec = (0..node_count) - .map(|_| DepNode { - kind: D::DEP_KIND_NULL, - hash: PackedFingerprint::from(Fingerprint::ZERO), - }) - .collect(); - let mut fingerprints: IndexVec = - (0..node_count).map(|_| Fingerprint::ZERO).collect(); - let mut edge_list_indices: IndexVec = - (0..node_count).map(|_| EdgeHeader { repr: 0, edges: 0 }).collect(); + let mut nodes = IndexVec::from_elem_n( + DepNode { kind: D::DEP_KIND_NULL, hash: PackedFingerprint::from(Fingerprint::ZERO) }, + node_count, + ); + let mut fingerprints = IndexVec::from_elem_n(Fingerprint::ZERO, node_count); + let mut edge_list_indices = + IndexVec::from_elem_n(EdgeHeader { repr: 0, edges: 0 }, node_count); // This estimation assumes that all of the encoded bytes are for the edge lists or for the // fixed-size node headers. But that's not necessarily true; if any edge list has a length From 36fa327623b6013b0a3da6e39ee1e85ac5275cd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Tue, 22 Apr 2025 03:47:22 +0200 Subject: [PATCH 3/5] Tweak edges --- .../rustc_query_system/src/dep_graph/serialized.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/compiler/rustc_query_system/src/dep_graph/serialized.rs b/compiler/rustc_query_system/src/dep_graph/serialized.rs index 9dabb26e8b281..0d4913e798d55 100644 --- a/compiler/rustc_query_system/src/dep_graph/serialized.rs +++ b/compiler/rustc_query_system/src/dep_graph/serialized.rs @@ -107,14 +107,11 @@ impl SerializedDepGraph { let header = self.edge_list_indices[source]; let mut raw = &self.edge_list_data[header.start()..]; - // The number of edges for this node is implicitly stored in the combination of the byte - // width and the length. let bytes_per_index = header.bytes_per_index(); - let len = header.edges; // LLVM doesn't hoist EdgeHeader::mask so we do it ourselves. let mask = header.mask(); - (0..len).map(move |_| { + (0..header.num_edges).map(move |_| { // Doing this slicing in this order ensures that the first bounds check suffices for // all the others. let index = &raw[..DEP_NODE_SIZE]; @@ -157,7 +154,7 @@ impl SerializedDepGraph { #[derive(Debug, Clone, Copy)] struct EdgeHeader { repr: usize, - edges: u32, + num_edges: u32, } impl EdgeHeader { @@ -206,7 +203,7 @@ impl SerializedDepGraph { ); let mut fingerprints = IndexVec::from_elem_n(Fingerprint::ZERO, node_count); let mut edge_list_indices = - IndexVec::from_elem_n(EdgeHeader { repr: 0, edges: 0 }, node_count); + IndexVec::from_elem_n(EdgeHeader { repr: 0, num_edges: 0 }, node_count); // This estimation assumes that all of the encoded bytes are for the edge lists or for the // fixed-size node headers. But that's not necessarily true; if any edge list has a length @@ -419,10 +416,10 @@ impl SerializedNodeHeader { } #[inline] - fn edges_header(&self, edge_list_data: &[u8], edges: u32) -> EdgeHeader { + fn edges_header(&self, edge_list_data: &[u8], num_edges: u32) -> EdgeHeader { EdgeHeader { repr: (edge_list_data.len() << DEP_NODE_WIDTH_BITS) | (self.bytes_per_index() - 1), - edges, + num_edges, } } } From 9a6e8b3807e91836f1936df507a231e38cf76d16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Tue, 22 Apr 2025 03:55:57 +0200 Subject: [PATCH 4/5] Make sure there's no duplicate indices in the dep graph --- compiler/rustc_query_system/src/dep_graph/serialized.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_query_system/src/dep_graph/serialized.rs b/compiler/rustc_query_system/src/dep_graph/serialized.rs index 0d4913e798d55..d2bcde143835e 100644 --- a/compiler/rustc_query_system/src/dep_graph/serialized.rs +++ b/compiler/rustc_query_system/src/dep_graph/serialized.rs @@ -225,7 +225,11 @@ impl SerializedDepGraph { let index = node_header.index(); - nodes[index] = node_header.node(); + let node = &mut nodes[index]; + // Make sure there's no duplicate indices in the dep graph. + assert!(node_header.node().kind != D::DEP_KIND_NULL && node.kind == D::DEP_KIND_NULL); + *node = node_header.node(); + fingerprints[index] = node_header.fingerprint(); // If the length of this node's edge list is small, the length is stored in the header. From c8e7c6261ea5bc581a3e10323677e720bf1dc42b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Fri, 25 Apr 2025 07:14:27 +0200 Subject: [PATCH 5/5] Zero the buffer passed from `write_with` --- compiler/rustc_serialize/src/opaque/mem_encoder.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/compiler/rustc_serialize/src/opaque/mem_encoder.rs b/compiler/rustc_serialize/src/opaque/mem_encoder.rs index 5104df0421572..56beebb49af73 100644 --- a/compiler/rustc_serialize/src/opaque/mem_encoder.rs +++ b/compiler/rustc_serialize/src/opaque/mem_encoder.rs @@ -35,8 +35,15 @@ impl MemEncoder { self.data.reserve(N); let old_len = self.data.len(); - // SAFETY: fix - let buf = unsafe { &mut *(self.data.as_mut_ptr().add(old_len) as *mut [u8; N]) }; + + // SAFETY: The above `reserve` ensures that there is enough + // room to write the encoded value to the vector's internal buffer. + // The memory is also initialized as 0. + let buf = unsafe { + let buf = self.data.as_mut_ptr().add(old_len) as *mut [u8; N]; + *buf = [0; N]; + &mut *buf + }; let written = visitor(buf); if written > N { Self::panic_invalid_write::(written);