Add bench_cmp command for comparing two local benchmark results

s7tya · s7tya · commit 3dc4ea8102f0 · 2024-08-06T17:14:46.000+09:00
diff --git a/collector/src/bin/collector.rs b/collector/src/bin/collector.rs
@@ -18,6 +18,7 @@ use std::{str, time::Instant};
 use anyhow::Context;
 use clap::builder::TypedValueParser;
 use clap::{Arg, Parser};
+use collector::compare::compare_artifacts;
 use humansize::{format_size, BINARY};
 use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
 use tabled::builder::Builder;
@@ -628,6 +629,18 @@ enum Commands {
         #[command(flatten)]
         db: DbOption,
     },
+
+    /// Displays diff between two local bench results.
+    BenchCmp {
+        #[command(flatten)]
+        db: DbOption,
+
+        /// The name of the base artifact to be compared.
+        base: String,
+
+        /// The name of the modified artifact to be compared.
+        modified: String,
+    },
 }
 
 #[derive(Debug, clap::Parser)]
@@ -1187,6 +1200,13 @@ Make sure to modify `{dir}/perf-config.json` if the category/artifact don't matc
             println!("Data of artifact {name} were removed");
             Ok(0)
         }
+        Commands::BenchCmp { db, base, modified } => {
+            let pool = Pool::open(&db.db);
+            let rt = build_async_runtime();
+            let conn = rt.block_on(pool.connection());
+            rt.block_on(compare_artifacts(conn, base, modified))?;
+            Ok(0)
+        }
     }
 }
 
@@ -1736,7 +1756,6 @@ fn bench_compile(
                 category,
             ));
             print_intro();
-
             let mut processor = BenchProcessor::new(
                 tx.conn(),
                 benchmark_name,
diff --git a/collector/src/compare.rs b/collector/src/compare.rs
@@ -0,0 +1,144 @@
+use std::sync::Arc;
+
+use database::{
+    metric::Metric,
+    selector::{BenchmarkQuery, CompileBenchmarkQuery},
+    ArtifactId, Connection,
+};
+use tabled::{Table, Tabled};
+
+/// Smallest absolute relative change, expressed as a ratio (`0.002` is 0.2%),
+/// that is included in the summary.
+const SIGNIFICANCE_THRESHOLD: f64 = 0.002;
+
+/// Compare 2 artifacts and print the result.
+///
+/// Queries the `InstructionsUser` metric of every compile benchmark for the
+/// artifacts tagged `base` and `modified`, computes the relative change
+/// `(modified - base) / base` for each test case, and prints a table with the
+/// count, range and mean of the significant regressions, improvements and both.
+///
+/// # Errors
+///
+/// Returns an error if the benchmark query fails.
+pub async fn compare_artifacts(
+    mut conn: Box<dyn Connection>,
+    base: String,
+    modified: String,
+) -> anyhow::Result<()> {
+    let index = database::Index::load(&mut *conn).await;
+
+    let query = CompileBenchmarkQuery::default()
+        .metric(database::selector::Selector::One(Metric::InstructionsUser));
+    let resp = query
+        .execute(
+            &mut *conn,
+            &index,
+            Arc::new(vec![ArtifactId::Tag(base), ArtifactId::Tag(modified)]),
+        )
+        .await
+        .map_err(|e| anyhow::anyhow!("failed to query benchmark results: {e}"))?;
+
+    // One `(base, modified)` pair per returned series. The points are read with
+    // `get` so that a series with fewer than two points counts as missing data
+    // instead of panicking.
+    let tuple_pstats = resp
+        .into_iter()
+        .map(|resp| {
+            let points = resp.series.points.collect::<Vec<_>>();
+            (
+                points.first().copied().flatten(),
+                points.get(1).copied().flatten(),
+            )
+        })
+        .collect::<Vec<_>>();
+
+    /// Aggregate statistics of a set of relative changes (stored as ratios).
+    #[derive(Tabled)]
+    struct Regression {
+        count: usize,
+        #[tabled(display_with = "display_range")]
+        range: (Option<f64>, Option<f64>),
+        #[tabled(display_with = "display_mean")]
+        mean: Option<f64>,
+    }
+
+    /// Formats a ratio as a signed percentage, or `-` when there is no value.
+    fn format_value(value: Option<f64>) -> String {
+        match value {
+            // The changes are ratios, so scale them to match the `%` suffix.
+            Some(value) => format!("{:+.2}%", value * 100.0),
+            None => "-".to_string(),
+        }
+    }
+
+    /// Formats the `[min, max]` column.
+    fn display_range(&(min, max): &(Option<f64>, Option<f64>)) -> String {
+        format!("[{}, {}]", format_value(min), format_value(max))
+    }
+
+    /// Formats the mean column.
+    fn display_mean(value: &Option<f64>) -> String {
+        format_value(*value)
+    }
+
+    impl From<&[f64]> for Regression {
+        fn from(value: &[f64]) -> Self {
+            let min = value.iter().copied().min_by(|a, b| a.total_cmp(b));
+            let max = value.iter().copied().max_by(|a, b| a.total_cmp(b));
+            let count = value.len();
+
+            Regression {
+                range: (min, max),
+                count,
+                // An empty set has no mean; avoid dividing by zero.
+                mean: (count > 0).then(|| value.iter().sum::<f64>() / count as f64),
+            }
+        }
+    }
+
+    // Relative change of every test case measured on both artifacts, keeping
+    // only the changes that reach the significance threshold.
+    let change = tuple_pstats
+        .iter()
+        .filter_map(|&(a, b)| match (a, b) {
+            // A zero baseline has no meaningful relative change.
+            (Some(a), Some(b)) if a != 0.0 => Some((b - a) / a),
+            _ => None,
+        })
+        .filter(|c| c.abs() >= SIGNIFICANCE_THRESHOLD)
+        .collect::<Vec<_>>();
+    // The metric is an instruction count, where fewer is better: an increase
+    // is a regression (❌) and a decrease is an improvement (✅).
+    let regressions = change
+        .iter()
+        .copied()
+        .filter(|&c| c > 0.0)
+        .collect::<Vec<_>>();
+    let improvements = change
+        .iter()
+        .copied()
+        .filter(|&c| c < 0.0)
+        .collect::<Vec<_>>();
+
+    #[derive(Tabled)]
+    struct NamedRegression {
+        name: String,
+        #[tabled(inline)]
+        regression: Regression,
+    }
+
+    let rows = [regressions, improvements, change]
+        .into_iter()
+        .map(|c| Regression::from(c.as_slice()))
+        .zip(["❌", "✅", "✅, ❌"])
+        .map(|(c, label)| NamedRegression {
+            name: label.to_string(),
+            regression: c,
+        })
+        .collect::<Vec<_>>();
+
+    println!("{}", Table::new(rows));
+
+    Ok(())
+}
diff --git a/collector/src/lib.rs b/collector/src/lib.rs
@@ -9,6 +9,7 @@ pub mod api;
 pub mod artifact_stats;
 pub mod cargo;
 pub mod codegen;
+pub mod compare;
 pub mod compile;
 pub mod runtime;
 pub mod toolchain;