Skip to content

Commit 8170f91

Browse files
[ENH] Reorganize load-service traces (#4518)
## Description of changes Remove the parent "workload" trace for the entire workload. It's too long-lasting, and contains too many sub-spans. Instead, generate a "step" root trace for each workload step. Events for each workload step will be associated with the step's root trace. This will make spans easier to read, and debug. ## Test plan Tested locally via tilt and jaeger. ## Documentation Changes N/A --------- Co-authored-by: propel-code-bot[bot] <203372662+propel-code-bot[bot]@users.noreply.github.com>
1 parent ed884fa commit 8170f91

File tree

1 file changed

+24
-42
lines changed

1 file changed

+24
-42
lines changed

rust/load/src/lib.rs

Lines changed: 24 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1271,13 +1271,11 @@ impl LoadService {
12711271
for declared in declared {
12721272
if let Entry::Vacant(entry) = running.entry(declared.uuid) {
12731273
tracing::info!("spawning workload {}", declared.uuid);
1274-
let root = tracing::info_span!(parent: None, "workload");
12751274
let this = Arc::clone(self);
12761275
let done = Arc::new(AtomicBool::new(false));
12771276
let done_p = Arc::clone(&done);
12781277
let inhibit = Arc::clone(&self.inhibit);
12791278
let task = tokio::task::spawn(async move {
1280-
let _enter = root.enter();
12811279
this.run_one_workload(done, inhibit, declared).await
12821280
});
12831281
entry.insert((done_p, task));
@@ -1295,36 +1293,6 @@ impl LoadService {
12951293
let client = Arc::new(client(spec.connection.clone()).await);
12961294
let mut guac = Guacamole::new(spec.expires.timestamp_millis() as u64);
12971295
let mut next_op = Instant::now();
1298-
let (tx, mut rx) = tokio::sync::mpsc::channel(1000);
1299-
let _ = tx
1300-
.send(tokio::spawn(async move { Ok::<(), Error>(()) }))
1301-
.await;
1302-
let this = Arc::clone(&self);
1303-
let data_set = Arc::clone(&spec.data_set);
1304-
let reaper = tokio::spawn(async move {
1305-
while let Some(task) = rx.recv().await {
1306-
if let Err(err) = task.await.unwrap() {
1307-
if !format!("{err:?}").contains("429") {
1308-
this.metrics.failed.add(
1309-
1,
1310-
&[KeyValue::new(
1311-
Key::from_static_str("data_set"),
1312-
Value::from(data_set.name()),
1313-
)],
1314-
);
1315-
tracing::error!("workload task failed: {err:?}");
1316-
} else {
1317-
this.metrics.limited.add(
1318-
1,
1319-
&[KeyValue::new(
1320-
Key::from_static_str("data_set"),
1321-
Value::from(data_set.name()),
1322-
)],
1323-
);
1324-
}
1325-
}
1326-
}
1327-
});
13281296

13291297
// Initialize the data set.
13301298
let data_set = Arc::clone(&spec.data_set);
@@ -1408,30 +1376,44 @@ impl LoadService {
14081376
.await
14091377
.map_err(|err| Error::FailWorkload(err.to_string()))
14101378
{
1411-
Ok(()) => Ok(()),
1379+
Ok(()) => (),
14121380
Err(err) => {
1413-
if err.to_string().contains("invalid request: No results") {
1381+
if format!("{err:?}").contains("invalid request: No results") {
14141382
this.metrics.no_results.add(
14151383
1,
14161384
&[KeyValue::new(
14171385
Key::from_static_str("data_set"),
14181386
Value::from(data_set.name()),
14191387
)],
14201388
);
1421-
Ok(())
1389+
tracing::warn!("workload step no results: {err:?}");
1390+
} else if !format!("{err:?}").contains("429") {
1391+
this.metrics.failed.add(
1392+
1,
1393+
&[KeyValue::new(
1394+
Key::from_static_str("data_set"),
1395+
Value::from(data_set.name()),
1396+
)],
1397+
);
1398+
tracing::error!("workload step failed: {err:?}");
14221399
} else {
1423-
Err(err)
1400+
this.metrics.limited.add(
1401+
1,
1402+
&[KeyValue::new(
1403+
Key::from_static_str("data_set"),
1404+
Value::from(data_set.name()),
1405+
)],
1406+
);
1407+
tracing::warn!("workload step rate limited: {err:?}");
14241408
}
14251409
}
1426-
}
1410+
};
14271411
};
1428-
tx.send(tokio::spawn(fut)).await.unwrap();
1412+
let span = tracing::info_span!(parent: None, "step", workload_uuid = %spec.uuid);
1413+
tokio::spawn(fut.instrument(span));
14291414
}
14301415
}
1431-
// Not an error, just needs to show up in stdout.
1432-
tracing::error!("workload done: {}/{}", spec.name, spec.description());
1433-
drop(tx);
1434-
reaper.await.unwrap();
1416+
tracing::info!("workload done: {}/{}", spec.name, spec.description());
14351417
}
14361418

14371419
fn load_persistent(&self) -> Result<(), Error> {

0 commit comments

Comments
 (0)