Skip to content

Commit dc95c54

Browse files
gdankel authored and ilia-cher committed
Refactor trace activities (#59360)
Summary:
Pull Request resolved: pytorch/pytorch#59360
Pull Request resolved: #206

Replace ClientTraceActivity with GenericTraceActivity. In addition:
* Add a couple of new activity types for user annotations.
* Simplify the code for GPU-side user annotations.
* Add an accessor on activities for the containing trace span object. Later this can be replaced with a trace context / trace session object.
* Simplify MemoryTraceLogger.
* Add an early exit for CUPTI push/pop of correlation IDs.

Reviewed By: ilia-cher
Differential Revision: D28231675
fbshipit-source-id: 7129f2493016efb4d3697094f24475e2c39e6e65
1 parent 540289c commit dc95c54

20 files changed

+276
-306
lines changed

libkineto/include/ActivityType.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,29 @@
77

88
#pragma once
99

10+
#include <array>
11+
#include <string>
12+
1013
namespace libkineto {
1114

1215
// Kinds of activity that can be recorded in a trace.
// Enumerators are numbered consecutively from 0, so ENUM_COUNT must stay
// last: its value is then the number of real activity types.
enum class ActivityType {
  CPU_OP = 0, // cpu side ops
  USER_ANNOTATION,
  GPU_USER_ANNOTATION,
  GPU_MEMCPY,
  GPU_MEMSET,
  CONCURRENT_KERNEL, // on-device kernels
  EXTERNAL_CORRELATION,
  CUDA_RUNTIME, // host side cuda runtime events
  CPU_INSTANT_EVENT, // host side point-like events
  ENUM_COUNT // sentinel, not an activity type
};

// Conversions between an ActivityType and a string form.
// Both are only declared here; their definitions live outside this header.
const char* toString(ActivityType t);
ActivityType toActivityType(const std::string& str);

// Number of activity types, i.e. the numeric value of the ENUM_COUNT sentinel.
constexpr int activityTypeCount = static_cast<int>(ActivityType::ENUM_COUNT);

// Return an array of all activity types except ENUM_COUNT
const std::array<ActivityType, activityTypeCount> activityTypes();
34+
2235
} // namespace libkineto

libkineto/include/GenericTraceActivity.h

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,28 @@
1414

1515
#include "ThreadUtil.h"
1616
#include "TraceActivity.h"
17+
#include "TraceSpan.h"
1718

1819
namespace libkineto {
1920

2021
// @lint-ignore-every CLANGTIDY cppcoreguidelines-non-private-member-variables-in-classes
2122
// @lint-ignore-every CLANGTIDY cppcoreguidelines-pro-type-member-init
22-
struct GenericTraceActivity : TraceActivity {
23+
class GenericTraceActivity : public TraceActivity {
24+
25+
public:
26+
GenericTraceActivity() = delete;
27+
28+
GenericTraceActivity(
29+
const TraceSpan& trace, ActivityType type, const std::string& name)
30+
: activityType(type), activityName(name), traceSpan_(&trace) {
31+
}
2332

2433
int64_t deviceId() const override {
2534
return device;
2635
}
2736

2837
int64_t resourceId() const override {
29-
return sysThreadId;
38+
return resource;
3039
}
3140

3241
int64_t timestamp() const override {
@@ -38,7 +47,7 @@ struct GenericTraceActivity : TraceActivity {
3847
}
3948

4049
int64_t correlationId() const override {
41-
return correlation;
50+
return id;
4251
}
4352

4453
ActivityType type() const override {
@@ -53,6 +62,10 @@ struct GenericTraceActivity : TraceActivity {
5362
return nullptr;
5463
}
5564

65+
const TraceSpan* traceSpan() const override {
66+
return traceSpan_;
67+
}
68+
5669
void log(ActivityLogger& logger) const override;
5770

5871
//Encode client side metadata as a key/value string.
@@ -68,13 +81,14 @@ struct GenericTraceActivity : TraceActivity {
6881

6982
int64_t startTime{0};
7083
int64_t endTime{0};
71-
int64_t correlation{0};
72-
int device{-1};
73-
int32_t sysThreadId{0};
74-
std::string activityName;
84+
int32_t id{0};
85+
int32_t device{0};
86+
int32_t resource{0};
7587
ActivityType activityType;
88+
std::string activityName;
7689

7790
private:
91+
const TraceSpan* traceSpan_;
7892
std::vector<std::string> metadata_;
7993
};
8094

libkineto/include/TraceActivity.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
namespace libkineto {
1515

1616
class ActivityLogger;
17+
struct TraceSpan;
1718

1819
// Generic activity interface is borrowed from tensorboard protobuf format.
1920
struct TraceActivity {
@@ -33,6 +34,8 @@ struct TraceActivity {
3334
virtual const std::string name() const = 0;
3435
// Optional linked activity
3536
virtual const TraceActivity* linkedActivity() const = 0;
37+
// Optional containing trace object
38+
virtual const TraceSpan* traceSpan() const = 0;
3639
// Log activity
3740
virtual void log(ActivityLogger& logger) const = 0;
3841

libkineto/include/TraceSpan.h

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,37 @@
77

88
#pragma once
99

10+
#include <atomic>
#include <cstdint>
#include <string>
#include <thread>
#include <utility>
1213

1314
namespace libkineto {
1415

1516
// Describes one trace span. Activities hold a pointer to the span that
// contains them. There is no default constructor: a span must be created
// either from a time range or from op-count / iteration information.
struct TraceSpan {
  TraceSpan() = delete;

  // Span described by a start/end time pair and a name.
  // opCount, iteration, prefix and tracked keep their in-class defaults.
  TraceSpan(
      int64_t startTime, int64_t endTime, std::string name)
      : startTime(startTime), endTime(endTime), name(std::move(name)) {
  }

  // Span described by an op count, an iteration number, a name and a prefix.
  // startTime, endTime and tracked keep their in-class defaults.
  TraceSpan(
      int opCount, int it, std::string name, std::string prefix)
      : opCount(opCount),
        iteration(it),
        name(std::move(name)),
        prefix(std::move(prefix)) {
  }

  // FIXME: change to duration?
  int64_t startTime{0};
  int64_t endTime{0};
  int opCount{0};
  int iteration{-1};
  // Name is used to identify timeline
  std::string name;
  // Prefix used to distinguish trace spans on the same timeline
  std::string prefix;
  // Tracked by profiler for iteration trigger
  bool tracked{false};
};
2642

2743
} // namespace libkineto

libkineto/include/libkineto.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ namespace libkineto {
3838
class Config;
3939

4040
struct CpuTraceBuffer {
41-
TraceSpan span;
41+
TraceSpan span{0, 0, "none"};
4242
int gpuOpCount;
4343
std::vector<GenericTraceActivity> activities;
4444
};

0 commit comments

Comments
 (0)