Skip to content

Commit 28ad5ab

Browse files
authored
fix(pyroscope.java): Ensure the jfr is deleted on shutdown (#3630)
* fix(pyroscope.java): Ensure the jfr is deleted on shutdown * Address review comments * Improve log messages
1 parent f60b059 commit 28ad5ab

File tree

2 files changed

+158
-5
lines changed

2 files changed

+158
-5
lines changed

internal/component/pyroscope/java/loop.go

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ type profilingLoop struct {
3838
dist *asprof.Distribution
3939
jfrFile string
4040
startTime time.Time
41-
profiler *asprof.Profiler
41+
profiler Profiler
4242
sampleRate int
4343

4444
error error
@@ -49,7 +49,13 @@ type profilingLoop struct {
4949
totalSamples int64
5050
}
5151

52-
func newProfilingLoop(pid int, target discovery.Target, logger log.Logger, profiler *asprof.Profiler, output *pyroscope.Fanout, cfg ProfilingConfig) *profilingLoop {
52+
type Profiler interface {
53+
CopyLib(dist *asprof.Distribution, pid int) error
54+
Execute(dist *asprof.Distribution, argv []string) (string, string, error)
55+
Distribution() *asprof.Distribution
56+
}
57+
58+
func newProfilingLoop(pid int, target discovery.Target, logger log.Logger, profiler Profiler, output *pyroscope.Fanout, cfg ProfilingConfig) *profilingLoop {
5359
ctx, cancel := context.WithCancel(context.Background())
5460
dist := profiler.Distribution()
5561
p := &profilingLoop{
@@ -114,15 +120,37 @@ func (p *profilingLoop) loop(ctx context.Context) {
114120
}
115121
}
116122

123+
func (p *profilingLoop) cleanupJFR() {
124+
// first try to find through process path
125+
jfrFile := asprof.ProcessPath(p.jfrFile, p.pid)
126+
if err := os.Remove(jfrFile); os.IsNotExist(err) {
127+
// the process path was not found, this is possible when the target process stopped in the meantime.
128+
129+
if jfrFile == p.jfrFile {
130+
// nothing we can do, the process path was not actually a /proc path
131+
return
132+
}
133+
134+
jfrFile = p.jfrFile
135+
if err := os.Remove(jfrFile); os.IsNotExist(err) {
136+
_ = level.Debug(p.logger).Log("msg", "unable to delete jfr file, likely because target process is stopped and was containerized", "path", jfrFile, "err", err)
137+
// file not found on the host system, process was likely containerized and we can't delete this file anymore
138+
return
139+
} else if err != nil {
140+
_ = level.Warn(p.logger).Log("msg", "failed to delete jfr file at host path", "path", jfrFile, "err", err)
141+
}
142+
} else if err != nil {
143+
_ = level.Warn(p.logger).Log("msg", "failed to delete jfr file at process path", "path", jfrFile, "err", err)
144+
}
145+
}
146+
117147
func (p *profilingLoop) reset() error {
118148
jfrFile := asprof.ProcessPath(p.jfrFile, p.pid)
119149
startTime := p.startTime
120150
endTime := time.Now()
121151
sampleRate := p.sampleRate
122152
p.startTime = endTime
123-
defer func() {
124-
os.Remove(jfrFile)
125-
}()
153+
defer p.cleanupJFR()
126154

127155
err := p.stop()
128156
if err != nil {
@@ -262,6 +290,7 @@ func (p *profilingLoop) update(target discovery.Target, config ProfilingConfig)
262290
func (p *profilingLoop) Close() error {
263291
p.cancel()
264292
p.wg.Wait()
293+
p.cleanupJFR()
265294
return nil
266295
}
267296

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
//go:build (linux || darwin) && (amd64 || arm64)
2+
3+
package java
4+
5+
import (
6+
"context"
7+
"fmt"
8+
"os"
9+
"strconv"
10+
"testing"
11+
"time"
12+
13+
"github.com/go-kit/log"
14+
"github.com/prometheus/client_golang/prometheus"
15+
"github.com/prometheus/prometheus/model/labels"
16+
"github.com/stretchr/testify/mock"
17+
"github.com/stretchr/testify/require"
18+
19+
"github.com/grafana/alloy/internal/component/discovery"
20+
"github.com/grafana/alloy/internal/component/pyroscope"
21+
"github.com/grafana/alloy/internal/component/pyroscope/java/asprof"
22+
)
23+
24+
type mockProfiler struct {
25+
mock.Mock
26+
dist *asprof.Distribution
27+
}
28+
29+
func (m *mockProfiler) CopyLib(dist *asprof.Distribution, pid int) error {
30+
args := m.Called(dist, pid)
31+
return args.Error(0)
32+
}
33+
34+
func (m *mockProfiler) Execute(dist *asprof.Distribution, argv []string) (string, string, error) {
35+
args := m.Called(dist, argv)
36+
return args.String(0), args.String(1), args.Error(2)
37+
}
38+
39+
func (m *mockProfiler) Distribution() *asprof.Distribution {
40+
return m.dist
41+
}
42+
43+
type mockAppendable struct {
44+
mock.Mock
45+
}
46+
47+
func (m *mockAppendable) Appender() pyroscope.Appender {
48+
return m
49+
}
50+
51+
func (m *mockAppendable) Append(ctx context.Context, labels labels.Labels, samples []*pyroscope.RawSample) error {
52+
args := m.Called(ctx, labels, samples)
53+
return args.Error(0)
54+
}
55+
56+
func (m *mockAppendable) AppendIngest(ctx context.Context, profile *pyroscope.IncomingProfile) error {
57+
args := m.Called(ctx, profile)
58+
return args.Error(0)
59+
}
60+
61+
func newTestProfilingLoop(_ *testing.T, profiler *mockProfiler, appendable pyroscope.Appendable) *profilingLoop {
62+
reg := prometheus.NewRegistry()
63+
output := pyroscope.NewFanout([]pyroscope.Appendable{appendable}, "test-appendable", reg)
64+
logger := log.NewNopLogger()
65+
cfg := ProfilingConfig{
66+
Interval: 10 * time.Millisecond,
67+
SampleRate: 1000,
68+
CPU: true,
69+
Event: "cpu",
70+
}
71+
target := discovery.NewTargetFromMap(map[string]string{"foo": "bar"})
72+
return newProfilingLoop(os.Getpid(), target, logger, profiler, output, cfg)
73+
}
74+
75+
func TestProfilingLoop_StartStop(t *testing.T) {
76+
profiler := &mockProfiler{dist: &asprof.Distribution{}}
77+
appendable := &mockAppendable{}
78+
pid := os.Getpid()
79+
jfrPath := fmt.Sprintf("/tmp/asprof-%d-%d.jfr", pid, pid)
80+
81+
pCh := make(chan *profilingLoop)
82+
83+
profiler.On("CopyLib", profiler.dist, pid).Return(nil).Once()
84+
85+
// expect the profiler to be executed with the correct arguments to start it
86+
profiler.On("Execute", profiler.dist, []string{
87+
"-f",
88+
jfrPath,
89+
"-o", "jfr",
90+
"-e", "cpu",
91+
"-i", "1000000",
92+
"start",
93+
"--timeout", "0",
94+
strconv.Itoa(pid),
95+
}).Run(func(args mock.Arguments) {
96+
// wait for the profiling loop to be created
97+
p := <-pCh
98+
99+
// create the jfr file
100+
f, err := os.Create(p.jfrFile)
101+
require.NoError(t, err)
102+
defer f.Close()
103+
}).Return("", "", nil).Once()
104+
105+
// expect the profiler to be executed with the correct arguments to stop it
106+
profiler.On("Execute", profiler.dist, []string{
107+
"stop",
108+
"-o", "jfr",
109+
strconv.Itoa(pid),
110+
}).Return("", "", nil).Once()
111+
112+
p := newTestProfilingLoop(t, profiler, appendable)
113+
pCh <- p
114+
115+
// wait for the profiling loop to finish
116+
require.NoError(t, p.Close())
117+
118+
// expect the profiler to clean up the jfr file
119+
_, err := os.Stat(p.jfrFile)
120+
require.True(t, os.IsNotExist(err))
121+
122+
profiler.AssertExpectations(t)
123+
appendable.AssertExpectations(t)
124+
}

0 commit comments

Comments
 (0)