1
- // +build linux
1
+ // +build linux,!static_build
2
2
3
3
package systemd
4
4
5
5
import (
6
6
"errors"
7
7
"fmt"
8
- "io/ioutil"
9
8
"os"
10
9
"path/filepath"
11
10
"strings"
12
11
"sync"
12
+ "time"
13
13
14
14
systemdDbus "github.com/coreos/go-systemd/dbus"
15
15
systemdUtil "github.com/coreos/go-systemd/util"
16
16
"github.com/godbus/dbus"
17
17
"github.com/opencontainers/runc/libcontainer/cgroups"
18
18
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
19
19
"github.com/opencontainers/runc/libcontainer/configs"
20
+ "github.com/Sirupsen/logrus"
20
21
)
21
22
22
23
type Manager struct {
@@ -69,8 +70,13 @@ const (
69
70
)
70
71
71
72
var (
72
- connLock sync.Mutex
73
- theConn * systemdDbus.Conn
73
+ connLock sync.Mutex
74
+ theConn * systemdDbus.Conn
75
+ hasStartTransientUnit bool
76
+ hasStartTransientSliceUnit bool
77
+ hasTransientDefaultDependencies bool
78
+ hasDelegateScope bool
79
+ hasDelegateSlice bool
74
80
)
75
81
76
82
func newProp (name string , units interface {}) systemdDbus.Property {
@@ -81,7 +87,128 @@ func newProp(name string, units interface{}) systemdDbus.Property {
81
87
}
82
88
83
89
func UseSystemd () bool {
84
- return systemdUtil .IsRunningSystemd ()
90
+ if ! systemdUtil .IsRunningSystemd () {
91
+ return false
92
+ }
93
+
94
+ connLock .Lock ()
95
+ defer connLock .Unlock ()
96
+
97
+ if theConn == nil {
98
+ var err error
99
+ theConn , err = systemdDbus .New ()
100
+ if err != nil {
101
+ return false
102
+ }
103
+
104
+ // Assume we have StartTransientUnit
105
+ hasStartTransientUnit = true
106
+
107
+ // But if we get UnknownMethod error we don't
108
+ if _ , err := theConn .StartTransientUnit ("test.scope" , "invalid" , nil , nil ); err != nil {
109
+ if dbusError , ok := err .(dbus.Error ); ok {
110
+ if dbusError .Name == "org.freedesktop.DBus.Error.UnknownMethod" {
111
+ hasStartTransientUnit = false
112
+ return hasStartTransientUnit
113
+ }
114
+ }
115
+ }
116
+
117
+ // Ensure the scope name we use doesn't exist. Use the Pid to
118
+ // avoid collisions between multiple libcontainer users on a
119
+ // single host.
120
+ scope := fmt .Sprintf ("libcontainer-%d-systemd-test-default-dependencies.scope" , os .Getpid ())
121
+ testScopeExists := true
122
+ for i := 0 ; i <= testScopeWait ; i ++ {
123
+ if _ , err := theConn .StopUnit (scope , "replace" , nil ); err != nil {
124
+ if dbusError , ok := err .(dbus.Error ); ok {
125
+ if strings .Contains (dbusError .Name , "org.freedesktop.systemd1.NoSuchUnit" ) {
126
+ testScopeExists = false
127
+ break
128
+ }
129
+ }
130
+ }
131
+ time .Sleep (time .Millisecond )
132
+ }
133
+
134
+ // Bail out if we can't kill this scope without testing for DefaultDependencies
135
+ if testScopeExists {
136
+ return hasStartTransientUnit
137
+ }
138
+
139
+ // Assume StartTransientUnit on a scope allows DefaultDependencies
140
+ hasTransientDefaultDependencies = true
141
+ ddf := newProp ("DefaultDependencies" , false )
142
+ if _ , err := theConn .StartTransientUnit (scope , "replace" , []systemdDbus.Property {ddf }, nil ); err != nil {
143
+ if dbusError , ok := err .(dbus.Error ); ok {
144
+ if strings .Contains (dbusError .Name , "org.freedesktop.DBus.Error.PropertyReadOnly" ) {
145
+ hasTransientDefaultDependencies = false
146
+ }
147
+ }
148
+ }
149
+
150
+ // Not critical because of the stop unit logic above.
151
+ theConn .StopUnit (scope , "replace" , nil )
152
+
153
+ // Assume StartTransientUnit on a scope allows Delegate
154
+ hasDelegateScope = true
155
+ dlScope := newProp ("Delegate" , true )
156
+ if _ , err := theConn .StartTransientUnit (scope , "replace" , []systemdDbus.Property {dlScope }, nil ); err != nil {
157
+ if dbusError , ok := err .(dbus.Error ); ok {
158
+ if strings .Contains (dbusError .Name , "org.freedesktop.DBus.Error.PropertyReadOnly" ) {
159
+ hasDelegateScope = false
160
+ }
161
+ }
162
+ }
163
+
164
+ // Assume we have the ability to start a transient unit as a slice
165
+ // This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
166
+ // For details, see: https://bugzilla.redhat.com/show_bug.cgi?id=1370299
167
+ hasStartTransientSliceUnit = true
168
+
169
+ // To ensure simple clean-up, we create a slice off the root with no hierarchy
170
+ slice := fmt .Sprintf ("libcontainer_%d_systemd_test_default.slice" , os .Getpid ())
171
+ if _ , err := theConn .StartTransientUnit (slice , "replace" , nil , nil ); err != nil {
172
+ if _ , ok := err .(dbus.Error ); ok {
173
+ hasStartTransientSliceUnit = false
174
+ }
175
+ }
176
+
177
+ for i := 0 ; i <= testSliceWait ; i ++ {
178
+ if _ , err := theConn .StopUnit (slice , "replace" , nil ); err != nil {
179
+ if dbusError , ok := err .(dbus.Error ); ok {
180
+ if strings .Contains (dbusError .Name , "org.freedesktop.systemd1.NoSuchUnit" ) {
181
+ hasStartTransientSliceUnit = false
182
+ break
183
+ }
184
+ }
185
+ } else {
186
+ break
187
+ }
188
+ time .Sleep (time .Millisecond )
189
+ }
190
+
191
+ // Not critical because of the stop unit logic above.
192
+ theConn .StopUnit (slice , "replace" , nil )
193
+
194
+ // Assume StartTransientUnit on a slice allows Delegate
195
+ hasDelegateSlice = true
196
+ dlSlice := newProp ("Delegate" , true )
197
+ if _ , err := theConn .StartTransientUnit (slice , "replace" , []systemdDbus.Property {dlSlice }, nil ); err != nil {
198
+ if dbusError , ok := err .(dbus.Error ); ok {
199
+ // Starting with systemd v237, Delegate is not even a property of slices anymore,
200
+ // so the D-Bus call fails with "InvalidArgs" error.
201
+ if strings .Contains (dbusError .Name , "org.freedesktop.DBus.Error.PropertyReadOnly" ) || strings .Contains (dbusError .Name , "org.freedesktop.DBus.Error.InvalidArgs" ) {
202
+ hasDelegateSlice = false
203
+ }
204
+ }
205
+ }
206
+
207
+ // Not critical because of the stop unit logic above.
208
+ theConn .StopUnit (scope , "replace" , nil )
209
+ theConn .StopUnit (slice , "replace" , nil )
210
+ }
211
+ return hasStartTransientUnit
85
212
}
86
213
87
214
func (m * Manager ) Apply (pid int ) error {
@@ -117,6 +244,10 @@ func (m *Manager) Apply(pid int) error {
117
244
118
245
// if we create a slice, the parent is defined via a Wants=
119
246
if strings .HasSuffix (unitName , ".slice" ) {
247
+ // This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
248
+ if ! hasStartTransientSliceUnit {
249
+ return fmt .Errorf ("systemd version does not support ability to start a slice as transient unit" )
250
+ }
120
251
properties = append (properties , systemdDbus .PropWants (slice ))
121
252
} else {
122
253
// otherwise, we use Slice=
@@ -128,8 +259,17 @@ func (m *Manager) Apply(pid int) error {
128
259
properties = append (properties , newProp ("PIDs" , []uint32 {uint32 (pid )}))
129
260
}
130
261
131
- // This is only supported on systemd versions 218 and above.
132
- properties = append (properties , newProp ("Delegate" , true ))
262
+ // Check if we can delegate. This is only supported on systemd versions 218 and above.
263
+ if strings .HasSuffix (unitName , ".slice" ) {
264
+ if hasDelegateSlice {
265
+ // systemd 237 and above no longer allows delegation on a slice
266
+ properties = append (properties , newProp ("Delegate" , true ))
267
+ }
268
+ } else {
269
+ if hasDelegateScope {
270
+ properties = append (properties , newProp ("Delegate" , true ))
271
+ }
272
+ }
133
273
134
274
// Always enable accounting, this gets us the same behaviour as the fs implementation,
135
275
// plus the kernel has some problems with joining the memory cgroup at a later time.
@@ -138,7 +278,10 @@ func (m *Manager) Apply(pid int) error {
138
278
newProp ("CPUAccounting" , true ),
139
279
newProp ("BlockIOAccounting" , true ))
140
280
141
- properties = append (properties , newProp ("DefaultDependencies" , false ))
281
+ if hasTransientDefaultDependencies {
282
+ properties = append (properties ,
283
+ newProp ("DefaultDependencies" , false ))
284
+ }
142
285
143
286
if c .Resources .Memory != 0 {
144
287
properties = append (properties ,
@@ -147,14 +290,21 @@ func (m *Manager) Apply(pid int) error {
147
290
148
291
if c .Resources .CpuShares != 0 {
149
292
properties = append (properties ,
150
- newProp ("CPUShares" , uint64 ( c .Resources .CpuShares ) ))
293
+ newProp ("CPUShares" , c .Resources .CpuShares ))
151
294
}
152
295
153
296
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
154
297
if c .Resources .CpuQuota != 0 && c .Resources .CpuPeriod != 0 {
155
- cpuQuotaPerSecUSec := c .Resources .CpuQuota * 1000000 / c .Resources .CpuPeriod
298
+ cpuQuotaPerSecUSec := c .Resources .CpuQuota * 1000000 / c .Resources .CpuPeriod
299
+ // systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
300
+ // (integer percentage of CPU) internally. This means that if a fractional percent of
301
+ // CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
302
+ // 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
303
+ if cpuQuotaPerSecUSec % 10000 != 0 {
304
+ cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000 ) + 1 ) * 10000
305
+ }
156
306
properties = append (properties ,
157
- newProp ("CPUQuotaPerSecUSec" , uint64 ( cpuQuotaPerSecUSec ) ))
307
+ newProp ("CPUQuotaPerSecUSec" , cpuQuotaPerSecUSec ))
158
308
}
159
309
160
310
if c .Resources .BlkioWeight != 0 {
@@ -170,17 +320,16 @@ func (m *Manager) Apply(pid int) error {
170
320
}
171
321
}
172
322
173
- var err error
174
- theConn , err = systemdDbus .New ()
175
- if err != nil {
176
- return err
177
- }
178
-
179
323
statusChan := make (chan string )
180
- if _ , err := theConn .StartTransientUnit (unitName , "replace" , properties , statusChan ); err != nil && ! isUnitExists (err ) {
324
+ if _ , err := theConn .StartTransientUnit (unitName , "replace" , properties , statusChan ); err == nil {
325
+ select {
326
+ case <- statusChan :
327
+ case <- time .After (time .Second ):
328
+ logrus .Warnf ("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing..." , unitName )
329
+ }
330
+ } else if ! isUnitExists (err ) {
181
331
return err
182
332
}
183
- <- statusChan
184
333
185
334
if err := joinCgroups (c , pid ); err != nil {
186
335
return err
@@ -208,14 +357,6 @@ func (m *Manager) Destroy() error {
208
357
}
209
358
m .mu .Lock ()
210
359
defer m .mu .Unlock ()
211
-
212
- if theConn == nil {
213
- var err error
214
- theConn , err = systemdDbus .New ()
215
- if err != nil {
216
- return err
217
- }
218
- }
219
360
theConn .StopUnit (getUnitName (m .Cgroups ), "replace" , nil )
220
361
if err := cgroups .RemovePaths (m .Paths ); err != nil {
221
362
return err
@@ -231,15 +372,6 @@ func (m *Manager) GetPaths() map[string]string {
231
372
return paths
232
373
}
233
374
234
- func writeFile (dir , file , data string ) error {
235
- // Normally dir should not be empty, one case is that cgroup subsystem
236
- // is not mounted, we will get empty dir, and we want it fail here.
237
- if dir == "" {
238
- return fmt .Errorf ("no such directory for %s" , file )
239
- }
240
- return ioutil .WriteFile (filepath .Join (dir , file ), []byte (data ), 0700 )
241
- }
242
-
243
375
func join (c * configs.Cgroup , subsystem string , pid int ) (string , error ) {
244
376
path , err := getSubsystemPath (c , subsystem )
245
377
if err != nil {
@@ -260,7 +392,6 @@ func joinCgroups(c *configs.Cgroup, pid int) error {
260
392
switch name {
261
393
case "name=systemd" :
262
394
// let systemd handle this
263
- break
264
395
case "cpuset" :
265
396
path , err := getSubsystemPath (c , name )
266
397
if err != nil && ! cgroups .IsNotFound (err ) {
@@ -270,7 +401,6 @@ func joinCgroups(c *configs.Cgroup, pid int) error {
270
401
if err := s .ApplyDir (path , c , pid ); err != nil {
271
402
return err
272
403
}
273
- break
274
404
default :
275
405
_ , err := join (c , name , pid )
276
406
if err != nil {
@@ -294,7 +424,7 @@ func joinCgroups(c *configs.Cgroup, pid int) error {
294
424
295
425
// systemd represents slice hierarchy using `-`, so we need to follow suit when
296
426
// generating the path of slice. Essentially, test-a-b.slice becomes
297
- // test.slice/test-a.slice/test-a-b.slice.
427
+ // /test.slice/test-a.slice/test-a-b.slice.
298
428
func ExpandSlice (slice string ) (string , error ) {
299
429
suffix := ".slice"
300
430
// Name has to end with ".slice", but can't be just ".slice".
@@ -320,10 +450,9 @@ func ExpandSlice(slice string) (string, error) {
320
450
}
321
451
322
452
// Append the component to the path and to the prefix.
323
- path += prefix + component + suffix + "/"
453
+ path += "/" + prefix + component + suffix
324
454
prefix += component + "-"
325
455
}
326
-
327
456
return path , nil
328
457
}
329
458
@@ -333,7 +462,7 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
333
462
return "" , err
334
463
}
335
464
336
- initPath , err := cgroups .GetInitCgroupDir (subsystem )
465
+ initPath , err := cgroups .GetInitCgroup (subsystem )
337
466
if err != nil {
338
467
return "" , err
339
468
}
0 commit comments