Skip to content

Commit a9d1096

Browse files
authored
Merge pull request #9 from projectatomic/rhel_cgroups_fixes
Backport of cgroups fixes
2 parents e9c345b + 613d265 commit a9d1096

File tree

2 files changed

+197
-44
lines changed

2 files changed

+197
-44
lines changed

libcontainer/cgroups/systemd/apply_systemd.go

Lines changed: 171 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,23 @@
1-
// +build linux
1+
// +build linux,!static_build
22

33
package systemd
44

55
import (
66
"errors"
77
"fmt"
8-
"io/ioutil"
98
"os"
109
"path/filepath"
1110
"strings"
1211
"sync"
12+
"time"
1313

1414
systemdDbus "github.com/coreos/go-systemd/dbus"
1515
systemdUtil "github.com/coreos/go-systemd/util"
1616
"github.com/godbus/dbus"
1717
"github.com/opencontainers/runc/libcontainer/cgroups"
1818
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
1919
"github.com/opencontainers/runc/libcontainer/configs"
20+
"github.com/Sirupsen/logrus"
2021
)
2122

2223
type Manager struct {
@@ -69,8 +70,13 @@ const (
6970
)
7071

7172
var (
72-
connLock sync.Mutex
73-
theConn *systemdDbus.Conn
73+
connLock sync.Mutex
74+
theConn *systemdDbus.Conn
75+
hasStartTransientUnit bool
76+
hasStartTransientSliceUnit bool
77+
hasTransientDefaultDependencies bool
78+
hasDelegateScope bool
79+
hasDelegateSlice bool
7480
)
7581

7682
func newProp(name string, units interface{}) systemdDbus.Property {
@@ -81,7 +87,128 @@ func newProp(name string, units interface{}) systemdDbus.Property {
8187
}
8288

8389
func UseSystemd() bool {
84-
return systemdUtil.IsRunningSystemd()
90+
if !systemdUtil.IsRunningSystemd() {
91+
return false
92+
}
93+
94+
connLock.Lock()
95+
defer connLock.Unlock()
96+
97+
if theConn == nil {
98+
var err error
99+
theConn, err = systemdDbus.New()
100+
if err != nil {
101+
return false
102+
}
103+
104+
// Assume we have StartTransientUnit
105+
hasStartTransientUnit = true
106+
107+
// But if we get UnknownMethod error we don't
108+
if _, err := theConn.StartTransientUnit("test.scope", "invalid", nil, nil); err != nil {
109+
if dbusError, ok := err.(dbus.Error); ok {
110+
if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" {
111+
hasStartTransientUnit = false
112+
return hasStartTransientUnit
113+
}
114+
}
115+
}
116+
117+
// Ensure the scope name we use doesn't exist. Use the Pid to
118+
// avoid collisions between multiple libcontainer users on a
119+
// single host.
120+
scope := fmt.Sprintf("libcontainer-%d-systemd-test-default-dependencies.scope", os.Getpid())
121+
testScopeExists := true
122+
for i := 0; i <= testScopeWait; i++ {
123+
if _, err := theConn.StopUnit(scope, "replace", nil); err != nil {
124+
if dbusError, ok := err.(dbus.Error); ok {
125+
if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") {
126+
testScopeExists = false
127+
break
128+
}
129+
}
130+
}
131+
time.Sleep(time.Millisecond)
132+
}
133+
134+
// Bail out if we can't kill this scope without testing for DefaultDependencies
135+
if testScopeExists {
136+
return hasStartTransientUnit
137+
}
138+
139+
// Assume StartTransientUnit on a scope allows DefaultDependencies
140+
hasTransientDefaultDependencies = true
141+
ddf := newProp("DefaultDependencies", false)
142+
if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{ddf}, nil); err != nil {
143+
if dbusError, ok := err.(dbus.Error); ok {
144+
if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") {
145+
hasTransientDefaultDependencies = false
146+
}
147+
}
148+
}
149+
150+
// Not critical because of the stop unit logic above.
151+
theConn.StopUnit(scope, "replace", nil)
152+
153+
// Assume StartTransientUnit on a scope allows Delegate
154+
hasDelegateScope = true
155+
dlScope := newProp("Delegate", true)
156+
if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{dlScope}, nil); err != nil {
157+
if dbusError, ok := err.(dbus.Error); ok {
158+
if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") {
159+
hasDelegateScope = false
160+
}
161+
}
162+
}
163+
164+
// Assume we have the ability to start a transient unit as a slice
165+
// This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
166+
// For details, see: https://bugzilla.redhat.com/show_bug.cgi?id=1370299
167+
hasStartTransientSliceUnit = true
168+
169+
// To ensure simple clean-up, we create a slice off the root with no hierarchy
170+
slice := fmt.Sprintf("libcontainer_%d_systemd_test_default.slice", os.Getpid())
171+
if _, err := theConn.StartTransientUnit(slice, "replace", nil, nil); err != nil {
172+
if _, ok := err.(dbus.Error); ok {
173+
hasStartTransientSliceUnit = false
174+
}
175+
}
176+
177+
for i := 0; i <= testSliceWait; i++ {
178+
if _, err := theConn.StopUnit(slice, "replace", nil); err != nil {
179+
if dbusError, ok := err.(dbus.Error); ok {
180+
if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") {
181+
hasStartTransientSliceUnit = false
182+
break
183+
}
184+
}
185+
} else {
186+
break
187+
}
188+
time.Sleep(time.Millisecond)
189+
}
190+
191+
// Not critical because of the stop unit logic above.
192+
theConn.StopUnit(slice, "replace", nil)
193+
194+
// Assume StartTransientUnit on a slice allows Delegate
195+
hasDelegateSlice = true
196+
dlSlice := newProp("Delegate", true)
197+
if _, err := theConn.StartTransientUnit(slice, "replace", []systemdDbus.Property{dlSlice}, nil); err != nil {
198+
if dbusError, ok := err.(dbus.Error); ok {
199+
// Starting with systemd v237, Delegate is not even a property of slices anymore,
200+
// so the D-Bus call fails with "InvalidArgs" error.
201+
if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") || strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.InvalidArgs") {
202+
hasDelegateSlice = false
203+
}
204+
}
205+
}
206+
207+
// Not critical because of the stop unit logic above.
208+
theConn.StopUnit(scope, "replace", nil)
209+
theConn.StopUnit(slice, "replace", nil)
210+
}
211+
return hasStartTransientUnit
85212
}
86213

87214
func (m *Manager) Apply(pid int) error {
@@ -117,6 +244,10 @@ func (m *Manager) Apply(pid int) error {
117244

118245
// if we create a slice, the parent is defined via a Wants=
119246
if strings.HasSuffix(unitName, ".slice") {
247+
// This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
248+
if !hasStartTransientSliceUnit {
249+
return fmt.Errorf("systemd version does not support ability to start a slice as transient unit")
250+
}
120251
properties = append(properties, systemdDbus.PropWants(slice))
121252
} else {
122253
// otherwise, we use Slice=
@@ -128,8 +259,17 @@ func (m *Manager) Apply(pid int) error {
128259
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
129260
}
130261

131-
// This is only supported on systemd versions 218 and above.
132-
properties = append(properties, newProp("Delegate", true))
262+
// Check if we can delegate. This is only supported on systemd versions 218 and above.
263+
if strings.HasSuffix(unitName, ".slice") {
264+
if hasDelegateSlice {
265+
// systemd 237 and above no longer allows delegation on a slice
266+
properties = append(properties, newProp("Delegate", true))
267+
}
268+
} else {
269+
if hasDelegateScope {
270+
properties = append(properties, newProp("Delegate", true))
271+
}
272+
}
133273

134274
// Always enable accounting, this gets us the same behaviour as the fs implementation,
135275
// plus the kernel has some problems with joining the memory cgroup at a later time.
@@ -138,7 +278,10 @@ func (m *Manager) Apply(pid int) error {
138278
newProp("CPUAccounting", true),
139279
newProp("BlockIOAccounting", true))
140280

141-
properties = append(properties, newProp("DefaultDependencies", false))
281+
if hasTransientDefaultDependencies {
282+
properties = append(properties,
283+
newProp("DefaultDependencies", false))
284+
}
142285

143286
if c.Resources.Memory != 0 {
144287
properties = append(properties,
@@ -147,14 +290,21 @@ func (m *Manager) Apply(pid int) error {
147290

148291
if c.Resources.CpuShares != 0 {
149292
properties = append(properties,
150-
newProp("CPUShares", uint64(c.Resources.CpuShares)))
293+
newProp("CPUShares", c.Resources.CpuShares))
151294
}
152295

153296
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
154297
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
155-
cpuQuotaPerSecUSec := c.Resources.CpuQuota * 1000000 / c.Resources.CpuPeriod
298+
cpuQuotaPerSecUSec := c.Resources.CpuQuota*1000000 / c.Resources.CpuPeriod
299+
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
300+
// (integer percentage of CPU) internally. This means that if a fractional percent of
301+
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
302+
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
303+
if cpuQuotaPerSecUSec%10000 != 0 {
304+
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
305+
}
156306
properties = append(properties,
157-
newProp("CPUQuotaPerSecUSec", uint64(cpuQuotaPerSecUSec)))
307+
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
158308
}
159309

160310
if c.Resources.BlkioWeight != 0 {
@@ -170,17 +320,16 @@ func (m *Manager) Apply(pid int) error {
170320
}
171321
}
172322

173-
var err error
174-
theConn, err = systemdDbus.New()
175-
if err != nil {
176-
return err
177-
}
178-
179323
statusChan := make(chan string)
180-
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err != nil && !isUnitExists(err) {
324+
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil {
325+
select {
326+
case <-statusChan:
327+
case <-time.After(time.Second):
328+
logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", unitName)
329+
}
330+
} else if !isUnitExists(err) {
181331
return err
182332
}
183-
<-statusChan
184333

185334
if err := joinCgroups(c, pid); err != nil {
186335
return err
@@ -208,14 +357,6 @@ func (m *Manager) Destroy() error {
208357
}
209358
m.mu.Lock()
210359
defer m.mu.Unlock()
211-
212-
if theConn == nil {
213-
var err error
214-
theConn, err = systemdDbus.New()
215-
if err != nil {
216-
return err
217-
}
218-
}
219360
theConn.StopUnit(getUnitName(m.Cgroups), "replace", nil)
220361
if err := cgroups.RemovePaths(m.Paths); err != nil {
221362
return err
@@ -231,15 +372,6 @@ func (m *Manager) GetPaths() map[string]string {
231372
return paths
232373
}
233374

234-
func writeFile(dir, file, data string) error {
235-
// Normally dir should not be empty, one case is that cgroup subsystem
236-
// is not mounted, we will get empty dir, and we want it fail here.
237-
if dir == "" {
238-
return fmt.Errorf("no such directory for %s", file)
239-
}
240-
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
241-
}
242-
243375
func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
244376
path, err := getSubsystemPath(c, subsystem)
245377
if err != nil {
@@ -260,7 +392,6 @@ func joinCgroups(c *configs.Cgroup, pid int) error {
260392
switch name {
261393
case "name=systemd":
262394
// let systemd handle this
263-
break
264395
case "cpuset":
265396
path, err := getSubsystemPath(c, name)
266397
if err != nil && !cgroups.IsNotFound(err) {
@@ -270,7 +401,6 @@ func joinCgroups(c *configs.Cgroup, pid int) error {
270401
if err := s.ApplyDir(path, c, pid); err != nil {
271402
return err
272403
}
273-
break
274404
default:
275405
_, err := join(c, name, pid)
276406
if err != nil {
@@ -294,7 +424,7 @@ func joinCgroups(c *configs.Cgroup, pid int) error {
294424

295425
// systemd represents slice hierarchy using `-`, so we need to follow suit when
296426
// generating the path of slice. Essentially, test-a-b.slice becomes
297-
// test.slice/test-a.slice/test-a-b.slice.
427+
// /test.slice/test-a.slice/test-a-b.slice.
298428
func ExpandSlice(slice string) (string, error) {
299429
suffix := ".slice"
300430
// Name has to end with ".slice", but can't be just ".slice".
@@ -320,10 +450,9 @@ func ExpandSlice(slice string) (string, error) {
320450
}
321451

322452
// Append the component to the path and to the prefix.
323-
path += prefix + component + suffix + "/"
453+
path += "/" + prefix + component + suffix
324454
prefix += component + "-"
325455
}
326-
327456
return path, nil
328457
}
329458

@@ -333,7 +462,7 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
333462
return "", err
334463
}
335464

336-
initPath, err := cgroups.GetInitCgroupDir(subsystem)
465+
initPath, err := cgroups.GetInitCgroup(subsystem)
337466
if err != nil {
338467
return "", err
339468
}

libcontainer/cgroups/utils.go

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,8 +235,7 @@ func GetThisCgroupDir(subsystem string) (string, error) {
235235
return getControllerPath(subsystem, cgroups)
236236
}
237237

238-
func GetInitCgroupDir(subsystem string) (string, error) {
239-
238+
func GetInitCgroup(subsystem string) (string, error) {
240239
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
241240
if err != nil {
242241
return "", err
@@ -245,6 +244,31 @@ func GetInitCgroupDir(subsystem string) (string, error) {
245244
return getControllerPath(subsystem, cgroups)
246245
}
247246

247+
func GetInitCgroupPath(subsystem string) (string, error) {
248+
cgroup, err := GetInitCgroup(subsystem)
249+
if err != nil {
250+
return "", err
251+
}
252+
253+
return getCgroupPathHelper(subsystem, cgroup)
254+
}
255+
256+
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
257+
mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
258+
if err != nil {
259+
return "", err
260+
}
261+
262+
// This is needed for nested containers, because in /proc/self/cgroup we
263+
// see pathes from host, which don't exist in container.
264+
relCgroup, err := filepath.Rel(root, cgroup)
265+
if err != nil {
266+
return "", err
267+
}
268+
269+
return filepath.Join(mnt, relCgroup), nil
270+
}
271+
248272
func readProcsFile(dir string) ([]int, error) {
249273
f, err := os.Open(filepath.Join(dir, CgroupProcesses))
250274
if err != nil {

0 commit comments

Comments
 (0)