Skip to content

Commit

Permalink
libct/cg: add CFS bandwidth burst for CPU
Browse files Browse the repository at this point in the history
Burstable CFS controller is introduced in Linux 5.14. This helps with
parallel workloads that might be bursty. They can get throttled even
when their average utilization is under quota. And they may be latency
sensitive at the same time so that throttling them is undesired.

This feature borrows time now against the future underrun, at the cost
of increased interference against the other system users, by introducing
cfs_burst_us into CFS bandwidth control to enact the cap on unused
bandwidth accumulation, which will then used additionally for burst.

The patch adds the support/control for CFS bandwidth burst.

runtime-spec: opencontainers/runtime-spec#1120

Co-authored-by: Akihiro Suda <suda.kyoto@gmail.com>
Co-authored-by: Nadeshiko Manju <lizheao940510@gmail.com>
Signed-off-by: Kailun Qin <kailun.qin@intel.com>
  • Loading branch information
3 people committed Aug 10, 2023
1 parent a698552 commit 3ead5f2
Show file tree
Hide file tree
Showing 12 changed files with 108 additions and 4 deletions.
1 change: 1 addition & 0 deletions contrib/completions/bash/runc
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,7 @@ _runc_update() {
--blkio-weight
--cpu-period
--cpu-quota
--cpu-burst
--cpu-rt-period
--cpu-rt-runtime
--cpu-share
Expand Down
1 change: 0 additions & 1 deletion docs/spec-conformance.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ Spec version | Feature | PR
v1.0.0 | `SCMP_ARCH_PARISC` | Unplanned, due to lack of users
v1.0.0 | `SCMP_ARCH_PARISC64` | Unplanned, due to lack of users
v1.0.2 | `.linux.personality` | [#3126](https://github.com/opencontainers/runc/pull/3126)
v1.1.0 | `.linux.resources.cpu.burst` | [#3749](https://github.com/opencontainers/runc/pull/3749)
v1.1.0 | `SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV` | [#3862](https://github.com/opencontainers/runc/pull/3862)
v1.1.0 | time namespaces | [#3876](https://github.com/opencontainers/runc/pull/3876)
v1.1.0 | rsvd hugetlb cgroup | TODO ([#3859](https://github.com/opencontainers/runc/issues/3859))
Expand Down
27 changes: 27 additions & 0 deletions libcontainer/cgroups/fs/cpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,28 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error {
period = ""
}
}

var burst string
if r.CpuBurst != 0 {
burst = strconv.FormatUint(r.CpuBurst, 10)
if err := cgroups.WriteFile(path, "cpu.cfs_burst_us", burst); err != nil {
// Sometimes when the burst to be set is larger
// than the current one, it is rejected by the kernel
// (EINVAL) as old_quota/new_burst exceeds the parent
// cgroup quota limit. If this happens and the quota is
// going to be set, ignore the error for now and retry
// after setting the quota.
if !errors.Is(err, unix.ENOENT) {
// this is a special trick for burst feature, the current systemd and low version of kernel will not support it.
// So, an `no such file or directory` error would be raised, and we can ignore it .
if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 {
return err
}
}
} else {
burst = ""
}
}
if r.CpuQuota != 0 {
if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil {
return err
Expand All @@ -93,6 +115,11 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error {
return err
}
}
if burst != "" {
if err := cgroups.WriteFile(path, "cpu.cfs_burst_us", burst); err != nil {
return err
}
}
}

if r.CPUIdle != nil {
Expand Down
12 changes: 12 additions & 0 deletions libcontainer/cgroups/fs/cpu_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ func TestCpuSetBandWidth(t *testing.T) {
const (
quotaBefore = 8000
quotaAfter = 5000
burstBefore = 2000
burstAfter = 1000
periodBefore = 10000
periodAfter = 7000
rtRuntimeBefore = 8000
Expand All @@ -55,13 +57,15 @@ func TestCpuSetBandWidth(t *testing.T) {

writeFileContents(t, path, map[string]string{
"cpu.cfs_quota_us": strconv.Itoa(quotaBefore),
"cpu.cfs_burst_us": strconv.Itoa(burstBefore),
"cpu.cfs_period_us": strconv.Itoa(periodBefore),
"cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore),
"cpu.rt_period_us": strconv.Itoa(rtPeriodBefore),
})

r := &configs.Resources{
CpuQuota: quotaAfter,
CpuBurst: burstAfter,
CpuPeriod: periodAfter,
CpuRtRuntime: rtRuntimeAfter,
CpuRtPeriod: rtPeriodAfter,
Expand All @@ -79,6 +83,14 @@ func TestCpuSetBandWidth(t *testing.T) {
t.Fatal("Got the wrong value, set cpu.cfs_quota_us failed.")
}

burst, err := fscommon.GetCgroupParamUint(path, "cpu.cfs_burst_us")
if err != nil {
t.Fatal(err)
}
if burst != burstAfter {
t.Fatal("Got the wrong value, set cpu.cfs_burst_us failed.")
}

period, err := fscommon.GetCgroupParamUint(path, "cpu.cfs_period_us")
if err != nil {
t.Fatal(err)
Expand Down
2 changes: 1 addition & 1 deletion libcontainer/cgroups/fs/fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ func (m *Manager) Set(r *configs.Resources) error {
if path == "" {
// We never created a path for this cgroup, so we cannot set
// limits for it (though we have already tried at this point).
return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
return fmt.Errorf("cannot set %s limit: container could not join or create cgroup, and the error is %w", sys.Name(), err)
}
return err
}
Expand Down
27 changes: 26 additions & 1 deletion libcontainer/cgroups/fs2/cpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,19 @@ package fs2

import (
"bufio"
"errors"
"os"
"strconv"

"golang.org/x/sys/unix"

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
)

func isCpuSet(r *configs.Resources) bool {
return r.CpuWeight != 0 || r.CpuQuota != 0 || r.CpuPeriod != 0 || r.CPUIdle != nil
return r.CpuWeight != 0 || r.CpuQuota != 0 || r.CpuPeriod != 0 || r.CPUIdle != nil || r.CpuBurst != 0
}

func setCpu(dirPath string, r *configs.Resources) error {
Expand All @@ -32,6 +35,23 @@ func setCpu(dirPath string, r *configs.Resources) error {
}
}

var burst string
if r.CpuBurst != 0 {
burst = strconv.FormatUint(r.CpuBurst, 10)
if err := cgroups.WriteFile(dirPath, "cpu.max.burst", burst); err != nil {
// Sometimes when the burst to be set is larger
// than the current one, it is rejected by the kernel
// (EINVAL) as old_quota/new_burst exceeds the parent
// cgroup quota limit. If this happens and the quota is
// going to be set, ignore the error for now and retry
// after setting the quota.
if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 {
return err
}
} else {
burst = ""
}
}
if r.CpuQuota != 0 || r.CpuPeriod != 0 {
str := "max"
if r.CpuQuota > 0 {
Expand All @@ -47,6 +67,11 @@ func setCpu(dirPath string, r *configs.Resources) error {
if err := cgroups.WriteFile(dirPath, "cpu.max", str); err != nil {
return err
}
if burst != "" {
if err := cgroups.WriteFile(dirPath, "cpu.max.burst", burst); err != nil {
return err
}
}
}

return nil
Expand Down
3 changes: 3 additions & 0 deletions libcontainer/configs/cgroup_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ type Resources struct {
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
CpuQuota int64 `json:"cpu_quota"`

// CPU hardcap burst limit (in usecs). Allowed accumulated cpu time additionally for burst in a given period.
CpuBurst uint64 `json:"cpu_burst"` //nolint:revive

// CPU period to be used for hardcapping (in usecs). 0 to use system default.
CpuPeriod uint64 `json:"cpu_period"`

Expand Down
3 changes: 3 additions & 0 deletions libcontainer/specconv/spec_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,9 @@ func CreateCgroupConfig(opts *CreateOpts, defaultDevs []*devices.Device) (*confi
if r.CPU.Quota != nil {
c.Resources.CpuQuota = *r.CPU.Quota
}
if r.CPU.Burst != nil {
c.Resources.CpuBurst = *r.CPU.Burst
}
if r.CPU.Period != nil {
c.Resources.CpuPeriod = *r.CPU.Period
}
Expand Down
4 changes: 4 additions & 0 deletions man/runc-update.8.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ In case **-r** is used, the JSON format is like this:
"cpu": {
"shares": 0,
"quota": 0,
"burst": 0,
"period": 0,
"realtimeRuntime": 0,
"realtimePeriod": 0,
Expand All @@ -53,6 +54,9 @@ stdin. If this option is used, all other options are ignored.
**--cpu-quota** _num_
: Set CPU usage limit within a given period (in microseconds).

**--cpu-burst** _num_
: Set CPU burst limit within a given period (in microseconds).

**--cpu-rt-period** _num_
: Set CPU realtime period to be used for hardcapping (in microseconds).

Expand Down
16 changes: 15 additions & 1 deletion tests/integration/helpers.bash
Original file line number Diff line number Diff line change
Expand Up @@ -260,11 +260,16 @@ function get_cgroup_value() {
# Helper to check a if value in a cgroup file matches the expected one.
function check_cgroup_value() {
local current
local cgroup
cgroup="$(get_cgroup_path "$1")"
if [ ! -f "$cgroup/$1" ]; then
skip "$cgroup/$1 does not exist"
fi
current="$(get_cgroup_value "$1")"
local expected=$2

echo "current $current !? $expected"
[ "$current" = "$expected" ]
[ "$current" = "$expected" ] || [ "$current" = "$((expected / 1000))" ]
}

# Helper to check a value in systemd.
Expand Down Expand Up @@ -310,6 +315,15 @@ function check_cpu_quota() {
check_systemd_value "CPUQuotaPeriodUSec" $sd_period $sd_infinity
}

function check_cpu_burst() {
local burst=$1
if [ -v CGROUP_V2 ]; then
check_cgroup_value "cpu.max.burst" "$burst"
else
check_cgroup_value "cpu.cfs_burst_us" "$burst"
fi
}

# Works for cgroup v1 and v2, accepts v1 shares as an argument.
function check_cpu_shares() {
local shares=$1
Expand Down
3 changes: 3 additions & 0 deletions tests/integration/update.bats
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,9 @@ EOF
runc update test_update --cpu-share 200
[ "$status" -eq 0 ]
check_cpu_shares 200
runc update test_update --cpu-period 900000 --cpu-burst 500000
[ "$status" -eq 0 ]
check_cpu_burst 500000

# Revert to the test initial value via json on stding
runc update -r - test_update <<EOF
Expand Down
13 changes: 13 additions & 0 deletions update.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ The accepted format is as follow (unchanged values can be omitted):
"cpu": {
"shares": 0,
"quota": 0,
"burst": 0,
"period": 0,
"realtimeRuntime": 0,
"realtimePeriod": 0,
Expand Down Expand Up @@ -73,6 +74,10 @@ other options are ignored.
Name: "cpu-quota",
Usage: "CPU CFS hardcap limit (in usecs). Allowed cpu time in a given period",
},
cli.StringFlag{
Name: "cpu-burst",
Usage: "CPU CFS hardcap burst limit (in usecs). Allowed accumulated cpu time additionally for burst a given period",
},
cli.StringFlag{
Name: "cpu-share",
Usage: "CPU shares (relative weight vs. other containers)",
Expand Down Expand Up @@ -153,6 +158,7 @@ other options are ignored.
CPU: &specs.LinuxCPU{
Shares: u64Ptr(0),
Quota: i64Ptr(0),
Burst: u64Ptr(0),
Period: u64Ptr(0),
RealtimeRuntime: i64Ptr(0),
RealtimePeriod: u64Ptr(0),
Expand Down Expand Up @@ -209,6 +215,7 @@ other options are ignored.
opt string
dest *uint64
}{
{"cpu-burst", r.CPU.Burst},
{"cpu-period", r.CPU.Period},
{"cpu-rt-period", r.CPU.RealtimePeriod},
{"cpu-share", r.CPU.Shares},
Expand Down Expand Up @@ -298,6 +305,12 @@ other options are ignored.
}
}

b := *r.CPU.Burst
if config.Cgroups.Resources.CpuPeriod == 0 && b != 0 {
return errors.New("period is not set when setting burst")
}
config.Cgroups.Resources.CpuBurst = b

config.Cgroups.Resources.CpuShares = *r.CPU.Shares
// CpuWeight is used for cgroupv2 and should be converted
config.Cgroups.Resources.CpuWeight = cgroups.ConvertCPUSharesToCgroupV2Value(*r.CPU.Shares)
Expand Down

0 comments on commit 3ead5f2

Please sign in to comment.