Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make one-shot stats faster #46448

Merged
merged 2 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
39 changes: 34 additions & 5 deletions daemon/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"runtime"
"time"

"github.com/containerd/containerd/log"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/backend"
"github.com/docker/docker/api/types/versions"
Expand Down Expand Up @@ -43,6 +44,15 @@ func (daemon *Daemon) ContainerStats(ctx context.Context, prefixOrName string, c
})
}

// Get container stats directly if OneShot is set
if config.OneShot {
stats, err := daemon.GetContainerStats(ctr)
xinfengliu marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return err
}
return json.NewEncoder(config.OutStream).Encode(stats)
}

outStream := config.OutStream
if config.Stream {
wf := ioutils.NewWriteFlusher(outStream)
Expand Down Expand Up @@ -148,15 +158,34 @@ func (daemon *Daemon) unsubscribeToContainerStats(c *container.Container, ch cha
func (daemon *Daemon) GetContainerStats(container *container.Container) (*types.StatsJSON, error) {
stats, err := daemon.stats(container)
if err != nil {
return nil, err
goto done
}

// Sample system CPU usage close to container usage to avoid
// noise in metric calculations.
// FIXME: move to containerd on Linux (not Windows)
stats.CPUStats.SystemUsage, stats.CPUStats.OnlineCPUs, err = getSystemCPUUsage()
if err != nil {
goto done
}

// We already have the network stats on Windows directly from HCS.
if !container.Config.NetworkDisabled && runtime.GOOS != "windows" {
if stats.Networks, err = daemon.getNetworkStats(container); err != nil {
return nil, err
}
stats.Networks, err = daemon.getNetworkStats(container)
}

return stats, nil
done:
switch err.(type) {
case nil:
return stats, nil
case errdefs.ErrConflict, errdefs.ErrNotFound:
// return empty stats containing only name and ID if not running or not found
return &types.StatsJSON{
Name: container.Name,
ID: container.ID,
}, nil
default:
log.G(context.TODO()).Errorf("collecting stats for container %s: %v", container.Name, err)
return nil, err
}
}
44 changes: 4 additions & 40 deletions daemon/stats/collector.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
package stats // import "github.com/docker/docker/daemon/stats"

import (
"bufio"
"context"
"sync"
"time"

"github.com/containerd/containerd/log"
"github.com/docker/docker/api/types"
"github.com/docker/docker/container"
"github.com/docker/docker/errdefs"
"github.com/moby/pubsub"
)

Expand All @@ -20,7 +16,6 @@ type Collector struct {
supervisor supervisor
interval time.Duration
publishers map[*container.Container]*pubsub.Publisher
bufReader *bufio.Reader
}

// NewCollector creates a stats collector that will poll the supervisor with the specified interval
Expand All @@ -29,7 +24,6 @@ func NewCollector(supervisor supervisor, interval time.Duration) *Collector {
interval: interval,
supervisor: supervisor,
publishers: make(map[*container.Container]*pubsub.Publisher),
bufReader: bufio.NewReaderSize(nil, 128),
}
s.cond = sync.NewCond(&s.m)
return s
Expand Down Expand Up @@ -108,45 +102,15 @@ func (s *Collector) Run() {

s.cond.L.Unlock()

onlineCPUs, err := s.getNumberOnlineCPUs()
if err != nil {
log.G(context.TODO()).Errorf("collecting system online cpu count: %v", err)
continue
}

for _, pair := range pairs {
stats, err := s.supervisor.GetContainerStats(pair.container)

switch err.(type) {
case nil:
// Sample system CPU usage close to container usage to avoid
// noise in metric calculations.
systemUsage, err := s.getSystemCPUUsage()
if err != nil {
log.G(context.TODO()).WithError(err).WithField("container_id", pair.container.ID).Errorf("collecting system cpu usage")
continue
}

// FIXME: move to containerd on Linux (not Windows)
stats.CPUStats.SystemUsage = systemUsage
stats.CPUStats.OnlineCPUs = onlineCPUs

pair.publisher.Publish(*stats)

case errdefs.ErrConflict, errdefs.ErrNotFound:
// publish empty stats containing only name and ID if not running or not found
pair.publisher.Publish(types.StatsJSON{
if err != nil {
stats = &types.StatsJSON{
Name: pair.container.Name,
ID: pair.container.ID,
})

default:
log.G(context.TODO()).Errorf("collecting stats for %s: %v", pair.container.ID, err)
pair.publisher.Publish(types.StatsJSON{
Name: pair.container.Name,
ID: pair.container.ID,
})
}
}
pair.publisher.Publish(*stats)
}

time.Sleep(s.interval)
Expand Down
75 changes: 0 additions & 75 deletions daemon/stats/collector_unix.go

This file was deleted.

12 changes: 0 additions & 12 deletions daemon/stats/collector_windows.go

This file was deleted.

61 changes: 61 additions & 0 deletions daemon/stats_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
package daemon // import "github.com/docker/docker/daemon"

import (
"bufio"
"context"
"fmt"
"os"
"strconv"
"strings"

statsV1 "github.com/containerd/cgroups/v3/cgroup1/stats"
Expand Down Expand Up @@ -296,3 +300,60 @@ func (daemon *Daemon) getNetworkStats(c *container.Container) (map[string]types.

return stats, nil
}

const (
	// clockTicksPerSecond is the value of `C.sysconf(C._SC_CLK_TCK)`. On
	// Linux it's a constant which is safe to be hard coded, so we can
	// avoid using cgo here. For details, see:
	// https://github.com/containerd/cgroups/pull/12
	clockTicksPerSecond  = 100
	nanoSecondsPerSecond = 1e9
)

// getSystemCPUUsage returns the host system's cpu usage in nanoseconds and
// the number of online CPUs.
//
// It reads /proc/stat: the aggregate "cpu " line supplies the usage (the sum
// of its first seven numeric fields, converted from clock ticks to
// nanoseconds) and every "cpu<digit>..." line counts as one online CPU.
// Scanning stops at the first line that does not start with "cpu". See
// `man 5 proc` for details on the individual fields.
//
// An error is returned if /proc/stat cannot be opened or scanned, if the
// aggregate line has fewer than seven values, or if one of those values is
// not an unsigned integer.
func getSystemCPUUsage() (cpuUsage uint64, cpuNum uint32, err error) {
	f, err := os.Open("/proc/stat")
	if err != nil {
		return 0, 0, err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := scanner.Text()
		if len(line) < 4 || line[:3] != "cpu" {
			break // Assume all cpu* records are at the front, like glibc https://github.com/bminor/glibc/blob/5d00c201b9a2da768a79ea8d5311f257871c0b43/sysdeps/unix/sysv/linux/getsysstats.c#L108-L135
		}

		switch {
		case line[3] == ' ':
			// Aggregate line: "cpu  user nice system idle iowait irq softirq ...".
			parts := strings.Fields(line)
			if len(parts) < 8 {
				return 0, 0, fmt.Errorf("invalid number of cpu fields")
			}
			var totalClockTicks uint64
			for _, i := range parts[1:8] {
				v, err := strconv.ParseUint(i, 10, 64)
				if err != nil {
					return 0, 0, fmt.Errorf("Unable to convert value %s to int: %w", i, err)
				}
				totalClockTicks += v
			}
			// Scale by the constant nanoseconds-per-tick factor (1e9/100 = 1e7)
			// rather than multiplying by 1e9 first and dividing afterwards: the
			// intermediate product wraps around uint64 after roughly 1.8e10
			// accumulated ticks, which a many-core host reaches within weeks.
			// The result is identical whenever the old expression did not wrap.
			cpuUsage = totalClockTicks * (nanoSecondsPerSecond / clockTicksPerSecond)
		case '0' <= line[3] && line[3] <= '9':
			// Per-CPU line ("cpu0", "cpu1", ...): one online CPU each.
			cpuNum++
		}
	}

	if err := scanner.Err(); err != nil {
		return 0, 0, fmt.Errorf("error scanning '/proc/stat' file: %w", err)
	}
	return cpuUsage, cpuNum, nil
}
8 changes: 8 additions & 0 deletions daemon/stats_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,11 @@ func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
// getNetworkStats returns an empty, non-nil network stats map and a nil
// error. The container argument is not consulted by this implementation.
func (daemon *Daemon) getNetworkStats(c *container.Container) (map[string]types.NetworkStats, error) {
	return map[string]types.NetworkStats{}, nil
}

// getSystemCPUUsage is the Windows counterpart of the host CPU sampler,
// which reports the system's cpu usage in nanoseconds and the number of
// online CPUs. This is a no-op on Windows: nothing is read, and the
// result is always zero usage, zero CPUs and a nil error.
func getSystemCPUUsage() (uint64, uint32, error) {
	var (
		cpuUsage uint64
		cpuNum   uint32
	)
	return cpuUsage, cpuNum, nil
}