Skip to content

Commit

Permalink
balancer/weightedroundrobin: add load balancing policy (A58) (#6241)
Browse files Browse the repository at this point in the history
  • Loading branch information
dfawley committed May 8, 2023
1 parent c44f77e commit 5c4bee5
Show file tree
Hide file tree
Showing 10 changed files with 1,545 additions and 21 deletions.
532 changes: 532 additions & 0 deletions balancer/weightedroundrobin/balancer.go

Large diffs are not rendered by default.

713 changes: 713 additions & 0 deletions balancer/weightedroundrobin/balancer_test.go

Large diffs are not rendered by default.

60 changes: 60 additions & 0 deletions balancer/weightedroundrobin/config.go
@@ -0,0 +1,60 @@
/*
*
* Copyright 2023 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package weightedroundrobin

import (
"time"

"google.golang.org/grpc/serviceconfig"
)

type lbConfig struct {
serviceconfig.LoadBalancingConfig `json:"-"`

// Whether to enable out-of-band utilization reporting collection from the
// endpoints. By default, per-request utilization reporting is used.
EnableOOBLoadReport bool `json:"enableOobLoadReport,omitempty"`

// Load reporting interval to request from the server. Note that the
// server may not provide reports as frequently as the client requests.
// Used only when enable_oob_load_report is true. Default is 10 seconds.
OOBReportingPeriod time.Duration `json:"oobReportingPeriod,omitempty"`

// A given endpoint must report load metrics continuously for at least this
// long before the endpoint weight will be used. This avoids churn when
// the set of endpoint addresses changes. Takes effect both immediately
// after we establish a connection to an endpoint and after
// weight_expiration_period has caused us to stop using the most recent
// load metrics. Default is 10 seconds.
BlackoutPeriod time.Duration `json:"blackoutPeriod,omitempty"`

// If a given endpoint has not reported load metrics in this long,
// then we stop using the reported weight. This ensures that we do
// not continue to use very stale weights. Once we stop using a stale
// value, if we later start seeing fresh reports again, the
// blackout_period applies. Defaults to 3 minutes.
WeightExpirationPeriod time.Duration `json:"weightExpirationPeriod,omitempty"`

// How often endpoint weights are recalculated. Default is 1 second.
WeightUpdatePeriod time.Duration `json:"weightUpdatePeriod,omitempty"`

// The multiplier used to adjust endpoint weights with the error rate
// calculated as eps/qps. Default is 1.0.
ErrorUtilizationPenalty float64 `json:"errorUtilizationPenalty,omitempty"`
}
44 changes: 44 additions & 0 deletions balancer/weightedroundrobin/internal/internal.go
@@ -0,0 +1,44 @@
/*
*
* Copyright 2023 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

// Package internal allows for easier testing of the weightedroundrobin
// package.
package internal

import (
"time"
)

// AllowAnyWeightUpdatePeriod permits any setting of WeightUpdatePeriod for
// testing. Normally a minimum of 100ms is applied.
var AllowAnyWeightUpdatePeriod bool

// LBConfig allows tests to produce a JSON form of the config from the struct
// instead of using a string.
type LBConfig struct {
EnableOOBLoadReport *bool `json:"enableOobLoadReport,omitempty"`
OOBReportingPeriod *time.Duration `json:"oobReportingPeriod,omitempty"`
BlackoutPeriod *time.Duration `json:"blackoutPeriod,omitempty"`
WeightExpirationPeriod *time.Duration `json:"weightExpirationPeriod,omitempty"`
WeightUpdatePeriod *time.Duration `json:"weightUpdatePeriod,omitempty"`
ErrorUtilizationPenalty *float64 `json:"errorUtilizationPenalty,omitempty"`
}

// TimeNow can be overridden by tests to return a different value for the
// current time.
var TimeNow = time.Now
34 changes: 34 additions & 0 deletions balancer/weightedroundrobin/logging.go
@@ -0,0 +1,34 @@
/*
*
* Copyright 2023 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package weightedroundrobin

import (
"fmt"

"google.golang.org/grpc/grpclog"
internalgrpclog "google.golang.org/grpc/internal/grpclog"
)

const prefix = "[%p] "

var logger = grpclog.Component("weighted-round-robin")

func prefixLogger(p *wrrBalancer) *internalgrpclog.PrefixLogger {
return internalgrpclog.NewPrefixLogger(logger, fmt.Sprintf(prefix, p))
}
138 changes: 138 additions & 0 deletions balancer/weightedroundrobin/scheduler.go
@@ -0,0 +1,138 @@
/*
*
* Copyright 2023 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package weightedroundrobin

import (
"math"
)

type scheduler interface {
nextIndex() int
}

// newScheduler uses scWeights to create a new scheduler for selecting subconns
// in a picker. It will return a round robin implementation if at least
// len(scWeights)-1 are zero or there is only a single subconn, otherwise it
// will return an Earliest Deadline First (EDF) scheduler implementation that
// selects the subchannels according to their weights.
func newScheduler(scWeights []float64, inc func() uint32) scheduler {
n := len(scWeights)
if n == 0 {
return nil
}
if n == 1 {
return &rrScheduler{numSCs: 1, inc: inc}
}
sum := float64(0)
numZero := 0
max := float64(0)
for _, w := range scWeights {
sum += w
if w > max {
max = w
}
if w == 0 {
numZero++
}
}
if numZero >= n-1 {
return &rrScheduler{numSCs: uint32(n), inc: inc}
}
unscaledMean := sum / float64(n-numZero)
scalingFactor := maxWeight / max
mean := uint16(math.Round(scalingFactor * unscaledMean))

weights := make([]uint16, n)
allEqual := true
for i, w := range scWeights {
if w == 0 {
// Backends with weight = 0 use the mean.
weights[i] = mean
} else {
scaledWeight := uint16(math.Round(scalingFactor * w))
weights[i] = scaledWeight
if scaledWeight != mean {
allEqual = false
}
}
}

if allEqual {
return &rrScheduler{numSCs: uint32(n), inc: inc}
}

logger.Infof("using edf scheduler with weights: %v", weights)
return &edfScheduler{weights: weights, inc: inc}
}

const maxWeight = math.MaxUint16

// edfScheduler implements EDF using the same algorithm as grpc-c++ here:
//
// https://github.com/grpc/grpc/blob/master/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc
type edfScheduler struct {
inc func() uint32
weights []uint16
}

// Returns the index in s.weights for the picker to choose.
func (s *edfScheduler) nextIndex() int {
const offset = maxWeight / 2

for {
idx := uint64(s.inc())

// The sequence number (idx) is split in two: the lower %n gives the
// index of the backend, and the rest gives the number of times we've
// iterated through all backends. `generation` is used to
// deterministically decide whether we pick or skip the backend on this
// iteration, in proportion to the backend's weight.

backendIndex := idx % uint64(len(s.weights))
generation := idx / uint64(len(s.weights))
weight := uint64(s.weights[backendIndex])

// We pick a backend `weight` times per `maxWeight` generations. The
// multiply and modulus ~evenly spread out the picks for a given
// backend between different generations. The offset by `backendIndex`
// helps to reduce the chance of multiple consecutive non-picks: if we
// have two consecutive backends with an equal, say, 80% weight of the
// max, with no offset we would see 1/5 generations that skipped both.
// TODO(b/190488683): add test for offset efficacy.
mod := uint64(weight*generation+backendIndex*offset) % maxWeight

if mod < maxWeight-weight {
continue
}
return int(backendIndex)
}
}

// A simple RR scheduler to use for fallback when fewer than two backends have
// non-zero weights, or all backends have the the same weight, or when only one
// subconn exists.
type rrScheduler struct {
inc func() uint32
numSCs uint32
}

func (s *rrScheduler) nextIndex() int {
idx := s.inc()
return int(idx % s.numSCs)
}
23 changes: 9 additions & 14 deletions balancer/weightedroundrobin/weightedroundrobin.go
Expand Up @@ -16,16 +16,21 @@
*
*/

// Package weightedroundrobin defines a weighted roundrobin balancer.
// Package weightedroundrobin provides an implementation of the weighted round
// robin LB policy, as defined in [gRFC A58].
//
// # Experimental
//
// Notice: This package is EXPERIMENTAL and may be changed or removed in a
// later release.
//
// [gRFC A58]: https://github.com/grpc/proposal/blob/master/A58-client-side-weighted-round-robin-lb-policy.md
package weightedroundrobin

import (
"google.golang.org/grpc/resolver"
)

// Name is the name of weighted_round_robin balancer.
const Name = "weighted_round_robin"

// attributeKey is the type used as the key to store AddrInfo in the
// BalancerAttributes field of resolver.Address.
type attributeKey struct{}
Expand All @@ -44,23 +49,13 @@ func (a AddrInfo) Equal(o interface{}) bool {

// SetAddrInfo returns a copy of addr in which the BalancerAttributes field is
// updated with addrInfo.
//
// # Experimental
//
// Notice: This API is EXPERIMENTAL and may be changed or removed in a
// later release.
func SetAddrInfo(addr resolver.Address, addrInfo AddrInfo) resolver.Address {
addr.BalancerAttributes = addr.BalancerAttributes.WithValue(attributeKey{}, addrInfo)
return addr
}

// GetAddrInfo returns the AddrInfo stored in the BalancerAttributes field of
// addr.
//
// # Experimental
//
// Notice: This API is EXPERIMENTAL and may be changed or removed in a
// later release.
func GetAddrInfo(addr resolver.Address) AddrInfo {
v := addr.BalancerAttributes.Value(attributeKey{})
ai, _ := v.(AddrInfo)
Expand Down
7 changes: 7 additions & 0 deletions internal/grpcrand/grpcrand.go
Expand Up @@ -72,3 +72,10 @@ func Uint64() uint64 {
defer mu.Unlock()
return r.Uint64()
}

// Uint32 implements rand.Uint32 on the grpcrand global source.
func Uint32() uint32 {
mu.Lock()
defer mu.Unlock()
return r.Uint32()
}
13 changes: 7 additions & 6 deletions orca/producer.go
Expand Up @@ -199,12 +199,13 @@ func (p *producer) run(ctx context.Context, done chan struct{}, interval time.Du
// Unimplemented; do not retry.
logger.Error("Server doesn't support ORCA OOB load reporting protocol; not listening for load reports.")
return
case status.Code(err) == codes.Unavailable:
// TODO: this code should ideally log an error, too, but for now we
// receive this code when shutting down the ClientConn. Once we
// can determine the state or ensure the producer is stopped before
// the stream ends, we can log an error when it's not a natural
// shutdown.
case status.Code(err) == codes.Unavailable, status.Code(err) == codes.Canceled:
// TODO: these codes should ideally log an error, too, but for now
// we receive them when shutting down the ClientConn (Unavailable
// if the stream hasn't started yet, and Canceled if it happens
// mid-stream). Once we can determine the state or ensure the
// producer is stopped before the stream ends, we can log an error
// when it's not a natural shutdown.
default:
// Log all other errors.
logger.Error("Received unexpected stream error:", err)
Expand Down
2 changes: 1 addition & 1 deletion xds/internal/balancer/clusterimpl/picker.go
Expand Up @@ -160,7 +160,7 @@ func (d *picker) Pick(info balancer.PickInfo) (balancer.PickResult, error) {
d.loadStore.CallFinished(lIDStr, info.Err)

load, ok := info.ServerLoad.(*v3orcapb.OrcaLoadReport)
if !ok {
if !ok || load == nil {
return
}
d.loadStore.CallServerLoad(lIDStr, serverLoadCPUName, load.CpuUtilization)
Expand Down

0 comments on commit 5c4bee5

Please sign in to comment.