Skip to content

Commit

Permalink
[WRR] Backport 1.56: Prefer application_utilization to cpu_utilization (#33378)
Browse files Browse the repository at this point in the history

Backport of #33355

Co-authored-by: Yousuk Seung <ysseung@google.com>
  • Loading branch information
veblush and yousukseung committed Jun 9, 2023
1 parent fc64109 commit d9d47b5
Show file tree
Hide file tree
Showing 21 changed files with 424 additions and 117 deletions.
12 changes: 6 additions & 6 deletions CMakeLists.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions bazel/grpc_deps.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -502,11 +502,11 @@ def grpc_deps():
if "com_github_cncf_udpa" not in native.existing_rules():
http_archive(
name = "com_github_cncf_udpa",
sha256 = "aef36c29bd0ef95509f7f52693dbdafe4a2c2c5d1eb406bf68e6364a0d12e11b",
strip_prefix = "xds-4003588d1b747e37e911baa5a9c1c07fde4ca518",
sha256 = "0d33b83f8c6368954e72e7785539f0d272a8aba2f6e2e336ed15fd1514bc9899",
strip_prefix = "xds-e9ce68804cb4e64cab5a52e3c8baf840d4ff87b7",
urls = [
"https://storage.googleapis.com/grpc-bazel-mirror/github.com/cncf/xds/archive/4003588d1b747e37e911baa5a9c1c07fde4ca518.tar.gz",
"https://github.com/cncf/xds/archive/4003588d1b747e37e911baa5a9c1c07fde4ca518.tar.gz",
"https://storage.googleapis.com/grpc-bazel-mirror/github.com/cncf/xds/archive/e9ce68804cb4e64cab5a52e3c8baf840d4ff87b7.tar.gz",
"https://github.com/cncf/xds/archive/e9ce68804cb4e64cab5a52e3c8baf840d4ff87b7.tar.gz",
],
)

Expand Down
8 changes: 4 additions & 4 deletions build_autogenerated.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions include/grpcpp/ext/call_metric_recorder.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,14 @@ class CallMetricRecorder {
/// Values outside of the valid range [0, 1] are ignored.
virtual CallMetricRecorder& RecordMemoryUtilizationMetric(double value) = 0;

/// Records a call metric measurement for application specific utilization.
/// Multiple calls to this method will override the stored value.
/// Values may be larger than 1.0 when the usage exceeds the reporter
/// dependent notion of soft limits.
/// Values outside of the valid range [0, infy] are ignored. Unlike the memory
/// utilization metric, which is limited to [0, 1], there is no upper bound
/// of 1.0 here.
virtual CallMetricRecorder& RecordApplicationUtilizationMetric(
double value) = 0;

/// Records a call metric measurement for queries per second.
/// Multiple calls to this method will override the stored value.
/// Values outside of the valid range [0, infy) are ignored.
Expand Down
6 changes: 6 additions & 0 deletions include/grpcpp/ext/server_metric_recorder.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ class ServerMetricRecorder {
/// Values outside of the valid range are rejected.
/// Overrides the stored value when called again with a valid value.
void SetMemoryUtilization(double value);
/// Records the application specific utilization in the range [0, infy].
/// Values above 1.0 are accepted; only the lower bound of 0 is enforced.
/// Values outside of the valid range are rejected, leaving any previously
/// stored value untouched.
/// Overrides the stored value when called again with a valid value.
void SetApplicationUtilization(double value);
/// Records number of queries per second to the server in the range [0, infy).
/// Values outside of the valid range are rejected.
/// Overrides the stored value when called again with a valid value.
Expand All @@ -77,6 +81,8 @@ class ServerMetricRecorder {
void ClearCpuUtilization();
/// Clears the server memory utilization if recorded.
void ClearMemoryUtilization();
/// Clears the application specific utilization if recorded.
void ClearApplicationUtilization();
/// Clears number of queries per second to the server if recorded.
void ClearQps();
/// Clears number of errors per second to the server if recorded.
Expand Down
5 changes: 5 additions & 0 deletions src/core/ext/filters/backend_metrics/backend_metric_filter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ absl::optional<std::string> BackendMetricFilter::MaybeSerializeBackendMetrics(
data.mem_utilization);
has_data = true;
}
if (data.application_utilization != -1) {
xds_data_orca_v3_OrcaLoadReport_set_application_utilization(
response, data.application_utilization);
has_data = true;
}
if (data.qps != -1) {
xds_data_orca_v3_OrcaLoadReport_set_rps_fractional(response, data.qps);
has_data = true;
Expand Down
2 changes: 2 additions & 0 deletions src/core/ext/filters/client_channel/backend_metric.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ const BackendMetricData* ParseBackendMetricData(
xds_data_orca_v3_OrcaLoadReport_cpu_utilization(msg);
backend_metric_data->mem_utilization =
xds_data_orca_v3_OrcaLoadReport_mem_utilization(msg);
backend_metric_data->application_utilization =
xds_data_orca_v3_OrcaLoadReport_application_utilization(msg);
backend_metric_data->qps =
xds_data_orca_v3_OrcaLoadReport_rps_fractional(msg);
backend_metric_data->eps = xds_data_orca_v3_OrcaLoadReport_eps(msg);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ struct BackendMetricData {
/// Memory utilization expressed as a fraction of available memory
/// resources.
double mem_utilization = -1;
/// Application specific utilization expressed as a fraction of available
/// resources.
double application_utilization = -1;
/// Total queries per second being served by the backend across all services.
double qps = -1;
/// Total errors per second reported by the backend across all services.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ class WeightedRoundRobin : public LoadBalancingPolicy {
: wrr_(std::move(wrr)), key_(std::move(key)) {}
~AddressWeight() override;

void MaybeUpdateWeight(double qps, double eps, double cpu_utilization,
void MaybeUpdateWeight(double qps, double eps, double utilization,
float error_utilization_penalty);

float GetWeight(Timestamp now, Duration weight_expiration_period,
Expand Down Expand Up @@ -398,23 +398,23 @@ WeightedRoundRobin::AddressWeight::~AddressWeight() {
}

void WeightedRoundRobin::AddressWeight::MaybeUpdateWeight(
double qps, double eps, double cpu_utilization,
double qps, double eps, double utilization,
float error_utilization_penalty) {
// Compute weight.
float weight = 0;
if (qps > 0 && cpu_utilization > 0) {
if (qps > 0 && utilization > 0) {
double penalty = 0.0;
if (eps > 0 && error_utilization_penalty > 0) {
penalty = eps / qps * error_utilization_penalty;
}
weight = qps / (cpu_utilization + penalty);
weight = qps / (utilization + penalty);
}
if (weight == 0) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
gpr_log(GPR_INFO,
"[WRR %p] subchannel %s: qps=%f, eps=%f, cpu_utilization=%f: "
"[WRR %p] subchannel %s: qps=%f, eps=%f, utilization=%f: "
"error_util_penalty=%f, weight=%f (not updating)",
wrr_.get(), key_.c_str(), qps, eps, cpu_utilization,
wrr_.get(), key_.c_str(), qps, eps, utilization,
error_utilization_penalty, weight);
}
return;
Expand All @@ -424,10 +424,10 @@ void WeightedRoundRobin::AddressWeight::MaybeUpdateWeight(
MutexLock lock(&mu_);
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
gpr_log(GPR_INFO,
"[WRR %p] subchannel %s: qps=%f, eps=%f, cpu_utilization=%f "
"[WRR %p] subchannel %s: qps=%f, eps=%f, utilization=%f "
"error_util_penalty=%f : setting weight=%f weight_=%f now=%s "
"last_update_time_=%s non_empty_since_=%s",
wrr_.get(), key_.c_str(), qps, eps, cpu_utilization,
wrr_.get(), key_.c_str(), qps, eps, utilization,
error_utilization_penalty, weight, weight_, now.ToString().c_str(),
last_update_time_.ToString().c_str(),
non_empty_since_.ToString().c_str());
Expand Down Expand Up @@ -483,14 +483,16 @@ void WeightedRoundRobin::Picker::SubchannelCallTracker::Finish(
args.backend_metric_accessor->GetBackendMetricData();
double qps = 0;
double eps = 0;
double cpu_utilization = 0;
double utilization = 0;
if (backend_metric_data != nullptr) {
qps = backend_metric_data->qps;
eps = backend_metric_data->eps;
cpu_utilization = backend_metric_data->cpu_utilization;
utilization = backend_metric_data->application_utilization;
if (utilization <= 0) {
utilization = backend_metric_data->cpu_utilization;
}
}
weight_->MaybeUpdateWeight(qps, eps, cpu_utilization,
error_utilization_penalty_);
weight_->MaybeUpdateWeight(qps, eps, utilization, error_utilization_penalty_);
}

//
Expand Down Expand Up @@ -847,9 +849,12 @@ void WeightedRoundRobin::WeightedRoundRobinSubchannelList::

// Feeds a received backend metric report into the weight computation for
// this watcher's address.
void WeightedRoundRobin::WeightedRoundRobinSubchannelData::OobWatcher::
OnBackendMetricReport(const BackendMetricData& backend_metric_data) {
// Prefer the application specific utilization. When it is not positive
// (BackendMetricData defaults the field to -1 when unset), fall back to the
// CPU utilization instead.
double utilization = backend_metric_data.application_utilization;
if (utilization <= 0) {
utilization = backend_metric_data.cpu_utilization;
}
weight_->MaybeUpdateWeight(backend_metric_data.qps, backend_metric_data.eps,
backend_metric_data.cpu_utilization,
error_utilization_penalty_);
utilization, error_utilization_penalty_);
}

//
Expand Down
13 changes: 11 additions & 2 deletions src/core/ext/upb-generated/xds/data/orca/v3/orca_load_report.upb.c

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions src/core/ext/upb-generated/xds/data/orca/v3/orca_load_report.upb.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

60 changes: 53 additions & 7 deletions src/cpp/server/backend_metric_recorder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@
using grpc_core::BackendMetricData;

namespace {
// CPU utilization values must be in [0, infy).
bool IsCpuUtilizationValid(double cpu) { return cpu >= 0.0; }
// Validates a utilization value that is allowed to exceed 1.0 (i.e. one that
// is measured against a soft limit). Only the lower bound is checked: any
// value comparing greater than or equal to zero is accepted, while negative
// values and NaN (for which the comparison is false) are rejected.
bool IsUtilizationWithSoftLimitsValid(double util) {
  const bool is_non_negative = (util >= 0.0);
  return is_non_negative;
}

// Other utilization values must be in [0, 1].
bool IsUtilizationValid(double utilization) {
Expand Down Expand Up @@ -68,7 +68,7 @@ void ServerMetricRecorder::UpdateBackendMetricDataState(
}

void ServerMetricRecorder::SetCpuUtilization(double value) {
if (!IsCpuUtilizationValid(value)) {
if (!IsUtilizationWithSoftLimitsValid(value)) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_backend_metric_trace)) {
gpr_log(GPR_INFO, "[%p] CPU utilization rejected: %f", this, value);
}
Expand All @@ -95,6 +95,22 @@ void ServerMetricRecorder::SetMemoryUtilization(double value) {
}
}

void ServerMetricRecorder::SetApplicationUtilization(double value) {
if (!IsUtilizationWithSoftLimitsValid(value)) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_backend_metric_trace)) {
gpr_log(GPR_INFO, "[%p] Application utilization rejected: %f", this,
value);
}
return;
}
UpdateBackendMetricDataState([value](BackendMetricData* data) {
data->application_utilization = value;
});
if (GRPC_TRACE_FLAG_ENABLED(grpc_backend_metric_trace)) {
gpr_log(GPR_INFO, "[%p] Application utilization set: %f", this, value);
}
}

void ServerMetricRecorder::SetQps(double value) {
if (!IsRateValid(value)) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_backend_metric_trace)) {
Expand Down Expand Up @@ -172,6 +188,14 @@ void ServerMetricRecorder::ClearMemoryUtilization() {
}
}

void ServerMetricRecorder::ClearApplicationUtilization() {
UpdateBackendMetricDataState(
[](BackendMetricData* data) { data->application_utilization = -1; });
if (GRPC_TRACE_FLAG_ENABLED(grpc_backend_metric_trace)) {
gpr_log(GPR_INFO, "[%p] Application utilization cleared.", this);
}
}

void ServerMetricRecorder::ClearQps() {
UpdateBackendMetricDataState([](BackendMetricData* data) { data->qps = -1; });
if (GRPC_TRACE_FLAG_ENABLED(grpc_backend_metric_trace)) {
Expand Down Expand Up @@ -212,9 +236,10 @@ ServerMetricRecorder::GetMetricsIfChanged() const {
const auto& data = result->data;
gpr_log(GPR_INFO,
"[%p] GetMetrics() returned: seq:%" PRIu64
" cpu:%f mem:%f qps:%f eps:%f utilization size: %" PRIuPTR,
" cpu:%f mem:%f app:%f qps:%f eps:%f utilization size: %" PRIuPTR,
this, result->sequence_number, data.cpu_utilization,
data.mem_utilization, data.qps, data.eps, data.utilization.size());
data.mem_utilization, data.application_utilization, data.qps,
data.eps, data.utilization.size());
}
return result;
}
Expand All @@ -223,7 +248,7 @@ ServerMetricRecorder::GetMetricsIfChanged() const {

experimental::CallMetricRecorder&
BackendMetricState::RecordCpuUtilizationMetric(double value) {
if (!IsCpuUtilizationValid(value)) {
if (!IsUtilizationWithSoftLimitsValid(value)) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_backend_metric_trace)) {
gpr_log(GPR_INFO, "[%p] CPU utilization value rejected: %f", this, value);
}
Expand Down Expand Up @@ -251,6 +276,22 @@ BackendMetricState::RecordMemoryUtilizationMetric(double value) {
return *this;
}

// Records the per-call application specific utilization.
//
// The value is validated with IsUtilizationWithSoftLimitsValid(); a value
// that fails validation is ignored and the previously stored value is kept.
// Either outcome is logged when the backend metric trace flag is enabled.
// Always returns *this.
experimental::CallMetricRecorder&
BackendMetricState::RecordApplicationUtilizationMetric(double value) {
  const bool accepted = IsUtilizationWithSoftLimitsValid(value);
  if (accepted) {
    application_utilization_.store(value, std::memory_order_relaxed);
  }
  if (GRPC_TRACE_FLAG_ENABLED(grpc_backend_metric_trace)) {
    if (accepted) {
      gpr_log(GPR_INFO, "[%p] Application utilization recorded: %f", this,
              value);
    } else {
      gpr_log(GPR_INFO, "[%p] Application utilization value rejected: %f", this,
              value);
    }
  }
  return *this;
}

experimental::CallMetricRecorder& BackendMetricState::RecordQpsMetric(
double value) {
if (!IsRateValid(value)) {
Expand Down Expand Up @@ -333,13 +374,18 @@ BackendMetricData BackendMetricState::GetBackendMetricData() {
}
// Only overwrite if the value is set i.e. in the valid range.
const double cpu = cpu_utilization_.load(std::memory_order_relaxed);
if (IsCpuUtilizationValid(cpu)) {
if (IsUtilizationWithSoftLimitsValid(cpu)) {
data.cpu_utilization = cpu;
}
const double mem = mem_utilization_.load(std::memory_order_relaxed);
if (IsUtilizationValid(mem)) {
data.mem_utilization = mem;
}
const double app_util =
application_utilization_.load(std::memory_order_relaxed);
if (IsUtilizationWithSoftLimitsValid(app_util)) {
data.application_utilization = app_util;
}
const double qps = qps_.load(std::memory_order_relaxed);
if (IsRateValid(qps)) {
data.qps = qps;
Expand Down

0 comments on commit d9d47b5

Please sign in to comment.