Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WRR] Backport 1.56: Prefer application_utilization to cpu_utilization #33378

Merged
merged 1 commit into from
Jun 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 6 additions & 6 deletions CMakeLists.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions bazel/grpc_deps.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -502,11 +502,11 @@ def grpc_deps():
if "com_github_cncf_udpa" not in native.existing_rules():
http_archive(
name = "com_github_cncf_udpa",
sha256 = "aef36c29bd0ef95509f7f52693dbdafe4a2c2c5d1eb406bf68e6364a0d12e11b",
strip_prefix = "xds-4003588d1b747e37e911baa5a9c1c07fde4ca518",
sha256 = "0d33b83f8c6368954e72e7785539f0d272a8aba2f6e2e336ed15fd1514bc9899",
strip_prefix = "xds-e9ce68804cb4e64cab5a52e3c8baf840d4ff87b7",
urls = [
"https://storage.googleapis.com/grpc-bazel-mirror/github.com/cncf/xds/archive/4003588d1b747e37e911baa5a9c1c07fde4ca518.tar.gz",
"https://github.com/cncf/xds/archive/4003588d1b747e37e911baa5a9c1c07fde4ca518.tar.gz",
"https://storage.googleapis.com/grpc-bazel-mirror/github.com/cncf/xds/archive/e9ce68804cb4e64cab5a52e3c8baf840d4ff87b7.tar.gz",
"https://github.com/cncf/xds/archive/e9ce68804cb4e64cab5a52e3c8baf840d4ff87b7.tar.gz",
],
)

Expand Down
8 changes: 4 additions & 4 deletions build_autogenerated.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions include/grpcpp/ext/call_metric_recorder.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,14 @@ class CallMetricRecorder {
/// Values outside of the valid range [0, 1] are ignored.
virtual CallMetricRecorder& RecordMemoryUtilizationMetric(double value) = 0;

/// Records a call metric measurement for application specific utilization.
/// Multiple calls to this method will override the stored value.
/// Values may be larger than 1.0 when the usage exceeds the reporter
/// dependent notion of soft limits.
/// Values outside of the valid range [0, infy) are ignored.
virtual CallMetricRecorder& RecordApplicationUtilizationMetric(
double value) = 0;

/// Records a call metric measurement for queries per second.
/// Multiple calls to this method will override the stored value.
/// Values outside of the valid range [0, infy) are ignored.
Expand Down
6 changes: 6 additions & 0 deletions include/grpcpp/ext/server_metric_recorder.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ class ServerMetricRecorder {
/// Values outside of the valid range are rejected.
/// Overrides the stored value when called again with a valid value.
void SetMemoryUtilization(double value);
/// Records the application specific utilization in the range [0, infy).
/// Values outside of the valid range are rejected.
/// Overrides the stored value when called again with a valid value.
void SetApplicationUtilization(double value);
/// Records number of queries per second to the server in the range [0, infy).
/// Values outside of the valid range are rejected.
/// Overrides the stored value when called again with a valid value.
Expand All @@ -77,6 +81,8 @@ class ServerMetricRecorder {
void ClearCpuUtilization();
/// Clears the server memory utilization if recorded.
void ClearMemoryUtilization();
/// Clears the application specific utilization if recorded.
void ClearApplicationUtilization();
/// Clears number of queries per second to the server if recorded.
void ClearQps();
/// Clears number of errors per second to the server if recorded.
Expand Down
5 changes: 5 additions & 0 deletions src/core/ext/filters/backend_metrics/backend_metric_filter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ absl::optional<std::string> BackendMetricFilter::MaybeSerializeBackendMetrics(
data.mem_utilization);
has_data = true;
}
if (data.application_utilization != -1) {
xds_data_orca_v3_OrcaLoadReport_set_application_utilization(
response, data.application_utilization);
has_data = true;
}
if (data.qps != -1) {
xds_data_orca_v3_OrcaLoadReport_set_rps_fractional(response, data.qps);
has_data = true;
Expand Down
2 changes: 2 additions & 0 deletions src/core/ext/filters/client_channel/backend_metric.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ const BackendMetricData* ParseBackendMetricData(
xds_data_orca_v3_OrcaLoadReport_cpu_utilization(msg);
backend_metric_data->mem_utilization =
xds_data_orca_v3_OrcaLoadReport_mem_utilization(msg);
backend_metric_data->application_utilization =
xds_data_orca_v3_OrcaLoadReport_application_utilization(msg);
backend_metric_data->qps =
xds_data_orca_v3_OrcaLoadReport_rps_fractional(msg);
backend_metric_data->eps = xds_data_orca_v3_OrcaLoadReport_eps(msg);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ struct BackendMetricData {
/// Memory utilization expressed as a fraction of available memory
/// resources.
double mem_utilization = -1;
/// Application specific utilization expressed as a fraction of available
/// resources.
double application_utilization = -1;
/// Total queries per second being served by the backend across all services.
double qps = -1;
/// Total errors per second reported by the backend across all services.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ class WeightedRoundRobin : public LoadBalancingPolicy {
: wrr_(std::move(wrr)), key_(std::move(key)) {}
~AddressWeight() override;

void MaybeUpdateWeight(double qps, double eps, double cpu_utilization,
void MaybeUpdateWeight(double qps, double eps, double utilization,
float error_utilization_penalty);

float GetWeight(Timestamp now, Duration weight_expiration_period,
Expand Down Expand Up @@ -398,23 +398,23 @@ WeightedRoundRobin::AddressWeight::~AddressWeight() {
}

void WeightedRoundRobin::AddressWeight::MaybeUpdateWeight(
double qps, double eps, double cpu_utilization,
double qps, double eps, double utilization,
float error_utilization_penalty) {
// Compute weight.
float weight = 0;
if (qps > 0 && cpu_utilization > 0) {
if (qps > 0 && utilization > 0) {
double penalty = 0.0;
if (eps > 0 && error_utilization_penalty > 0) {
penalty = eps / qps * error_utilization_penalty;
}
weight = qps / (cpu_utilization + penalty);
weight = qps / (utilization + penalty);
}
if (weight == 0) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
gpr_log(GPR_INFO,
"[WRR %p] subchannel %s: qps=%f, eps=%f, cpu_utilization=%f: "
"[WRR %p] subchannel %s: qps=%f, eps=%f, utilization=%f: "
"error_util_penalty=%f, weight=%f (not updating)",
wrr_.get(), key_.c_str(), qps, eps, cpu_utilization,
wrr_.get(), key_.c_str(), qps, eps, utilization,
error_utilization_penalty, weight);
}
return;
Expand All @@ -424,10 +424,10 @@ void WeightedRoundRobin::AddressWeight::MaybeUpdateWeight(
MutexLock lock(&mu_);
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
gpr_log(GPR_INFO,
"[WRR %p] subchannel %s: qps=%f, eps=%f, cpu_utilization=%f "
"[WRR %p] subchannel %s: qps=%f, eps=%f, utilization=%f "
"error_util_penalty=%f : setting weight=%f weight_=%f now=%s "
"last_update_time_=%s non_empty_since_=%s",
wrr_.get(), key_.c_str(), qps, eps, cpu_utilization,
wrr_.get(), key_.c_str(), qps, eps, utilization,
error_utilization_penalty, weight, weight_, now.ToString().c_str(),
last_update_time_.ToString().c_str(),
non_empty_since_.ToString().c_str());
Expand Down Expand Up @@ -483,14 +483,16 @@ void WeightedRoundRobin::Picker::SubchannelCallTracker::Finish(
args.backend_metric_accessor->GetBackendMetricData();
double qps = 0;
double eps = 0;
double cpu_utilization = 0;
double utilization = 0;
if (backend_metric_data != nullptr) {
qps = backend_metric_data->qps;
eps = backend_metric_data->eps;
cpu_utilization = backend_metric_data->cpu_utilization;
utilization = backend_metric_data->application_utilization;
if (utilization <= 0) {
utilization = backend_metric_data->cpu_utilization;
}
}
weight_->MaybeUpdateWeight(qps, eps, cpu_utilization,
error_utilization_penalty_);
weight_->MaybeUpdateWeight(qps, eps, utilization, error_utilization_penalty_);
}

//
Expand Down Expand Up @@ -847,9 +849,12 @@ void WeightedRoundRobin::WeightedRoundRobinSubchannelList::

void WeightedRoundRobin::WeightedRoundRobinSubchannelData::OobWatcher::
OnBackendMetricReport(const BackendMetricData& backend_metric_data) {
double utilization = backend_metric_data.application_utilization;
if (utilization <= 0) {
utilization = backend_metric_data.cpu_utilization;
}
weight_->MaybeUpdateWeight(backend_metric_data.qps, backend_metric_data.eps,
backend_metric_data.cpu_utilization,
error_utilization_penalty_);
utilization, error_utilization_penalty_);
}

//
Expand Down
13 changes: 11 additions & 2 deletions src/core/ext/upb-generated/xds/data/orca/v3/orca_load_report.upb.c

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions src/core/ext/upb-generated/xds/data/orca/v3/orca_load_report.upb.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

60 changes: 53 additions & 7 deletions src/cpp/server/backend_metric_recorder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@
using grpc_core::BackendMetricData;

namespace {
// Returns true when `cpu` is an acceptable CPU utilization value, i.e. it is
// not negative. No upper bound is enforced. NaN compares false and is
// therefore rejected.
bool IsCpuUtilizationValid(double cpu) {
  const bool is_non_negative = 0.0 <= cpu;
  return is_non_negative;
}
// Returns true when `util` is acceptable for a utilization metric that only
// has a soft upper limit: the value must be non-negative, and values above
// 1.0 are allowed. NaN compares false and is therefore rejected.
bool IsUtilizationWithSoftLimitsValid(double util) {
  const bool is_non_negative = 0.0 <= util;
  return is_non_negative;
}

// Other utilization values must be in [0, 1].
bool IsUtilizationValid(double utilization) {
Expand Down Expand Up @@ -68,7 +68,7 @@ void ServerMetricRecorder::UpdateBackendMetricDataState(
}

void ServerMetricRecorder::SetCpuUtilization(double value) {
if (!IsCpuUtilizationValid(value)) {
if (!IsUtilizationWithSoftLimitsValid(value)) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_backend_metric_trace)) {
gpr_log(GPR_INFO, "[%p] CPU utilization rejected: %f", this, value);
}
Expand All @@ -95,6 +95,22 @@ void ServerMetricRecorder::SetMemoryUtilization(double value) {
}
}

void ServerMetricRecorder::SetApplicationUtilization(double value) {
if (!IsUtilizationWithSoftLimitsValid(value)) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_backend_metric_trace)) {
gpr_log(GPR_INFO, "[%p] Application utilization rejected: %f", this,
value);
}
return;
}
UpdateBackendMetricDataState([value](BackendMetricData* data) {
data->application_utilization = value;
});
if (GRPC_TRACE_FLAG_ENABLED(grpc_backend_metric_trace)) {
gpr_log(GPR_INFO, "[%p] Application utilization set: %f", this, value);
}
}

void ServerMetricRecorder::SetQps(double value) {
if (!IsRateValid(value)) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_backend_metric_trace)) {
Expand Down Expand Up @@ -172,6 +188,14 @@ void ServerMetricRecorder::ClearMemoryUtilization() {
}
}

void ServerMetricRecorder::ClearApplicationUtilization() {
UpdateBackendMetricDataState(
[](BackendMetricData* data) { data->application_utilization = -1; });
if (GRPC_TRACE_FLAG_ENABLED(grpc_backend_metric_trace)) {
gpr_log(GPR_INFO, "[%p] Application utilization cleared.", this);
}
}

void ServerMetricRecorder::ClearQps() {
UpdateBackendMetricDataState([](BackendMetricData* data) { data->qps = -1; });
if (GRPC_TRACE_FLAG_ENABLED(grpc_backend_metric_trace)) {
Expand Down Expand Up @@ -212,9 +236,10 @@ ServerMetricRecorder::GetMetricsIfChanged() const {
const auto& data = result->data;
gpr_log(GPR_INFO,
"[%p] GetMetrics() returned: seq:%" PRIu64
" cpu:%f mem:%f qps:%f eps:%f utilization size: %" PRIuPTR,
" cpu:%f mem:%f app:%f qps:%f eps:%f utilization size: %" PRIuPTR,
this, result->sequence_number, data.cpu_utilization,
data.mem_utilization, data.qps, data.eps, data.utilization.size());
data.mem_utilization, data.application_utilization, data.qps,
data.eps, data.utilization.size());
}
return result;
}
Expand All @@ -223,7 +248,7 @@ ServerMetricRecorder::GetMetricsIfChanged() const {

experimental::CallMetricRecorder&
BackendMetricState::RecordCpuUtilizationMetric(double value) {
if (!IsCpuUtilizationValid(value)) {
if (!IsUtilizationWithSoftLimitsValid(value)) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_backend_metric_trace)) {
gpr_log(GPR_INFO, "[%p] CPU utilization value rejected: %f", this, value);
}
Expand Down Expand Up @@ -251,6 +276,22 @@ BackendMetricState::RecordMemoryUtilizationMetric(double value) {
return *this;
}

// Records the per-call application utilization.
//
// A value accepted by IsUtilizationWithSoftLimitsValid() replaces the
// previously stored one; any other value is dropped. Both outcomes are
// logged when the backend metric trace flag is enabled. Always returns
// *this so that calls can be chained.
experimental::CallMetricRecorder&
BackendMetricState::RecordApplicationUtilizationMetric(double value) {
  if (IsUtilizationWithSoftLimitsValid(value)) {
    application_utilization_.store(value, std::memory_order_relaxed);
    if (GRPC_TRACE_FLAG_ENABLED(grpc_backend_metric_trace)) {
      gpr_log(GPR_INFO, "[%p] Application utilization recorded: %f", this,
              value);
    }
    return *this;
  }
  // Invalid input: keep the stored value and only report the rejection.
  if (GRPC_TRACE_FLAG_ENABLED(grpc_backend_metric_trace)) {
    gpr_log(GPR_INFO, "[%p] Application utilization value rejected: %f", this,
            value);
  }
  return *this;
}

experimental::CallMetricRecorder& BackendMetricState::RecordQpsMetric(
double value) {
if (!IsRateValid(value)) {
Expand Down Expand Up @@ -333,13 +374,18 @@ BackendMetricData BackendMetricState::GetBackendMetricData() {
}
// Only overwrite if the value is set i.e. in the valid range.
const double cpu = cpu_utilization_.load(std::memory_order_relaxed);
if (IsCpuUtilizationValid(cpu)) {
if (IsUtilizationWithSoftLimitsValid(cpu)) {
data.cpu_utilization = cpu;
}
const double mem = mem_utilization_.load(std::memory_order_relaxed);
if (IsUtilizationValid(mem)) {
data.mem_utilization = mem;
}
const double app_util =
application_utilization_.load(std::memory_order_relaxed);
if (IsUtilizationWithSoftLimitsValid(app_util)) {
data.application_utilization = app_util;
}
const double qps = qps_.load(std::memory_order_relaxed);
if (IsRateValid(qps)) {
data.qps = qps;
Expand Down