Skip to content

Commit

Permalink
Add a per-log distribution metric for the start index of GetLeavesBy…
Browse files Browse the repository at this point in the history
…Range requests. (#1364)
  • Loading branch information
phbnf committed Feb 21, 2024
1 parent c287ad6 commit eb82533
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 14 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@

* Add build tags for AIX operating system

### Monitoring
* Add a distribution metric to monitor the start of get-entries requests

### Misc

* Return HTTP 504 instead of HTTP 408 upon timeout or cancellation of a backend connection context by @robstradling in https://github.com/google/certificate-transparency-go/pull/1313
Expand Down
49 changes: 35 additions & 14 deletions trillian/ctfe/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ import (
)

var (
// alignGetEntries is the -align_getentries flag (default true), described
// by its usage string as enabling get-entries request alignment.
alignGetEntries = flag.Bool("align_getentries", true, "Enable get-entries request alignment")
alignGetEntries = flag.Bool("align_getentries", true, "Enable get-entries request alignment")
// getEntriesMetrics is the -getentries_metrics flag (default false). The
// get-entries handler only records the start-index distribution metric
// when this flag is set.
getEntriesMetrics = flag.Bool("getentries_metrics", false, "Export get-entries distribution metrics")
)

const (
Expand Down Expand Up @@ -106,19 +107,20 @@ const (
var (
// Metrics are all per-log (label "logid"), but may also be
// per-entrypoint (label "ep") or per-return-code (label "rc").
once sync.Once
knownLogs monitoring.Gauge // logid => value (always 1.0)
isMirrorLog monitoring.Gauge // logid => value (either 0.0 or 1.0)
maxMergeDelay monitoring.Gauge // logid => value
expMergeDelay monitoring.Gauge // logid => value
lastSCTTimestamp monitoring.Gauge // logid => value
lastSTHTimestamp monitoring.Gauge // logid => value
lastSTHTreeSize monitoring.Gauge // logid => value
frozenSTHTimestamp monitoring.Gauge // logid => value
reqsCounter monitoring.Counter // logid, ep => value
rspsCounter monitoring.Counter // logid, ep, rc => value
rspLatency monitoring.Histogram // logid, ep, rc => value
alignedGetEntries monitoring.Counter // logid, aligned => count
once sync.Once
knownLogs monitoring.Gauge // logid => value (always 1.0)
isMirrorLog monitoring.Gauge // logid => value (either 0.0 or 1.0)
maxMergeDelay monitoring.Gauge // logid => value
expMergeDelay monitoring.Gauge // logid => value
lastSCTTimestamp monitoring.Gauge // logid => value
lastSTHTimestamp monitoring.Gauge // logid => value
lastSTHTreeSize monitoring.Gauge // logid => value
frozenSTHTimestamp monitoring.Gauge // logid => value
reqsCounter monitoring.Counter // logid, ep => value
rspsCounter monitoring.Counter // logid, ep, rc => value
rspLatency monitoring.Histogram // logid, ep, rc => value
alignedGetEntries monitoring.Counter // logid, aligned => count
getEntriesStartPercentiles monitoring.Histogram // logid => get-entries start index as a percentage of the tree size
)

// setupMetrics initializes all the exported metrics.
Expand All @@ -135,6 +137,12 @@ func setupMetrics(mf monitoring.MetricFactory) {
rspsCounter = mf.NewCounter("http_rsps", "Number of responses", "logid", "ep", "rc")
rspLatency = mf.NewHistogram("http_latency", "Latency of responses in seconds", "logid", "ep", "rc")
alignedGetEntries = mf.NewCounter("aligned_get_entries", "Number of get-entries requests which were aligned to size limit boundaries", "logid", "aligned")
getEntriesStartPercentiles = mf.NewHistogramWithBuckets(
"get_leaves_start_percentiles",
"Start index of GetLeavesByRange request using percentage of current log size at the time",
monitoring.PercentileBuckets(5),
"logid",
)
}

// Entrypoints is a list of entrypoint names as exposed in statistics/logging.
Expand Down Expand Up @@ -750,6 +758,10 @@ func getEntries(ctx context.Context, li *logInfo, w http.ResponseWriter, r *http
// explicitly here.
return http.StatusBadRequest, fmt.Errorf("need tree size: %d to get leaves but only got: %d", start+1, currentRoot.TreeSize)
}
if *getEntriesMetrics {
label := strconv.FormatInt(req.LogId, 10)
recordStartPercent(start, currentRoot.TreeSize, label)
}
// Do some sanity checks on the result.
if len(rsp.Leaves) > int(count) {
return http.StatusInternalServerError, fmt.Errorf("backend returned too many leaves: %d vs [%d,%d]", len(rsp.Leaves), start, end)
Expand Down Expand Up @@ -1122,3 +1134,12 @@ func (li *logInfo) toHTTPStatus(err error) int {
return http.StatusInternalServerError
}
}

// recordStartPercent expresses leafIndex as a percentage of treeSize and adds
// that value to the getEntriesStartPercentiles histogram under the supplied
// label values. Nothing is recorded when treeSize is zero, since no percentage
// can be computed in that case.
func recordStartPercent(leafIndex int64, treeSize uint64, labelVals ...string) {
	if treeSize == 0 {
		return
	}
	index, size := float64(leafIndex), float64(treeSize)
	getEntriesStartPercentiles.Observe(index/size*100.0, labelVals...)
}

0 comments on commit eb82533

Please sign in to comment.