Skip to content

Commit d989a59

Browse files
committed
metrics: add latency histogram statistics
I've added the histogram metrics used in cpp-rust-driver. The snapshot of histogram statistics is taken under concurrency precautions using lock-free histogram features. I've adjusted the docs book, adding an example of taking the snapshot and accessing its values.
1 parent e1e7201 commit d989a59

File tree

5 files changed

+143
-1
lines changed

5 files changed

+143
-1
lines changed

docs/source/metrics/metrics.md

+15
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# Driver metrics
22

3+
This feature is available only under the crate feature `metrics`.
4+
35
During operation the driver collects various metrics.
46

57
They can be accessed at any moment using `Session::get_metrics()`
@@ -11,6 +13,7 @@ They can be accessed at any moment using `Session::get_metrics()`
1113
* Total number of paged queries
1214
* Number of errors during paged queries
1315
* Number of retries
16+
* Latency histogram statistics (min, max, mean, standard deviation, percentiles)
1417

1518
### Example
1619
```rust
@@ -29,6 +32,18 @@ println!(
2932
"99.9 latency percentile: {}",
3033
metrics.get_latency_percentile_ms(99.9).unwrap()
3134
);
35+
36+
let snapshot = metrics.get_snapshot().unwrap();
37+
println!("Min: {}", snapshot.min);
38+
println!("Max: {}", snapshot.max);
39+
println!("Mean: {}", snapshot.mean);
40+
println!("Standard deviation: {}", snapshot.stddev);
41+
println!("Median: {}", snapshot.median);
42+
println!("75th percentile: {}", snapshot.percentile_75);
43+
println!("90th percentile: {}", snapshot.percentile_90);
44+
println!("95th percentile: {}", snapshot.percentile_95);
45+
println!("99th percentile: {}", snapshot.percentile_99);
46+
println!("99.9th percentile: {}", snapshot.percentile_99_9);
3247
# Ok(())
3348
# }
3449
```

examples/basic.rs

+12
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,18 @@ async fn main() -> Result<()> {
100100
metrics.get_latency_percentile_ms(99.9).unwrap()
101101
);
102102

103+
let snapshot = metrics.get_snapshot().unwrap();
104+
println!("Min: {}", snapshot.min);
105+
println!("Max: {}", snapshot.max);
106+
println!("Mean: {}", snapshot.mean);
107+
println!("Standard deviation: {}", snapshot.stddev);
108+
println!("Median: {}", snapshot.median);
109+
println!("75th percentile: {}", snapshot.percentile_75);
110+
println!("90th percentile: {}", snapshot.percentile_90);
111+
println!("95th percentile: {}", snapshot.percentile_95);
112+
println!("99th percentile: {}", snapshot.percentile_99);
113+
println!("99.9th percentile: {}", snapshot.percentile_99_9);
114+
103115
println!("Ok.");
104116

105117
Ok(())

scylla/src/transport/histogram/lock_free_histogram.rs

+106
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,23 @@ pub struct Histogram {
3131
config: Config,
3232
}
3333

34+
/// Point-in-time summary of the values recorded in a histogram.
///
/// A snapshot holds the minimum, maximum, mean, standard deviation, median
/// and the most commonly used percentiles as they were at the moment the
/// snapshot was taken. All fields are expressed in the same unit as the
/// values that were recorded into the histogram.
///
/// The type is plain data: it is cheap to copy and can be compared for
/// equality, which makes it convenient to store or diff successive snapshots.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Snapshot {
    /// Smallest recorded value.
    pub min: u64,
    /// Largest recorded value.
    pub max: u64,
    /// Arithmetic mean of the recorded values.
    pub mean: u64,
    /// Standard deviation of the recorded values.
    pub stddev: u64,
    /// Median of the recorded values.
    pub median: u64,
    /// 75th percentile of the recorded values.
    pub percentile_75: u64,
    /// 90th percentile of the recorded values.
    pub percentile_90: u64,
    /// 95th percentile of the recorded values.
    pub percentile_95: u64,
    /// 99th percentile of the recorded values.
    pub percentile_99: u64,
    /// 99.9th percentile of the recorded values.
    pub percentile_99_9: u64,
}
50+
3451
impl Histogram {
3552
pub fn new() -> Self {
3653
let grouping_power = 7;
@@ -109,6 +126,95 @@ impl Histogram {
109126
}
110127
}
111128

129+
pub fn snapshot() -> impl FnOnce(&[AtomicU64], &Config) -> Result<Snapshot, &'static str> {
130+
|buckets, config| {
131+
let total_count = Histogram::get_total_count(buckets);
132+
133+
let mut min = u64::MAX;
134+
let mut max = 0;
135+
let mut weighted_sum = 0;
136+
let mut pref_sum = 0;
137+
let mut percentile_75 = 0;
138+
let mut percentile_90 = 0;
139+
let mut percentile_95 = 0;
140+
let mut percentile_99 = 0;
141+
let mut percentile_99_9 = 0;
142+
143+
let percentile_75_threshold = (0.75 * total_count as f64).ceil() as u128;
144+
let percentile_90_threshold = (0.9 * total_count as f64).ceil() as u128;
145+
let percentile_95_threshold = (0.95 * total_count as f64).ceil() as u128;
146+
let percentile_99_threshold = (0.99 * total_count as f64).ceil() as u128;
147+
let percentile_99_9_threshold = (0.999 * total_count as f64).ceil() as u128;
148+
149+
for (i, bucket) in buckets.iter().enumerate() {
150+
let count = bucket.load(ORDER_TYPE) as u128;
151+
if count == 0 {
152+
continue;
153+
}
154+
155+
let lower_bound = config.index_to_lower_bound(i);
156+
let upper_bound = config.index_to_upper_bound(i);
157+
158+
if lower_bound < min {
159+
min = lower_bound;
160+
}
161+
if upper_bound > max {
162+
max = upper_bound;
163+
}
164+
165+
weighted_sum += count * lower_bound as u128;
166+
167+
let next_pref_sum = pref_sum + count;
168+
if pref_sum < percentile_75_threshold && next_pref_sum >= percentile_75_threshold {
169+
percentile_75 = lower_bound;
170+
}
171+
if pref_sum < percentile_90_threshold && next_pref_sum >= percentile_90_threshold {
172+
percentile_90 = lower_bound;
173+
}
174+
if pref_sum < percentile_95_threshold && next_pref_sum >= percentile_95_threshold {
175+
percentile_95 = lower_bound;
176+
}
177+
if pref_sum < percentile_99_threshold && next_pref_sum >= percentile_99_threshold {
178+
percentile_99 = lower_bound;
179+
}
180+
if pref_sum < percentile_99_9_threshold
181+
&& next_pref_sum >= percentile_99_9_threshold
182+
{
183+
percentile_99_9 = lower_bound;
184+
}
185+
186+
pref_sum = next_pref_sum;
187+
}
188+
189+
let mean = (weighted_sum / total_count) as u64;
190+
let mut variance_sum = 0;
191+
for (i, bucket) in buckets.iter().enumerate() {
192+
let count = bucket.load(ORDER_TYPE) as u128;
193+
if count == 0 {
194+
continue;
195+
}
196+
197+
let lower_bound = config.index_to_lower_bound(i);
198+
variance_sum += count * (lower_bound as u128 - mean as u128).pow(2);
199+
}
200+
let variance = variance_sum / total_count;
201+
let stddev = (variance as f64).sqrt() as u64;
202+
203+
Ok(Snapshot {
204+
min,
205+
max,
206+
mean,
207+
stddev,
208+
median: config.index_to_lower_bound(buckets.len() / 2),
209+
percentile_75,
210+
percentile_90,
211+
percentile_95,
212+
percentile_99,
213+
percentile_99_9,
214+
})
215+
}
216+
}
217+
112218
pub fn get_total_count(buckets: &[AtomicU64]) -> u128 {
113219
buckets.iter().map(|v| v.load(ORDER_TYPE) as u128).sum()
114220
}

scylla/src/transport/histogram/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ mod lock_free_histogram;
33

44
pub use config::Config;
55
pub use lock_free_histogram::Histogram;
6+
pub use lock_free_histogram::Snapshot;

scylla/src/transport/metrics.rs

+9-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::transport::histogram::Histogram;
1+
use crate::transport::histogram::{Histogram, Snapshot};
22
use std::sync::atomic::{AtomicU64, Ordering};
33
use std::sync::Arc;
44

@@ -97,6 +97,14 @@ impl Metrics {
9797
Ok(result)
9898
}
9999

100+
/// Returns snapshot of histogram metrics taken at the moment of calling this function. \
101+
/// Available metrics: min, max, mean, stddev, median, percentile_75,
102+
/// percentile_90, percentile_95, percentile_99, percentile_99_9.
103+
pub fn get_snapshot(&self) -> Result<Snapshot, MetricsError> {
104+
let snapshot = self.histogram.log_operation(Histogram::snapshot())?;
105+
Ok(snapshot)
106+
}
107+
100108
/// Returns counter for errors occurred in nonpaged queries
101109
pub fn get_errors_num(&self) -> u64 {
102110
self.errors_num.load(ORDER_TYPE)

0 commit comments

Comments
 (0)