// foundationdb/metrics.rs

1use std::collections::HashMap;
2use std::sync::{Arc, Mutex};
3
/// Label key-value pairs attached to a metric.
pub type Labels = Vec<(String, String)>;

/// Identity of a metric: its name together with its label set.
///
/// Keys built through [`MetricKey::new`] store their labels in sorted order,
/// so two keys describing the same label set compare and hash equal no matter
/// the order in which the labels were supplied.
#[derive(Clone, Hash, Eq, PartialEq, Debug)]
pub struct MetricKey {
    /// Name of the metric
    pub name: String,
    /// Labels of the metric; sorted when built through [`MetricKey::new`]
    pub labels: Labels,
}

impl MetricKey {
    /// Create a new MetricKey with a canonical label ordering
    ///
    /// # Arguments
    /// * `name` - The name of the metric
    /// * `labels` - Key-value pairs for labeling the metric, in any order
    ///
    /// # Returns
    /// * `MetricKey` - A new MetricKey owning copies of `name` and `labels`
    pub fn new(name: &str, labels: &[(&str, &str)]) -> Self {
        // Convert labels to owned strings
        let mut sorted_labels: Labels = labels
            .iter()
            .map(|&(k, v)| (k.to_owned(), v.to_owned()))
            .collect();

        // Order by key AND value. Comparing keys alone would leave pairs that
        // share a key in caller order, so the same label set could produce
        // keys that are not equal.
        sorted_labels.sort_unstable();

        Self {
            name: name.to_owned(),
            labels: sorted_labels,
        }
    }
}
39
40/// Tracks metrics for a transaction.
41///
42/// This struct maintains transaction metrics protected by a mutex to allow safe concurrent access.
43#[derive(Debug, Clone, Default)]
44pub struct TransactionMetrics {
45    /// All metrics for the transaction, organized by category
46    pub metrics: Arc<Mutex<MetricsReport>>,
47}
48
/// Per-transaction facts that are kept across retry attempts
#[derive(Clone, Debug, Default)]
pub struct TransactionInfo {
    /// How many times the transaction has been retried
    pub retries: u64,
    /// Read version of the transaction; `None` until one is recorded
    pub read_version: Option<i64>,
    /// Commit version of the transaction; `None` until one is recorded
    pub commit_version: Option<i64>,
}
59
60/// Data structure containing the actual metrics for a transaction,
61/// organized into different categories
62#[derive(Debug, Clone, Default)]
63pub struct MetricsReport {
64    /// Metrics for the current transaction attempt
65    pub current: CounterMetrics,
66    /// Aggregated metrics across all transaction attempts, including retries
67    pub total: CounterMetrics,
68    /// Time-related metrics for performance analysis
69    pub time: TimingMetrics,
70    /// Custom metrics for the current transaction attempt
71    pub custom_metrics: HashMap<MetricKey, u64>,
72    /// Transaction-level information
73    pub transaction: TransactionInfo,
74}
75
76impl MetricsReport {
77    /// Set the read version for the transaction
78    ///
79    /// # Arguments
80    /// * `version` - The read version
81    pub fn set_read_version(&mut self, version: i64) {
82        self.transaction.read_version = Some(version);
83    }
84
85    /// Set the commit version for the transaction
86    ///
87    /// # Arguments
88    /// * `version` - The commit version
89    pub fn set_commit_version(&mut self, version: i64) {
90        self.transaction.commit_version = Some(version);
91    }
92
93    /// Increment the retry counter
94    pub fn increment_retries(&mut self) {
95        self.transaction.retries += 1;
96    }
97}
98
/// Timings, in milliseconds, of the phases of a transaction's execution
#[derive(Clone, Debug, Default)]
pub struct TimingMetrics {
    /// Duration of the commit phase
    pub commit_execution_ms: u64,
    /// Durations spent handling errors and retries, one entry per recorded duration
    pub on_error_execution_ms: Vec<u64>,
    /// Duration of the whole transaction, from start to finish
    pub total_execution_ms: u64,
}

impl TimingMetrics {
    /// Store the commit execution time, replacing any earlier value
    ///
    /// # Arguments
    /// * `duration_ms` - The duration of the commit execution in milliseconds
    pub fn record_commit_time(&mut self, duration_ms: u64) {
        let Self {
            commit_execution_ms,
            ..
        } = self;
        *commit_execution_ms = duration_ms;
    }

    /// Append one error handling time to the recorded list
    ///
    /// # Arguments
    /// * `duration_ms` - The duration of the error handling in milliseconds
    pub fn add_error_time(&mut self, duration_ms: u64) {
        let recorded = &mut self.on_error_execution_ms;
        recorded.push(duration_ms);
    }

    /// Store the total execution time, replacing any earlier value
    ///
    /// # Arguments
    /// * `duration_ms` - The total duration of the transaction in milliseconds
    pub fn set_execution_time(&mut self, duration_ms: u64) {
        let Self {
            total_execution_ms, ..
        } = self;
        *total_execution_ms = duration_ms;
    }

    /// Add up every recorded error handling time
    ///
    /// # Returns
    /// * `u64` - The total time spent handling errors in milliseconds
    pub fn get_total_error_time(&self) -> u64 {
        self.on_error_execution_ms
            .iter()
            .fold(0, |elapsed, ms| elapsed + ms)
    }
}
143
/// Represents a FoundationDB command, together with the sizes reported for it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FdbCommand {
    /// Atomic operation
    Atomic,
    /// Clear operation
    Clear,
    /// Clear range operation
    ClearRange,
    /// Get operation: `(bytes read, key-value pairs fetched)`
    Get(u64, u64),
    /// Get range operation: `(bytes read, key-value pairs fetched)`
    GetRange(u64, u64),
    /// Set operation: `(bytes written)`
    Set(u64),
}

/// Amount added to a call counter for each reported command.
const INCREMENT: u64 = 1;
159
160impl TransactionMetrics {
161    /// Create a new instance of TransactionMetrics
162    pub fn new() -> Self {
163        Self {
164            metrics: Arc::new(Mutex::new(MetricsReport::default())),
165        }
166    }
167
168    /// Set the read version for the transaction
169    ///
170    /// # Arguments
171    /// * `version` - The read version
172    pub fn set_read_version(&self, version: i64) {
173        let mut data = self
174            .metrics
175            .lock()
176            .unwrap_or_else(|poisoned| poisoned.into_inner());
177        data.set_read_version(version);
178    }
179
180    /// Set the commit version for the transaction
181    ///
182    /// # Arguments
183    /// * `version` - The commit version
184    pub fn set_commit_version(&self, version: i64) {
185        let mut data = self
186            .metrics
187            .lock()
188            .unwrap_or_else(|poisoned| poisoned.into_inner());
189        data.set_commit_version(version);
190    }
191
192    /// Resets the current metrics and increments the retry counter in total metrics.
193    ///
194    /// This method is called when a transaction is retried due to a conflict or other retryable error.
195    /// It increments the retry count in the transaction info and resets the current metrics to zero.
196    /// It also merges current custom metrics into total custom metrics before clearing them.
197    pub fn reset_current(&self) {
198        let mut data = self
199            .metrics
200            .lock()
201            .unwrap_or_else(|poisoned| poisoned.into_inner());
202        data.increment_retries();
203        data.current = CounterMetrics::default();
204        data.custom_metrics.clear();
205    }
206
207    /// Increment the retry counter
208    pub fn increment_retries(&self) {
209        let mut data = self
210            .metrics
211            .lock()
212            .unwrap_or_else(|poisoned| poisoned.into_inner());
213        data.increment_retries();
214    }
215
216    /// Reports metrics for a specific FDB command by incrementing the appropriate counters.
217    ///
218    /// This method updates both the current and total metrics for the given command.
219    ///
220    /// # Arguments
221    /// * `fdb_command` - The FDB command to report metrics for
222    pub fn report_metrics(&self, fdb_command: FdbCommand) {
223        let mut data = self
224            .metrics
225            .lock()
226            .unwrap_or_else(|poisoned| poisoned.into_inner());
227        data.current.increment(&fdb_command);
228        data.total.increment(&fdb_command);
229    }
230
231    /// Get the number of retries
232    ///
233    /// # Returns
234    /// * `u64` - The number of retries
235    pub fn get_retries(&self) -> u64 {
236        let data = self
237            .metrics
238            .lock()
239            .unwrap_or_else(|poisoned| poisoned.into_inner());
240        data.transaction.retries
241    }
242
243    /// Returns a clone of all metrics data.
244    ///
245    /// # Returns
246    /// * `MetricsData` - A clone of all metrics data
247    pub fn get_metrics_data(&self) -> MetricsReport {
248        let data = self
249            .metrics
250            .lock()
251            .unwrap_or_else(|poisoned| poisoned.into_inner());
252        data.clone()
253    }
254
255    /// Returns a clone of the transaction information.
256    ///
257    /// # Returns
258    /// * `TransactionInfo` - A clone of the transaction information
259    pub fn get_transaction_info(&self) -> TransactionInfo {
260        let data = self
261            .metrics
262            .lock()
263            .unwrap_or_else(|poisoned| poisoned.into_inner());
264        data.transaction.clone()
265    }
266
267    /// Set a custom metric
268    ///
269    /// # Arguments
270    /// * `name` - The name of the metric
271    /// * `value` - The value to set
272    /// * `labels` - Key-value pairs for labeling the metric
273    pub fn set_custom(&self, name: &str, value: u64, labels: &[(&str, &str)]) {
274        let mut data = self
275            .metrics
276            .lock()
277            .unwrap_or_else(|poisoned| poisoned.into_inner());
278        let key = MetricKey::new(name, labels);
279        data.custom_metrics.insert(key.clone(), value);
280    }
281
282    /// Increment a custom metric
283    ///
284    /// # Arguments
285    /// * `name` - The name of the metric
286    /// * `amount` - The amount to increment by
287    /// * `labels` - Key-value pairs for labeling the metric
288    pub fn increment_custom(&self, name: &str, amount: u64, labels: &[(&str, &str)]) {
289        let mut data = self
290            .metrics
291            .lock()
292            .unwrap_or_else(|poisoned| poisoned.into_inner());
293        let key = MetricKey::new(name, labels);
294        // Increment in both current and total custom metrics
295        *data.custom_metrics.entry(key.clone()).or_insert(0) += amount;
296    }
297
298    /// Record commit execution time
299    ///
300    /// # Arguments
301    /// * `duration_ms` - The duration of the commit execution in milliseconds
302    pub fn record_commit_time(&self, duration_ms: u64) {
303        let mut data = self
304            .metrics
305            .lock()
306            .unwrap_or_else(|poisoned| poisoned.into_inner());
307        data.time.commit_execution_ms = duration_ms;
308    }
309
310    /// Add an error execution time to the list
311    ///
312    /// # Arguments
313    /// * `duration_ms` - The duration of the error handling in milliseconds
314    pub fn add_error_time(&self, duration_ms: u64) {
315        let mut data = self
316            .metrics
317            .lock()
318            .unwrap_or_else(|poisoned| poisoned.into_inner());
319        data.time.on_error_execution_ms.push(duration_ms);
320    }
321
322    /// Set the total execution time
323    ///
324    /// # Arguments
325    /// * `duration_ms` - The total duration of the transaction in milliseconds
326    pub fn set_execution_time(&self, duration_ms: u64) {
327        let mut data = self
328            .metrics
329            .lock()
330            .unwrap_or_else(|poisoned| poisoned.into_inner());
331        data.time.set_execution_time(duration_ms);
332    }
333
334    /// Get the sum of all error handling times
335    ///
336    /// # Returns
337    /// * `u64` - The total time spent handling errors in milliseconds
338    pub fn get_total_error_time(&self) -> u64 {
339        let data = self
340            .metrics
341            .lock()
342            .unwrap_or_else(|poisoned| poisoned.into_inner());
343        data.time.on_error_execution_ms.iter().sum()
344    }
345}
346
347/// `CounterMetrics` is used in two contexts within the metrics system:
348/// - As `current` metrics: tracking operations for the current transaction attempt
349/// - As `total` metrics: aggregating operations across all transaction attempts including retries
350///
351/// Each counter is incremented when the corresponding operation is performed, allowing
352/// for detailed analysis of transaction behavior and performance characteristics.
353#[derive(Debug, Default, Clone)]
354pub struct CounterMetrics {
355    // ATOMIC OP
356    /// Number of atomic operations performed (add, and, or, etc.)
357    pub call_atomic_op: u64,
358    // CLEAR
359    /// Number of key clear operations performed
360    pub call_clear: u64,
361    // CLEAR RANGE
362    /// Number of range clear operations performed
363    pub call_clear_range: u64,
364    // GET
365    /// Number of get operations performed
366    pub call_get: u64,
367    /// Total number of key-value pairs fetched across all read operations
368    pub keys_values_fetched: u64,
369    /// Total number of bytes read from the database
370    pub bytes_read: u64,
371    // SET
372    /// Number of set operations performed
373    pub call_set: u64,
374    /// Total number of bytes written to the database
375    pub bytes_written: u64,
376}
377
378impl CounterMetrics {
379    /// Increments the metrics for a given FDB command.
380    ///
381    /// This method updates the metrics counters based on the type of FDB command.
382    ///
383    /// # Arguments
384    /// * `fdb_command` - The FDB command to increment metrics for
385    pub fn increment(&mut self, fdb_command: &FdbCommand) {
386        match fdb_command {
387            FdbCommand::Atomic => self.call_atomic_op += INCREMENT,
388            FdbCommand::Clear => self.call_clear += INCREMENT,
389            FdbCommand::ClearRange => self.call_clear_range += INCREMENT,
390            FdbCommand::Get(bytes_count, kv_fetched) => {
391                self.keys_values_fetched += *kv_fetched;
392                self.bytes_read += *bytes_count;
393                self.call_get += INCREMENT
394            }
395            FdbCommand::GetRange(bytes_count, keys_values_fetched) => {
396                self.keys_values_fetched += *keys_values_fetched;
397                self.bytes_read += *bytes_count;
398            }
399            FdbCommand::Set(bytes_count) => {
400                self.bytes_written += *bytes_count;
401                self.call_set += INCREMENT
402            }
403        }
404    }
405}
406
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Keys ignore label order but differ on any change of metric name,
    /// label key or label value.
    #[test]
    fn test_metric_key_equality() {
        let base = MetricKey::new("counter", &[("region", "us-west"), ("service", "api")]);

        // Same labels in different order should be considered equal
        assert_eq!(
            base,
            MetricKey::new("counter", &[("service", "api"), ("region", "us-west")])
        );

        // A different label value, label key or metric name should each
        // produce a different key
        let different = vec![
            MetricKey::new("counter", &[("region", "us-east"), ("service", "api")]),
            MetricKey::new("counter", &[("zone", "us-west"), ("service", "api")]),
            MetricKey::new("timer", &[("region", "us-west"), ("service", "api")]),
        ];
        for other in different.iter() {
            assert_ne!(&base, other);
        }
    }

    /// Keys used in a `HashMap` are found again through any label order.
    #[test]
    fn test_metric_key_in_hashmap() {
        // Insert metrics with different label combinations
        let metrics: HashMap<MetricKey, i32> = vec![
            (
                MetricKey::new("counter", &[("region", "us-west"), ("service", "api")]),
                100,
            ),
            (
                MetricKey::new("counter", &[("region", "us-east"), ("service", "api")]),
                200,
            ),
            (
                MetricKey::new("timer", &[("region", "us-west"), ("service", "api")]),
                300,
            ),
        ]
        .into_iter()
        .collect();

        let expectations: Vec<(&str, Vec<(&str, &str)>, i32)> = vec![
            // Same label combination as inserted
            ("counter", vec![("region", "us-west"), ("service", "api")], 100),
            // Labels in a different order resolve to the same entry
            ("counter", vec![("service", "api"), ("region", "us-west")], 100),
            // Different label values produce a different key
            ("counter", vec![("region", "us-east"), ("service", "api")], 200),
            // Different metric names produce a different key
            ("timer", vec![("region", "us-west"), ("service", "api")], 300),
        ];

        for (name, labels, expected) in &expectations {
            assert_eq!(metrics.get(&MetricKey::new(name, labels)), Some(expected));
        }
    }

    /// All six orderings of three labels collapse into a single key.
    #[test]
    fn test_metric_key_label_order_independence() {
        let permutations: [[(&str, &str); 3]; 6] = [
            [("a", "1"), ("b", "2"), ("c", "3")],
            [("a", "1"), ("c", "3"), ("b", "2")],
            [("b", "2"), ("a", "1"), ("c", "3")],
            [("b", "2"), ("c", "3"), ("a", "1")],
            [("c", "3"), ("a", "1"), ("b", "2")],
            [("c", "3"), ("b", "2"), ("a", "1")],
        ];

        // A HashSet keeps one entry per distinct key
        let unique_keys: HashSet<MetricKey> = permutations
            .iter()
            .map(|labels| MetricKey::new("counter", labels))
            .collect();

        // All permutations should be considered the same key
        assert_eq!(unique_keys.len(), 1);
    }

    /// Setting overwrites, incrementing adds, and label order is irrelevant.
    #[test]
    fn test_custom_metrics_operations() {
        let labels = [("endpoint", "users"), ("method", "GET")];
        let reversed_labels = [("method", "GET"), ("endpoint", "users")];

        let metrics = TransactionMetrics::new();
        let key = MetricKey::new("api_calls", &labels);
        // Reads the stored value from a fresh snapshot of the metrics data
        let stored = || metrics.get_metrics_data().custom_metrics.get(&key).copied();

        // Set initial value for a custom metric
        metrics.set_custom("api_calls", 100, &labels);
        assert_eq!(stored(), Some(100));

        // Increment the same metric
        metrics.increment_custom("api_calls", 50, &labels);
        assert_eq!(stored(), Some(150));

        // Set the same metric to a new value (overwrite)
        metrics.set_custom("api_calls", 200, &labels);
        assert_eq!(stored(), Some(200));

        // Increment through a different label order
        metrics.increment_custom("api_calls", 75, &reversed_labels);
        assert_eq!(stored(), Some(275));
    }
}