foundationdb/timekeeper.rs
1//! There is a key range called TimeKeeper in the system key space which stores a rolling history window
2//! of time to version mappings, with one data point every 10 seconds.
3//! It is not exposed via any user-facing API, though of course the data can be read by a user.
4//! It is not an official database feature and should not be relied on for anything where accuracy
5//! is critical as nothing prevents or detects system clock skew on the FDB process logging these data points.
6//!
7//! TimeKeeper is used by backup and restore to convert timestamps to approximate versions and versions
8//! to approximate timestamps to make reasoning about backup data and restore operations easier.
9//! Lookups work by finding the nearest value for the query version or timestamp, taking the equivalent other value,
10//! and then adding an adjustment estimate based on 1 million versions per 1 second.
11//! This logic accounts for arbitrary version advancement due to recovery, DR switch operations, or any other reason.
12//!
13//! [source](https://forums.foundationdb.org/t/versionstamp-as-absolute-time/2442/3)
14
15use crate::future::FdbValue;
16use crate::options::TransactionOption;
17use crate::{FdbBindingError, FdbResult, KeySelector, RangeOption, Transaction};
18use foundationdb_tuple::{pack, unpack};
19use futures::StreamExt;
20
21/// Key prefix of the TimeKeeper timestamp-to-version map, located in the `\xff\x02` system keyspace.
22/// The same prefix can be found in the [Java implementation](https://github.com/FoundationDB/fdb-record-layer/blob/main/fdb-extensions/src/main/java/com/apple/foundationdb/system/SystemKeyspace.java#L80)
23const TIME_KEEPER_PREFIX: &[u8] = b"\xff\x02/timeKeeper/map/";
24
/// Selects on which side of the requested timestamp the TimeKeeper scan looks for a data point.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HintMode {
    /// Use the closest data point recorded strictly before the requested timestamp.
    BeforeTimestamp,
    /// Use the closest data point recorded at or after the requested timestamp.
    AfterTimestamp,
}
32
33/// Try to get a version ID closer as possible as the asked timestamp
34///
35/// If no result are found, either your timestamp is in the future of the
36/// Timekeeper or the data has been rolled by fresh ones.
37///
38/// The layout os follow:
39///
40/// TIME_KEEPER_PREFIX/timestamp1 => read_version1
41/// TIME_KEEPER_PREFIX/timestamp2 => read_version2
42/// TIME_KEEPER_PREFIX/timestamp3 => read_version3
43///
44/// Each key are associated to a pack read version on 8 bytes
45/// compatible with an i64.
46///
47/// Timestamps are provided from unix time as seconds
48pub async fn hint_version_from_timestamp(
49 trx: &Transaction,
50 unix_timestamp_as_seconds: u64,
51 mode: HintMode,
52 snapshot: bool,
53) -> Result<Option<u64>, FdbBindingError> {
54 // Timekeeper range keys are stored in /0x00/0x02 system namespace
55 // to be able to read this range, the transaction must have
56 // capabilities to read System Keys
57 trx.set_option(TransactionOption::ReadSystemKeys)?;
58 // The profiling should be used even when the Database has been locked
59 trx.set_option(TransactionOption::ReadLockAware)?;
60
61 // Timekeeper keys are defined has prefix/timestamp
62 let mut start_key_bytes = TIME_KEEPER_PREFIX.to_vec();
63 start_key_bytes.extend_from_slice(&pack(&unix_timestamp_as_seconds));
64 // we get the first key greater than this value because timekeeper doesn't tick
65 // each seconds but rather each 10 seconds but not each time
66 let start_key = KeySelector::first_greater_or_equal(start_key_bytes.clone());
67
68 // The end of the scan is the end of the timekeeper range
69 // but we won't scan it the whole range
70 let mut end_key_bytes = TIME_KEEPER_PREFIX.to_vec();
71 end_key_bytes.extend_from_slice(b"\xff");
72 let end_key = KeySelector::first_greater_than(end_key_bytes);
73
74 let mut range = match mode {
75 HintMode::AfterTimestamp => RangeOption::from((start_key, end_key)),
76 HintMode::BeforeTimestamp => {
77 let mut range = RangeOption::from((
78 KeySelector::first_greater_than(TIME_KEEPER_PREFIX),
79 start_key,
80 ));
81 range.reverse = true;
82 range
83 }
84 };
85 // No need to scan further than the next and previous key
86 range.limit = Some(1);
87
88 // We get the first key matching our start range bound
89 let results = trx
90 .get_ranges_keyvalues(range, snapshot)
91 .take(1)
92 .collect::<Vec<FdbResult<FdbValue>>>()
93 .await;
94
95 // If any result then the value found will be the read version ID
96 if let Some(Ok(kv)) = results.first() {
97 let version = unpack(kv.value()).map_err(FdbBindingError::PackError)?;
98 return Ok(Some(version));
99 }
100 // otherwise timestamp too old or is future
101 Ok(None)
102}