foundationdb/
timekeeper.rs

//! There is a key range called TimeKeeper in the system key space which stores a rolling history window
//! of time to version mappings, with one data point every 10 seconds.
//! It is not exposed via any user-facing API, though of course the data can be read by a user.
//! It is not an official database feature and should not be relied on for anything where accuracy
//! is critical as nothing prevents or detects system clock skew on the FDB process logging these data points.
//!
//! TimeKeeper is used by backup and restore to convert timestamps to approximate versions and versions
//! to approximate timestamps to make reasoning about backup data and restore operations easier.
//! Lookups work by finding the nearest value for the query version or timestamp, taking the equivalent other value,
//! and then adding an adjustment estimate based on 1 million versions per 1 second.
//! This logic accounts for arbitrary version advancement due to recovery, DR switch operations, or any other reason.
//!
//! [source](https://forums.foundationdb.org/t/versionstamp-as-absolute-time/2442/3)
14
15use crate::future::FdbValue;
16use crate::options::TransactionOption;
17use crate::{FdbBindingError, FdbResult, KeySelector, RangeOption, Transaction};
18use foundationdb_tuple::{pack, unpack};
19use futures::StreamExt;
20
/// Key prefix of the TimeKeeper map inside the `\xff\x02` system keyspace.
/// Every entry's key is this prefix followed by a tuple-packed timestamp.
/// The same prefix can be found in the [Java implementation](https://github.com/FoundationDB/fdb-record-layer/blob/main/fdb-extensions/src/main/java/com/apple/foundationdb/system/SystemKeyspace.java#L80)
const TIME_KEEPER_PREFIX: &[u8] = b"\xff\x02/timeKeeper/map/";
24
/// Direction in which the TimeKeeper map is searched relative to the
/// requested timestamp.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HintMode {
    /// Use the closest entry recorded strictly before the timestamp, so the
    /// returned read version precedes it.
    BeforeTimestamp,
    /// Use the closest entry recorded at or after the timestamp, so the
    /// returned read version does not precede it.
    AfterTimestamp,
}
32
33/// Try to get a version ID closer as possible as the asked timestamp
34///
35/// If no result are found, either your timestamp is in the future of the
36/// Timekeeper or the data has been rolled by fresh ones.
37///
38/// The layout os follow:
39///
40/// TIME_KEEPER_PREFIX/timestamp1 => read_version1
41/// TIME_KEEPER_PREFIX/timestamp2 => read_version2
42/// TIME_KEEPER_PREFIX/timestamp3 => read_version3
43///
44/// Each key are associated to a pack read version on 8 bytes
45/// compatible with an i64.
46///
47/// Timestamps are provided from unix time as seconds
48pub async fn hint_version_from_timestamp(
49    trx: &Transaction,
50    unix_timestamp_as_seconds: u64,
51    mode: HintMode,
52    snapshot: bool,
53) -> Result<Option<u64>, FdbBindingError> {
54    // Timekeeper range keys are stored in /0x00/0x02 system namespace
55    // to be able to read this range, the transaction must have
56    // capabilities to read System Keys
57    trx.set_option(TransactionOption::ReadSystemKeys)?;
58    // The profiling should be used even when the Database has been locked
59    trx.set_option(TransactionOption::ReadLockAware)?;
60
61    // Timekeeper keys are defined has prefix/timestamp
62    let mut start_key_bytes = TIME_KEEPER_PREFIX.to_vec();
63    start_key_bytes.extend_from_slice(&pack(&unix_timestamp_as_seconds));
64    // we get the first key greater than this value because timekeeper doesn't tick
65    // each seconds but rather each 10 seconds but not each time
66    let start_key = KeySelector::first_greater_or_equal(start_key_bytes.clone());
67
68    // The end of the scan is the end of the timekeeper range
69    // but we won't scan it the whole range
70    let mut end_key_bytes = TIME_KEEPER_PREFIX.to_vec();
71    end_key_bytes.extend_from_slice(b"\xff");
72    let end_key = KeySelector::first_greater_than(end_key_bytes);
73
74    let mut range = match mode {
75        HintMode::AfterTimestamp => RangeOption::from((start_key, end_key)),
76        HintMode::BeforeTimestamp => {
77            let mut range = RangeOption::from((
78                KeySelector::first_greater_than(TIME_KEEPER_PREFIX),
79                start_key,
80            ));
81            range.reverse = true;
82            range
83        }
84    };
85    // No need to scan further than the next and previous key
86    range.limit = Some(1);
87
88    // We get the first key matching our start range bound
89    let results = trx
90        .get_ranges_keyvalues(range, snapshot)
91        .take(1)
92        .collect::<Vec<FdbResult<FdbValue>>>()
93        .await;
94
95    // If any result then the value found will be the read version ID
96    if let Some(Ok(kv)) = results.first() {
97        let version = unpack(kv.value()).map_err(FdbBindingError::PackError)?;
98        return Ok(Some(version));
99    }
100    // otherwise timestamp too old or is future
101    Ok(None)
102}