Julien Egloff: 1
Ensure libfloat is resistant to clock drifting
8 files changed, 25 insertions(+), 21 deletions(-)
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~ne02ptzero/libfloat/patches/42449/mbox | git am -3
Learn more about email & git
Signed-off-by: Julien Egloff <jegloff@scaleway.com> --- election.c | 4 ++-- libfloat.c | 4 ++-- libfloat.h | 13 ++++++++----- log.c | 4 ++-- log.h | 2 +- node.h | 2 +- periodic.c | 13 +++++++------ raft.c | 4 ++-- 8 files changed, 25 insertions(+), 21 deletions(-) diff --git a/election.c b/election.c index 4239e0c..8fa5e09 100644 --- a/election.c +++ b/election.c @@ -278,7 +278,7 @@ void libfloat_update_leader(libfloat_ctx_t *ctx, libfloat_node_t *node) if (ctx->leader != NULL && node == LIBFLOAT_NO_LEADER) { /* We've just lost our leader! */ - ctx->lost_leader_time = ctx->time(NULL); + ctx->lost_leader_time = ctx->global_timer; ctx->write_current_leader(ctx, 0); } @@ -286,7 +286,7 @@ void libfloat_update_leader(libfloat_ctx_t *ctx, libfloat_node_t *node) if (ctx->leader == LIBFLOAT_NO_LEADER && node != NULL) { /* We have a new leader! */ - ctx->stat.last_election_duration = ctx->time(NULL) - ctx->lost_leader_time; + ctx->stat.last_election_duration = MS_TO_S(ctx->global_timer - ctx->lost_leader_time); ctx->stat.leader_election++; ctx->write_current_leader(ctx, node->id); diff --git a/libfloat.c b/libfloat.c index 14d7a4a..ac240ef 100644 --- a/libfloat.c +++ b/libfloat.c @@ -136,7 +136,7 @@ void libfloat_update_last_update(libfloat_ctx_t *ctx, libfloat_node_id_t id) libfloat_node_t *node = libfloat_get_node(ctx, id); if (node != NULL) - node->last_update = ctx->time(NULL); + node->last_update = MS_TO_S(ctx->global_timer); } } @@ -165,7 +165,7 @@ void libfloat_wake_up(libfloat_ctx_t *ctx) ctx->rand() % ctx->conf.election_timeout; ctx->deep_sleep_state = 0; - ctx->last_log_time = ctx->time(NULL); + ctx->last_log_time = MS_TO_S(ctx->global_timer); ctx->write_current_sleep_state(ctx, ctx->deep_sleep_state, ctx->conf.original.deep_sleep_time); } } diff --git a/libfloat.h b/libfloat.h index 9a529b4..1ea1bc4 100644 --- a/libfloat.h +++ b/libfloat.h @@ -43,12 +43,13 @@ struct libfloat_ctx_s { } snapshot; } persistent; + struct { uint64_t leader_election; /*!< Count of 
leader elections for this cluster */ uint64_t orphans_logs; /*!< Count of logs that are applied on the leader only */ - time_t leader_election_time; /*!< Timestamp of the last leader election */ + uint64_t leader_election_time; /*!< Timestamp of the last leader election */ const char *last_election_reason; /*!< Last reason we had an election */ - time_t last_election_duration; /*!< Time it took to elect a leader for the last election */ + uint64_t last_election_duration; /*!< Time it took to elect a leader for the last election */ } stat; struct { @@ -76,20 +77,22 @@ struct libfloat_ctx_s { libfloat_node_t *leader; /*!< Current Leader (can be NULL) */ libfloat_node_t *me; /*!< My node in the cluster */ +#define MS_TO_S(milliseconds) ((milliseconds) / 1000) + uint64_t global_timer; /*!< Timer used for synchronisation, in millisconds */ uint32_t timeout_elapsed; /*!< Current time elasped between two heartbeats */ uint32_t election_timeout_rand; /*!< Randomized election time */ uint32_t request_timeout; /*!< Timeout for AE */ uint32_t logs_check; /*!< Last check counter of log stuck */ bool stepping_down; /*!< Is the node stepping down from leadership */ - time_t lost_leader_time; /*!< Timestamp that we lost our leader */ - time_t last_log_time; /*!< Last time we applied a log */ + uint64_t lost_leader_time; /*!< Timestamp that we lost our leader */ + uint64_t last_log_time; /*!< Last time we applied a log */ #define LIBFLOAT_DEEP_SLEEP_STATE_MAX 4 uint32_t deep_sleep_state; /*!< Deep sleep state of the cluster */ struct { bool checking; /*!< Are we checking that every node has lost the leader? 
*/ bool recovering; /*!< We've just recoverd a leader from a gray-failure check */ - time_t check_time; /*!< Time that we launched the check */ + uint64_t check_time; /*!< Time that we launched the check */ } gray_failures; diff --git a/log.c b/log.c index 53bf93d..7d953ab 100644 --- a/log.c +++ b/log.c @@ -86,7 +86,7 @@ bool libfloat_add_log(libfloat_ctx_t *ctx, libfloat_commit_type_t commit_type, l entry->commit = commit; entry->udata = udata; entry->commit_type = commit_type; - entry->started = ctx->time(NULL); + entry->started = MS_TO_S(ctx->global_timer); entry->node_acks.prev = &entry->node_acks; entry->node_acks.next = &entry->node_acks; @@ -628,7 +628,7 @@ void libfloat_append_entries_response(libfloat_ctx_t *ctx, libfloat_rpc_append_e } /* Set the next index to sent to the node */ - node->last_update = ctx->time(NULL); + node->last_update = MS_TO_S(ctx->global_timer); if (node->replicated_log < resp->current_index) { diff --git a/log.h b/log.h index 1ff2b66..bb20ba0 100644 --- a/log.h +++ b/log.h @@ -41,7 +41,7 @@ typedef struct { void (*commit)(void *, libfloat_commit_status_t); /*!< Commit callback */ libfloat_commit_type_t commit_type; /*!< Commit type */ void *udata; /*!< User data for callback */ - time_t started; /*!< Age of the log */ + uint64_t started; /*!< Age of the log */ libfloat_list_t node_acks; /*!< List of nodes that have replicated this log */ libfloat_list_t next; diff --git a/node.h b/node.h index 654f275..c614d61 100644 --- a/node.h +++ b/node.h @@ -17,7 +17,7 @@ typedef struct { uint8_t has_responded_to_leader_check : 1; void *udata; /*!< User data */ - time_t last_update; /*!< Time of the last AE response (If I am the leader) */ + uint64_t last_update; /*!< Time of the last AE response (If I am the leader) */ int snapshot_count; /*!< Count of the times we are supposed to send a snapshot */ bool hearbeating; } libfloat_node_t; diff --git a/periodic.c b/periodic.c index 4e55316..987257e 100644 --- a/periodic.c +++ b/periodic.c @@ -3,14 
+3,15 @@ void libfloat_periodic(libfloat_ctx_t *ctx, uint32_t time) { ctx->timeout_elapsed += time; + ctx->global_timer += time; if (ctx->conf.deep_sleep_time != 0) { - if (ctx->last_log_time + ctx->conf.deep_sleep_time < ctx->time(NULL)) + if (ctx->last_log_time + ctx->conf.deep_sleep_time < MS_TO_S(ctx->global_timer)) { if (ctx->deep_sleep_state < LIBFLOAT_DEEP_SLEEP_STATE_MAX) { - ctx->last_log_time = ctx->time(NULL); + ctx->last_log_time = MS_TO_S(ctx->global_timer); ctx->conf.deep_sleep_time *= 2; ctx->conf.election_timeout *= 2; @@ -39,7 +40,7 @@ void libfloat_periodic(libfloat_ctx_t *ctx, uint32_t time) { nodes_reachable++; } - else if (node->last_update + ctx->conf.sanity_timeout > ctx->time(NULL)) + else if (node->last_update + ctx->conf.sanity_timeout > MS_TO_S(ctx->global_timer)) { nodes_reachable++; } @@ -68,7 +69,7 @@ void libfloat_periodic(libfloat_ctx_t *ctx, uint32_t time) if (ctx->logs_check >= 5) { libfloat_log_entry_t *entry, *n = NULL; - time_t now = ctx->time(NULL); + uint64_t now = MS_TO_S(ctx->global_timer); libfloat_list_for_each_entry_safe(entry, n, &ctx->logs, next) { @@ -138,7 +139,7 @@ void libfloat_periodic(libfloat_ctx_t *ctx, uint32_t time) /* We did not fire the requests yet, let's init stuff and do it */ ctx->gray_failures.checking = true; - ctx->gray_failures.check_time = ctx->time(NULL); + ctx->gray_failures.check_time = MS_TO_S(ctx->global_timer); ERROR(ctx, "Launching leader-check because timeout has been reached"); for_every_node(ctx, node, { @@ -158,7 +159,7 @@ void libfloat_periodic(libfloat_ctx_t *ctx, uint32_t time) uint32_t node_with_leaders = 0; uint32_t node_responses = 0; - if (ctx->gray_failures.check_time + (ctx->conf.election_timeout / 1000) < ctx->time(NULL)) + if (ctx->gray_failures.check_time + (ctx->conf.election_timeout / 1000) < ctx->global_timer) { /* We did not receive any reponses, and the election timeout has expired twice, let's launch an election */ ctx->gray_failures.checking = false; diff --git a/raft.c 
b/raft.c index 38c1f93..d45b9be 100644 --- a/raft.c +++ b/raft.c @@ -23,7 +23,7 @@ void libfloat_become_leader(libfloat_ctx_t *ctx) }); ctx->me->is_up_to_date = 1; - ctx->stat.leader_election_time = ctx->time(NULL); + ctx->stat.leader_election_time = MS_TO_S(ctx->global_timer); } void __libfloat_become_candidate(libfloat_ctx_t *ctx, libfloat_elections_args_t *args) @@ -90,7 +90,7 @@ void libfloat_set_current_commit_index(libfloat_ctx_t *ctx, libfloat_entry_id_t if (id <= ctx->persistent.commit_index) return; - ctx->last_log_time = ctx->time(NULL); + ctx->last_log_time = MS_TO_S(ctx->global_timer); libfloat_wake_up(ctx); ctx->persistent.commit_index = id; -- 2.41.0