~ne02ptzero/libfloat

Ensure libfloat is resistant to clock drifting v1 SUPERSEDED

Julien Egloff: 1
 Ensure libfloat is resistant to clock drifting

 8 files changed, 25 insertions(+), 21 deletions(-)
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~ne02ptzero/libfloat/patches/42298/mbox | git am -3
Learn more about email & git

[PATCH] Ensure libfloat is resistant to clock drifting Export this patch

Signed-off-by: Julien Egloff <jegloff@scaleway.com>
---
 election.c |  4 ++--
 libfloat.c |  4 ++--
 libfloat.h | 13 ++++++++-----
 log.c      |  4 ++--
 log.h      |  2 +-
 node.h     |  2 +-
 periodic.c | 13 +++++++------
 raft.c     |  4 ++--
 8 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/election.c b/election.c
index 4239e0c..8fa5e09 100644
--- a/election.c
+++ b/election.c
@@ -278,7 +278,7 @@ void libfloat_update_leader(libfloat_ctx_t *ctx, libfloat_node_t *node)
    if (ctx->leader != NULL && node == LIBFLOAT_NO_LEADER)
    {
        /* We've just lost our leader! */
        ctx->lost_leader_time = ctx->time(NULL);
        ctx->lost_leader_time = ctx->global_timer;

        ctx->write_current_leader(ctx, 0);
    }
@@ -286,7 +286,7 @@ void libfloat_update_leader(libfloat_ctx_t *ctx, libfloat_node_t *node)
    if (ctx->leader == LIBFLOAT_NO_LEADER && node != NULL)
    {
        /* We have a new leader! */
        ctx->stat.last_election_duration = ctx->time(NULL) - ctx->lost_leader_time;
        ctx->stat.last_election_duration = MS_TO_S(ctx->global_timer - ctx->lost_leader_time);
        ctx->stat.leader_election++;

        ctx->write_current_leader(ctx, node->id);
diff --git a/libfloat.c b/libfloat.c
index 14d7a4a..ac240ef 100644
--- a/libfloat.c
+++ b/libfloat.c
@@ -136,7 +136,7 @@ void libfloat_update_last_update(libfloat_ctx_t *ctx, libfloat_node_id_t id)
        libfloat_node_t         *node = libfloat_get_node(ctx, id);

        if (node != NULL)
            node->last_update = ctx->time(NULL);
            node->last_update = MS_TO_S(ctx->global_timer);
    }
}

@@ -165,7 +165,7 @@ void libfloat_wake_up(libfloat_ctx_t *ctx)
            ctx->rand() % ctx->conf.election_timeout;

        ctx->deep_sleep_state = 0;
        ctx->last_log_time = ctx->time(NULL);
        ctx->last_log_time = MS_TO_S(ctx->global_timer);
        ctx->write_current_sleep_state(ctx, ctx->deep_sleep_state, ctx->conf.original.deep_sleep_time);
    }
}
diff --git a/libfloat.h b/libfloat.h
index 9a529b4..f2b12a4 100644
--- a/libfloat.h
+++ b/libfloat.h
@@ -43,12 +43,13 @@ struct libfloat_ctx_s {
        } snapshot;
    } persistent;


    struct {
        uint64_t        leader_election;                /*!< Count of leader elections for this cluster */
        uint64_t        orphans_logs;                   /*!< Count of logs that are applied on the leader only */
        time_t          leader_election_time;           /*!< Timestamp of the last leader election */
        uint64_t        leader_election_time;           /*!< Timestamp of the last leader election */
        const char      *last_election_reason;          /*!< Last reason we had an election */
        time_t          last_election_duration;         /*!< Time it took to elect a leader for the last election */
        uint64_t        last_election_duration;         /*!< Time it took to elect a leader for the last election */
    } stat;

    struct {
@@ -76,20 +77,22 @@ struct libfloat_ctx_s {
    libfloat_node_t             *leader;                /*!< Current Leader (can be NULL) */
    libfloat_node_t             *me;                    /*!< My node in the cluster */

/* Convert milliseconds to seconds by integer division; the argument is parenthesized so a compound expression (e.g. `a - b`) is divided as a whole. */
#define MS_TO_S(milliseconds) ((milliseconds) / 1000)
    uint64_t                    global_timer;           /*!< Timer used for synchronisation, in millisconds */
    uint32_t                    timeout_elapsed;        /*!< Current time elasped between two heartbeats */
    uint32_t                    election_timeout_rand;  /*!< Randomized election time */
    uint32_t                    request_timeout;        /*!< Timeout for AE */
    uint32_t                    logs_check;             /*!< Last check counter of log stuck */
    bool                        stepping_down;          /*!< Is the node stepping down from leadership */
    time_t                      lost_leader_time;       /*!< Timestamp that we lost our leader */
    time_t                      last_log_time;          /*!< Last time we applied a log */
    uint64_t                    lost_leader_time;       /*!< Timestamp that we lost our leader */
    uint64_t                    last_log_time;          /*!< Last time we applied a log */
#define LIBFLOAT_DEEP_SLEEP_STATE_MAX 4
    uint32_t                    deep_sleep_state;       /*!< Deep sleep state of the cluster */

    struct {
        bool                    checking;               /*!< Are we checking that every node has lost the leader? */
        bool                    recovering;             /*!< We've just recoverd a leader from a gray-failure check */
        time_t                  check_time;             /*!< Time that we launched the check */
        uint64_t                check_time;             /*!< Time that we launched the check */
    } gray_failures;


diff --git a/log.c b/log.c
index 53bf93d..7d953ab 100644
--- a/log.c
+++ b/log.c
@@ -86,7 +86,7 @@ bool libfloat_add_log(libfloat_ctx_t *ctx, libfloat_commit_type_t commit_type, l
    entry->commit = commit;
    entry->udata = udata;
    entry->commit_type = commit_type;
    entry->started = ctx->time(NULL);
    entry->started = MS_TO_S(ctx->global_timer);

    entry->node_acks.prev = &entry->node_acks;
    entry->node_acks.next = &entry->node_acks;
@@ -628,7 +628,7 @@ void libfloat_append_entries_response(libfloat_ctx_t *ctx, libfloat_rpc_append_e
    }

    /* Set the next index to sent to the node */
    node->last_update = ctx->time(NULL);
    node->last_update = MS_TO_S(ctx->global_timer);

    if (node->replicated_log < resp->current_index)
    {
diff --git a/log.h b/log.h
index 1ff2b66..bb20ba0 100644
--- a/log.h
+++ b/log.h
@@ -41,7 +41,7 @@ typedef struct {
    void                        (*commit)(void *, libfloat_commit_status_t);    /*!< Commit callback */
    libfloat_commit_type_t      commit_type;                                    /*!< Commit type */
    void                        *udata;                                         /*!< User data for callback */
    time_t                      started;                                        /*!< Age of the log */
    uint64_t                    started;                                        /*!< Age of the log */
    libfloat_list_t             node_acks;                                      /*!< List of nodes that have replicated this log */

    libfloat_list_t             next;
diff --git a/node.h b/node.h
index 654f275..c614d61 100644
--- a/node.h
+++ b/node.h
@@ -17,7 +17,7 @@ typedef struct {
    uint8_t             has_responded_to_leader_check   : 1;

    void                *udata;                 /*!< User data */
    time_t              last_update;            /*!< Time of the last AE response (If I am the leader) */
    uint64_t            last_update;            /*!< Time of the last AE response (If I am the leader) */
    int                 snapshot_count;         /*!< Count of the times we are supposed to send a snapshot */
    bool                hearbeating;
} libfloat_node_t;
diff --git a/periodic.c b/periodic.c
index 4e55316..987257e 100644
--- a/periodic.c
+++ b/periodic.c
@@ -3,14 +3,15 @@
void libfloat_periodic(libfloat_ctx_t *ctx, uint32_t time)
{
    ctx->timeout_elapsed += time;
    ctx->global_timer += time;

    if (ctx->conf.deep_sleep_time != 0)
    {
        if (ctx->last_log_time + ctx->conf.deep_sleep_time < ctx->time(NULL))
        if (ctx->last_log_time + ctx->conf.deep_sleep_time < MS_TO_S(ctx->global_timer))
        {
            if (ctx->deep_sleep_state < LIBFLOAT_DEEP_SLEEP_STATE_MAX)
            {
                ctx->last_log_time = ctx->time(NULL);
                ctx->last_log_time = MS_TO_S(ctx->global_timer);

                ctx->conf.deep_sleep_time *= 2;
                ctx->conf.election_timeout *= 2;
@@ -39,7 +40,7 @@ void libfloat_periodic(libfloat_ctx_t *ctx, uint32_t time)
                    {
                        nodes_reachable++;
                    }
                    else if (node->last_update + ctx->conf.sanity_timeout > ctx->time(NULL))
                    else if (node->last_update + ctx->conf.sanity_timeout > MS_TO_S(ctx->global_timer))
                    {
                        nodes_reachable++;
                    }
@@ -68,7 +69,7 @@ void libfloat_periodic(libfloat_ctx_t *ctx, uint32_t time)
        if (ctx->logs_check >= 5)
        {
            libfloat_log_entry_t        *entry, *n = NULL;
            time_t                      now = ctx->time(NULL);
            uint64_t                    now = MS_TO_S(ctx->global_timer);

            libfloat_list_for_each_entry_safe(entry, n, &ctx->logs, next)
            {
@@ -138,7 +139,7 @@ void libfloat_periodic(libfloat_ctx_t *ctx, uint32_t time)

                    /* We did not fire the requests yet, let's init stuff and do it */
                    ctx->gray_failures.checking = true;
                    ctx->gray_failures.check_time = ctx->time(NULL);
                    ctx->gray_failures.check_time = MS_TO_S(ctx->global_timer);

                    ERROR(ctx, "Launching leader-check because timeout has been reached");
                    for_every_node(ctx, node, {
@@ -158,7 +159,7 @@ void libfloat_periodic(libfloat_ctx_t *ctx, uint32_t time)
                    uint32_t    node_with_leaders = 0;
                    uint32_t    node_responses = 0;

                    if (ctx->gray_failures.check_time + (ctx->conf.election_timeout / 1000) < ctx->time(NULL))
                    if (ctx->gray_failures.check_time + (ctx->conf.election_timeout / 1000) < ctx->global_timer)
                    {
                        /* We did not receive any reponses, and the election timeout has expired twice, let's launch an election */
                        ctx->gray_failures.checking = false;
diff --git a/raft.c b/raft.c
index 38c1f93..d45b9be 100644
--- a/raft.c
+++ b/raft.c
@@ -23,7 +23,7 @@ void libfloat_become_leader(libfloat_ctx_t *ctx)
    });

    ctx->me->is_up_to_date = 1;
    ctx->stat.leader_election_time = ctx->time(NULL);
    ctx->stat.leader_election_time = MS_TO_S(ctx->global_timer); 
}

void __libfloat_become_candidate(libfloat_ctx_t *ctx, libfloat_elections_args_t *args)
@@ -90,7 +90,7 @@ void libfloat_set_current_commit_index(libfloat_ctx_t *ctx, libfloat_entry_id_t
    if (id <= ctx->persistent.commit_index)
        return;

    ctx->last_log_time = ctx->time(NULL);
    ctx->last_log_time = MS_TO_S(ctx->global_timer);
    libfloat_wake_up(ctx);

    ctx->persistent.commit_index = id;
-- 
2.41.0