Louis Solofrizzo: 1 election: Do not spam elections if one is already ongoing 3 files changed, 19 insertions(+), 5 deletions(-)
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~ne02ptzero/libfloat/patches/36926/mbox | git am -3
Learn more about email & git
And more debug here and there Signed-off-by: Louis Solofrizzo <lsolofrizzo@scaleway.com> --- election.c | 19 ++++++++++++++++--- libfloat.h | 1 + periodic.c | 4 ++-- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/election.c b/election.c index fbc15a3..c014756 100644 --- a/election.c +++ b/election.c @@ -24,6 +24,9 @@ void __libfloat_election_start(libfloat_ctx_t *ctx, libfloat_elections_args_t *a libfloat_term_t last_term = 0; libfloat_entry_id_t last_id = 0; + if (ctx->state == RAFT_STATE_CANDIDATE && args->force == false) + return; + DEBUG(ctx, "Election starting! reason=%s", args->reason); /* First, reset the vote of everyone */ for_every_node(ctx, node, { @@ -81,12 +84,15 @@ static bool libfloat_can_i_grant_vote(libfloat_ctx_t *ctx, libfloat_rpc_request_ { if (ctx->persistent.voted_for == req->candidate_id) return true; + + DEBUG(ctx, "Cannot grant vote to node %d: I have already voted for %d", req->candidate_id, ctx->persistent.voted_for); return false; } if (ctx->persistent.commit_index > 0 && libfloat_get_last_term(ctx, NULL, &last_term) == false) { - /* We have failed to retrieve last_ferm from log */ + /* We have failed to retrieve last_term from log */ + DEBUG(ctx, "Cannot grant vote to node %d: Cannot get last term", req->candidate_id); return false; } @@ -94,6 +100,7 @@ static bool libfloat_can_i_grant_vote(libfloat_ctx_t *ctx, libfloat_rpc_request_ if (last_term > req->last_log_term && req->last_log_index <= ctx->persistent.commit_index) { /* We have a superior term and a superior log, we can't grant our vote */ + DEBUG(ctx, "Cannot grant vote to node %d: I have a greater term and a greater log", req->candidate_id); return false; } @@ -103,10 +110,12 @@ static bool libfloat_can_i_grant_vote(libfloat_ctx_t *ctx, libfloat_rpc_request_ /* We have a superior log, we can't grant our vote */ /* There's an election in progress, and we have a superior log, let's try to become leader */ - libfloat_election_start(ctx, .reason = "I have a greater 
log"); + DEBUG(ctx, "Cannot grant vote to node %d: I have a greater log", req->candidate_id); + libfloat_election_start(ctx, .reason = "I have a greater log", .force = true); return false; } + DEBUG(ctx, "Granted vote to %d", req->candidate_id); /* All good! */ return true; } @@ -140,7 +149,11 @@ void libfloat_request_vote_receive(libfloat_ctx_t *ctx, libfloat_rpc_request_vot { libfloat_set_current_term(ctx, req->term); - libfloat_become_follower(ctx, .reason = "request_vote_receive: our term is lower than remote term"); + if (ctx->state == RAFT_STATE_LEADER) + libfloat_become_follower(ctx, .reason = "request_vote_receive: our term is lower than remote term"); + + /* Vote reset */ + libfloat_vote_for(ctx, 0); ctx->timeout_elapsed = 0; } diff --git a/libfloat.h b/libfloat.h index fe352d5..eb7056d 100644 --- a/libfloat.h +++ b/libfloat.h @@ -292,6 +292,7 @@ struct libfloat_ctx_s { */ typedef struct { const char *reason; /*!< Logic reason for calling a function, for debug purposes */ + bool force; } libfloat_elections_args_t; /*! 
diff --git a/periodic.c b/periodic.c index cfab6cf..40004b4 100644 --- a/periodic.c +++ b/periodic.c @@ -93,7 +93,7 @@ void libfloat_periodic(libfloat_ctx_t *ctx, uint32_t time) { /* DynamoDB-like elections are not implemented, let's simply start an election */ DEBUG(ctx, "New election: timeout elapsed %d", ctx->timeout_elapsed); - libfloat_election_start(ctx, .reason = "Election timeout, start a new one"); + libfloat_election_start(ctx, .reason = "Election timeout, start a new one", .force = true); ctx->lost_leader_time -= (ctx->conf.election_timeout / 1000); } else @@ -162,7 +162,7 @@ void libfloat_periodic(libfloat_ctx_t *ctx, uint32_t time) { /* We did not receive any reponses, and the election timeout has expired twice, let's launch an election */ ctx->gray_failures.checking = false; - libfloat_election_start(ctx, .reason = "Gray failures: Complete timeout"); + libfloat_election_start(ctx, .reason = "Gray failures: Complete timeout", .force = true); /* XXX: In this specific case, I'm not sure that triggering an election actually does something: * - Either every node in the cluster is down / unreachable, and a quorum will never be reached anyway -- 2.38.1