~ne02ptzero/libfloat

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
1

[PATCH] log: Try to fix stuck replication in some cases

Louis Solofrizzo <lsolofrizzo@scaleway.com>
Details
Message ID
<20220531111717.3996173-1-lsolofrizzo@scaleway.com>
DKIM signature
missing
Download raw message
Patch: +25 -2
Signed-off-by: Louis Solofrizzo <lsolofrizzo@scaleway.com>
---
 log.c  | 24 ++++++++++++++++++++++--
 node.h |  3 +++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/log.c b/log.c
index d7cd67a..d761f14 100644
--- a/log.c
+++ b/log.c
@@ -660,8 +660,28 @@ void libfloat_append_entries_response(libfloat_ctx_t *ctx, libfloat_rpc_append_e
                 return;
             }

-            ERROR(ctx, "libfloat_append_entries_response: node %d: received current_index (%u) older than replicated_log (%u)",
-                node->id, resp->current_index, node->replicated_log);
+            if (node->announced_log == resp->current_index)
+            {
+                node->announced_log_count++;
+                if (node->announced_log_count == 20)
+                {
+                    /* We have 20 consecutive heartbeats telling the same story, let's believe it */
+
+                    node->announced_log_count = 0;
+                    node->next_log_to_send = max(resp->current_index + 1, 1);
+                    node->replicated_log = resp->current_index;
+                    libfloat_send_append_entries(ctx, node, false);
+                    return;
+                }
+            }
+            else
+            {
+                node->announced_log_count = 0;
+                node->announced_log = resp->current_index;
+            }
+
+            ERROR(ctx, "libfloat_append_entries_response: node %d: received current_index (%u) older than replicated_log (%u) (count=%lu)",
+                node->id, resp->current_index, node->replicated_log, node->announced_log_count);
             return;
         }

diff --git a/node.h b/node.h
index f49ce9c..9f0bed8 100644
--- a/node.h
+++ b/node.h
@@ -7,6 +7,9 @@ typedef struct {
     libfloat_entry_id_t next_log_to_send;       /*!< Next log to send to this node */
     libfloat_entry_id_t replicated_log;         /*!< Last known replicated log of this node */

+    libfloat_entry_id_t announced_log;
+    uint64_t            announced_log_count;
+
     uint8_t             has_voted_for_me        : 1;
     uint8_t             is_up_to_date           : 1;

-- 
2.36.1
Details
Message ID
<cc7f24fe-5e7f-4246-98a2-0db6fa46caf7@ptrk.io>
In-Reply-To
<20220531111717.3996173-1-lsolofrizzo@scaleway.com> (view parent)
DKIM signature
missing
Download raw message
LG

May 31, 2022 13:17:28 Louis Solofrizzo <lsolofrizzo@scaleway.com>:

> Signed-off-by: Louis Solofrizzo <lsolofrizzo@scaleway.com>
> ---
> log.c  | 24 ++++++++++++++++++++++--
> node.h |  3 +++
> 2 files changed, 25 insertions(+), 2 deletions(-)
>
> diff --git a/log.c b/log.c
> index d7cd67a..d761f14 100644
> --- a/log.c
> +++ b/log.c
> @@ -660,8 +660,28 @@ void libfloat_append_entries_response(libfloat_ctx_t *ctx, libfloat_rpc_append_e
>                  return;
>              }
>
> -            ERROR(ctx, "libfloat_append_entries_response: node %d: received current_index (%u) older than replicated_log (%u)",
> -                node->id, resp->current_index, node->replicated_log);
> +            if (node->announced_log == resp->current_index)
> +            {
> +                node->announced_log_count++;
> +                if (node->announced_log_count == 20)
> +                {
> +                    /* We have 20 consecutive heartbeats telling the same story, let's believe it */
> +
> +                    node->announced_log_count = 0;
> +                    node->next_log_to_send = max(resp->current_index + 1, 1);
> +                    node->replicated_log = resp->current_index;
> +                    libfloat_send_append_entries(ctx, node, false);
> +                    return;
> +                }
> +            }
> +            else
> +            {
> +                node->announced_log_count = 0;
> +                node->announced_log = resp->current_index;
> +            }
> +
> +            ERROR(ctx, "libfloat_append_entries_response: node %d: received current_index (%u) older than replicated_log (%u) (count=%lu)",
> +                node->id, resp->current_index, node->replicated_log, node->announced_log_count);
>              return;
>          }
>
> diff --git a/node.h b/node.h
> index f49ce9c..9f0bed8 100644
> --- a/node.h
> +++ b/node.h
> @@ -7,6 +7,9 @@ typedef struct {
>      libfloat_entry_id_t next_log_to_send;       /*!< Next log to send to this node */
>      libfloat_entry_id_t replicated_log;         /*!< Last known replicated log of this node */
>
> +    libfloat_entry_id_t announced_log;
> +    uint64_t            announced_log_count;
> +
>      uint8_t             has_voted_for_me        : 1;
>      uint8_t             is_up_to_date           : 1;
>
> --
> 2.36.1
Reply to thread Export thread (mbox)