~ne02ptzero/libfloat

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
2 2

[PATCH] log: Add metric on orphaned logs

Louis Solofrizzo <lsolofrizzo@scaleway.com>
Details
Message ID
<20220103141603.548953-1-lsolofrizzo@scaleway.com>
DKIM signature
pass
Download raw message
Patch: +7 -0
Add an exposed counter for logs that are committed on the leader but not
anywhere else on a cluster, in order to alert on a possible
network-split.

Signed-off-by: Louis Solofrizzo <lsolofrizzo@scaleway.com>
---
 libfloat.h | 1 +
 periodic.c | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/libfloat.h b/libfloat.h
index b4aaf1a..9428cbb 100644
--- a/libfloat.h
+++ b/libfloat.h
@@ -45,6 +45,7 @@ struct libfloat_ctx_s {

     struct {
         uint64_t        leader_election;                /*!< Count of leader elections for this cluster */
+        uint64_t        orphans_logs;                   /*!< Count of logs that are applied on the leader only */
     } stat;

     struct {
diff --git a/periodic.c b/periodic.c
index c9796fb..6e9b0ec 100644
--- a/periodic.c
+++ b/periodic.c
@@ -64,6 +64,12 @@ void libfloat_periodic(libfloat_ctx_t *ctx, uint32_t time)
                         ctx->leader == ctx->me
                     );

+                    if (libfloat_list_count(&entry->node_acks) == 1)
+                    {
+                        ERROR(ctx, "Log %d timeout with only leader commited, possible loss of synchronization", entry->id);
+                        ctx->stat.orphans_logs++;
+                    }
+
                     entry->commit(entry->udata, LIBFLOAT_ENTRY_TIMEOUT);
                     entry->commit = NULL;
                     libfloat_list_del(&entry->next);
-- 
2.34.1
Details
Message ID
<20220103141937.ixmuuck5upxda4ah@scaleway>
In-Reply-To
<20220103141603.548953-1-lsolofrizzo@scaleway.com> (view parent)
DKIM signature
pass
Download raw message
LG
Details
Message ID
<1E8OE54DEjilVMEK6fpBR97leTgeznieRv0kgfwhc0TihV0eZbnDND_uT9vtM7kRP_TT-0KRS-DZczjvOhMG5un-szVTwQ8qOFvL8V9y-eA=@protonmail.com>
In-Reply-To
<20220103141603.548953-1-lsolofrizzo@scaleway.com> (view parent)
DKIM signature
pass
Download raw message
LG

--
Michael Bonfils
https://www.murlock.org/

‐‐‐‐‐‐‐ Original Message ‐‐‐‐‐‐‐

On Monday, January 3rd, 2022 at 3:16 PM, Louis Solofrizzo <lsolofrizzo@scaleway.com> wrote:

> Add an exposed counter for logs that are committed on the leader but not
> anywhere else on a cluster, in order to alert on a possible
> network-split.
>
> Signed-off-by: Louis Solofrizzo <lsolofrizzo@scaleway.com>
> ---
>  libfloat.h | 1 +
>  periodic.c | 6 ++++++
>  2 files changed, 7 insertions(+)
>
> diff --git a/libfloat.h b/libfloat.h
> index b4aaf1a..9428cbb 100644
> --- a/libfloat.h
> +++ b/libfloat.h
> @@ -45,6 +45,7 @@ struct libfloat_ctx_s {
>
>      struct {
>          uint64_t        leader_election;                /*!< Count of leader elections for this cluster */
> +        uint64_t        orphans_logs;                   /*!< Count of logs that are applied on the leader only */
>      } stat;
>
>      struct {
> diff --git a/periodic.c b/periodic.c
> index c9796fb..6e9b0ec 100644
> --- a/periodic.c
> +++ b/periodic.c
> @@ -64,6 +64,12 @@ void libfloat_periodic(libfloat_ctx_t *ctx, uint32_t time)
>                          ctx->leader == ctx->me
>                      );
>
> +                    if (libfloat_list_count(&entry->node_acks) == 1)
> +                    {
> +                        ERROR(ctx, "Log %d timeout with only leader commited, possible loss of synchronization", entry->id);
> +                        ctx->stat.orphans_logs++;
> +                    }
> +
>                      entry->commit(entry->udata, LIBFLOAT_ENTRY_TIMEOUT);
>                      entry->commit = NULL;
>                      libfloat_list_del(&entry->next);
> --
> 2.34.1
Reply to thread Export thread (mbox)