~exec64/imv-devel

Add support for libgrapheme as an icu replacement v1 NEEDS REVISION

Cem Keylan: 1
 Add support for libgrapheme as an icu replacement

 3 files changed, 86 insertions(+), 1 deletions(-)
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~exec64/imv-devel/patches/27148/mbox | git am -3
Learn more about email & git

[PATCH] Add support for libgrapheme as an icu replacement Export this patch

Hello, I have patched imv to add libgrapheme[1] as an option for a unicode
library. I think that icu is a rather large library just to use for
cursor placement.

This change adds a 'unicode' option to meson_options.txt. It still
defaults to icu, but it can be switched to grapheme.

[1]: http://git.suckless.org/libgrapheme/
---
 meson.build       | 11 +++++++-
 meson_options.txt |  8 ++++++
 src/console.c     | 68 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index 7cf64b5..b2849aa 100644
--- a/meson.build
+++ b/meson.build
@@ -38,6 +38,15 @@ else
  target_single_ws = false
endif

# Select the Unicode backend from the 'unicode' build option.
# Each branch sets unicode_lib and adds a preprocessor define
# (IMV_USE_ICU or IMV_USE_GRAPHEME) for the C sources.
_unicode = get_option('unicode')
if _unicode == 'icu'
  unicode_lib = dependency('icu-io')
  add_project_arguments('-DIMV_USE_ICU', language: 'c')
else
  # Any value other than 'icu' selects libgrapheme, which is located
  # with cc.find_library() instead of dependency().
  unicode_lib = cc.find_library('grapheme')
  add_project_arguments('-DIMV_USE_GRAPHEME', language: 'c')
endif

gl_dep = dependency('gl', required: false)
if not gl_dep.found()
  # libglvnd fallback for pure-wayland systems
@@ -49,7 +58,7 @@ deps_for_imv = [
  gl_dep,
  dependency('threads'),
  dependency('xkbcommon'),
  dependency('icu-io'),
  unicode_lib,
  dependency('inih', fallback : ['inih', 'inih_dep']),
  m_dep,
]
diff --git a/meson_options.txt b/meson_options.txt
index 389b7fd..c13ef7a 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -8,6 +8,14 @@ option('windows',
  description : 'window system to use'
)

# Unicode backend: one of 'icu' or 'grapheme'; the default is 'icu'.
option('unicode',
  type: 'combo',
  value: 'icu',
  choices : ['icu', 'grapheme'],
  description : 'unicode library to use'
)

option('test',
  type : 'feature',
  description : 'enable tests'
diff --git a/src/console.c b/src/console.c
index 073274f..b36b444 100644
--- a/src/console.c
+++ b/src/console.c
@@ -6,8 +6,15 @@
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#ifdef IMV_USE_ICU
#include <unicode/utext.h>
#include <unicode/ubrk.h>
#endif

#ifdef IMV_USE_GRAPHEME
#include <grapheme.h>
#endif

struct imv_console {
  char *buffer;
@@ -22,10 +29,54 @@ struct imv_console {
  char *history_before; /* contents of line before history was opened */
};

#ifdef IMV_USE_GRAPHEME
/*
 * Returns the length of str in bytes, measured by walking it one grapheme
 * cluster at a time with grapheme_bytelen().
 *
 * str must be a NUL-terminated string. It is only read, never copied, so
 * there is nothing to allocate or free.
 */
static size_t grapheme_strlen(char *str)
{
  const char *cursor = str;
  size_t total = 0;

  while (*cursor != '\0') {
    const size_t step = grapheme_bytelen(cursor);
    if (step == 0) {
      /* Defensive: a zero-length cluster would otherwise loop forever. */
      break;
    }
    total  += step;
    cursor += step;
  }
  return total;
}

/*
 * Finds the byte offset of the grapheme cluster boundary that follows
 * offset in str.
 *
 * str        NUL-terminated string to scan (only read, never copied)
 * strlength  length of str in bytes
 * offset     byte offset of the current position
 *
 * Returns the byte offset of the next boundary, -1 if offset is negative,
 * at or past the end of the string, or -2 if the cluster reported by
 * grapheme_bytelen() would extend past strlength.
 */
static int32_t grapheme_following(char *str, size_t strlength, int32_t offset)
{
  /* Validate the offset before touching str[offset]. */
  if (offset < 0 || (size_t)offset >= strlength || str[offset] == '\0') {
    return -1;
  }

  const size_t next = (size_t)offset + grapheme_bytelen(str + offset);
  return (next > strlength) ? -2 : (int32_t)next;
}

/*
 * Finds the byte offset of the grapheme cluster boundary that precedes
 * offset in str, by walking clusters from the start of the string.
 *
 * str        NUL-terminated string to scan (only read, never copied)
 * strlength  length of str in bytes
 * offset     byte offset of the current position
 *
 * Returns the byte offset of the start of the last cluster that begins
 * before offset, -1 if offset is zero or negative, or -2 if the end of the
 * string is reached without finding such a cluster.
 */
static int32_t grapheme_preceding(char *str, size_t strlength, int32_t offset)
{
  if (offset <= 0) {
    return -1;
  }

  const size_t target = (size_t)offset;
  size_t start = 0;

  while (start < strlength && str[start] != '\0') {
    const size_t step = grapheme_bytelen(str + start);
    if (step == 0) {
      /* Defensive: a zero-length cluster would otherwise loop forever. */
      return -2;
    }
    if (start + step >= target) {
      /* The cluster starting here reaches offset, so it is the previous one. */
      return (int32_t)start;
    }
    start += step;
  }
  return -2;
}
#endif

/* Iterates forwards over characters in a UTF-8 string */
static size_t next_char(char *buffer, size_t position)
{
  size_t result = position;
  #ifdef IMV_USE_ICU
  UErrorCode status = U_ZERO_ERROR;
  UText *ut = utext_openUTF8(NULL, buffer, -1, &status);

@@ -41,6 +92,14 @@ static size_t next_char(char *buffer, size_t position)

  utext_close(ut);
  assert(U_SUCCESS(status));
  #elif defined (IMV_USE_GRAPHEME)
  size_t length = grapheme_strlen(buffer);
  int boundary = grapheme_following(buffer, length, position);
  if (!(boundary < 0))
    result = (size_t) boundary;

  assert(boundary != -2);
  #endif
  return result;
}

@@ -48,6 +107,7 @@ static size_t next_char(char *buffer, size_t position)
static size_t prev_char(char *buffer, size_t position)
{
  size_t result = position;
  #ifdef IMV_USE_ICU
  UErrorCode status = U_ZERO_ERROR;
  UText *ut = utext_openUTF8(NULL, buffer, -1, &status);

@@ -63,6 +123,14 @@ static size_t prev_char(char *buffer, size_t position)

  utext_close(ut);
  assert(U_SUCCESS(status));
  #elif defined (IMV_USE_GRAPHEME)
  size_t length = grapheme_strlen(buffer);
  int boundary = grapheme_preceding(buffer, length, position);
  if (!(boundary < 0))
    result = (size_t) boundary;

  assert(boundary != -2);
  #endif
  return result;
}

-- 
2.34.1
Hi Cem,

Thanks for the patch. I wasn't aware of libgrapheme before and I like its
smaller profile. icu is overkill for imv.

I've got some suggestions for the patch:

Instead of defining new `grapheme_*` functions to be called by
`next_char` and `prev_char`, I think we should provide two complete
implementations of `next_char` and `prev_char`, selected with #ifdefs
depending on whether imv is using icu or grapheme. I also think you could
simplify the prev/next functions a bit beyond what you have currently. The
logic could be something like...

static size_t next_char(const char *buffer, size_t position):
  return position + grapheme_bytelen(buffer + position)

static size_t prev_char(const char *buffer, size_t position):
  size_t newPosition = 0
  do {
    const size_t step = grapheme_bytelen(buffer + newPosition)
    if newPosition + step >= position:
      break
    newPosition += step
  } while (step > 0)
  return newPosition

That ought to work, since `next_char` only needs to shift one
grapheme cluster forwards. `prev_char` can just search through the
string for the last grapheme cluster before it reaches the current
position. Plus you wouldn't need to define any additional functions.

Harry