~exec64/imv-devel

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
1

[PATCH] Add support for libgrapheme as an icu replacement

Details
Message ID
<20211206234304.26148-1-cem@ckyln.com>
DKIM signature
missing
Download raw message
Patch: +86 -1
Hello, I have patched imv to add libgrapheme[1] as an option for a unicode
library. I think that icu is a rather large library just to use for
cursor placement.

This change adds a 'unicode' option to meson_options.txt. It still
defaults to icu, but it can be switched to grapheme.

[1]: http://git.suckless.org/libgrapheme/
---
 meson.build       | 11 +++++++-
 meson_options.txt |  8 ++++++
 src/console.c     | 68 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index 7cf64b5..b2849aa 100644
--- a/meson.build
+++ b/meson.build
@@ -38,6 +38,15 @@ else
  target_single_ws = false
endif

# Select the Unicode backend from the 'unicode' build option.
# 'icu' links against icu-io and defines IMV_USE_ICU; any other value
# links against libgrapheme (located via the C compiler, not pkg-config)
# and defines IMV_USE_GRAPHEME. The define is passed to every C source.
_unicode = get_option('unicode')
if _unicode == 'icu'
  unicode_lib = dependency('icu-io')
  add_project_arguments('-DIMV_USE_ICU', language: 'c')
else
  unicode_lib = cc.find_library('grapheme')
  add_project_arguments('-DIMV_USE_GRAPHEME', language: 'c')
endif

gl_dep = dependency('gl', required: false)
if not gl_dep.found()
  # libglvnd fallback for pure-wayland systems
@@ -49,7 +58,7 @@ deps_for_imv = [
  gl_dep,
  dependency('threads'),
  dependency('xkbcommon'),
  dependency('icu-io'),
  unicode_lib,
  dependency('inih', fallback : ['inih', 'inih_dep']),
  m_dep,
]
diff --git a/meson_options.txt b/meson_options.txt
index 389b7fd..c13ef7a 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -8,6 +8,14 @@ option('windows',
  description : 'window system to use'
)

# Unicode backend - default is ICU
option('unicode',
  type: 'combo',
  value: 'icu',
  choices : ['icu', 'grapheme'],
  description : 'unicode library to use'
)

option('test',
  type : 'feature',
  description : 'enable tests'
diff --git a/src/console.c b/src/console.c
index 073274f..b36b444 100644
--- a/src/console.c
+++ b/src/console.c
@@ -6,8 +6,15 @@
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#ifdef IMV_USE_ICU
#include <unicode/utext.h>
#include <unicode/ubrk.h>
#endif

#ifdef IMV_USE_GRAPHEME
#include <grapheme.h>
#endif

struct imv_console {
  char *buffer;
@@ -22,10 +29,54 @@ struct imv_console {
  char *history_before; /* contents of line before history was opened */
};

#ifdef IMV_USE_GRAPHEME
/*
 * Returns the length of str in bytes, measured by walking the string one
 * grapheme cluster at a time and summing the byte length of each cluster.
 *
 * str must be a NUL-terminated string; a NULL pointer yields 0.
 * The string is only read: no copy is allocated, so nothing needs freeing.
 */
static size_t grapheme_strlen(char *str)
{
  size_t total = 0;
  const char *s = str;

  if (s == NULL) {
    return 0;
  }

  while (*s != '\0') {
    size_t len = grapheme_bytelen(s);
    if (len == 0) {
      /* No forward progress is possible; stop rather than loop forever */
      break;
    }
    total += len;
    s     += len;
  }
  return total;
}

/*
 * Finds the grapheme cluster boundary that follows a byte offset.
 *
 * str       - NUL-terminated string to examine (only read, never copied)
 * strlength - length of str in bytes
 * offset    - byte offset of the start of the current cluster
 *
 * Returns the byte offset just past the cluster starting at offset,
 * -1 if offset is negative, at or beyond strlength, or points at the
 * terminating NUL (there is nothing to step over), or
 * -2 if the cluster would extend past strlength.
 */
static int32_t grapheme_following(char *str, size_t strlength, int32_t offset)
{
  /* Validate the offset before it is used to index into the string */
  if (offset < 0 || (size_t)offset >= strlength || str[offset] == '\0') {
    return -1;
  }

  size_t next = (size_t)offset + grapheme_bytelen(str + offset);
  if (next > strlength) {
    return -2;
  }
  return (int32_t)next;
}

/*
 * Finds the grapheme cluster boundary that precedes a byte offset.
 *
 * The string is scanned from the beginning, one cluster at a time, until
 * a cluster is found whose end reaches or passes offset; the start of
 * that cluster is the result.
 *
 * str       - NUL-terminated string to examine (only read, never copied)
 * strlength - length of str in bytes
 * offset    - byte offset to search backwards from
 *
 * Returns the byte offset of the start of that cluster,
 * -1 if offset is zero or negative (nothing precedes it), or
 * -2 if the end of the string is reached before offset is.
 */
static int32_t grapheme_preceding(char *str, size_t strlength, int32_t offset)
{
  if (offset <= 0) {
    return -1;
  }

  const char *s = str;
  int32_t start = 0; /* byte offset of the cluster currently examined */

  while (*s != '\0' && (size_t)start < strlength) {
    size_t len = grapheme_bytelen(s);
    if (len == 0) {
      /* No forward progress is possible; stop rather than loop forever */
      break;
    }
    if (start + (int32_t)len >= offset) {
      return start;
    }
    start += (int32_t)len;
    s     += len;
  }
  return -2;
}
#endif

/* Iterates forwards over characters in a UTF-8 string */
static size_t next_char(char *buffer, size_t position)
{
  size_t result = position;
  #ifdef IMV_USE_ICU
  UErrorCode status = U_ZERO_ERROR;
  UText *ut = utext_openUTF8(NULL, buffer, -1, &status);

@@ -41,6 +92,14 @@ static size_t next_char(char *buffer, size_t position)

  utext_close(ut);
  assert(U_SUCCESS(status));
  #elif defined (IMV_USE_GRAPHEME)
  size_t length = grapheme_strlen(buffer);
  int boundary = grapheme_following(buffer, length, position);
  if (!(boundary < 0))
    result = (size_t) boundary;

  assert(boundary != -2);
  #endif
  return result;
}

@@ -48,6 +107,7 @@ static size_t next_char(char *buffer, size_t position)
static size_t prev_char(char *buffer, size_t position)
{
  size_t result = position;
  #ifdef IMV_USE_ICU
  UErrorCode status = U_ZERO_ERROR;
  UText *ut = utext_openUTF8(NULL, buffer, -1, &status);

@@ -63,6 +123,14 @@ static size_t prev_char(char *buffer, size_t position)

  utext_close(ut);
  assert(U_SUCCESS(status));
  #elif defined (IMV_USE_GRAPHEME)
  size_t length = grapheme_strlen(buffer);
  int boundary = grapheme_preceding(buffer, length, position);
  if (!(boundary < 0))
    result = (size_t) boundary;

  assert(boundary != -2);
  #endif
  return result;
}

-- 
2.34.1
Details
Message ID
<CG8OIPSPIZPG.2E4SYU0RVOURM@tpad>
In-Reply-To
<20211206234304.26148-1-cem@ckyln.com> (view parent)
DKIM signature
missing
Download raw message
Hi Cem,

Thanks for the patch. I wasn't aware of libgrapheme before and I like its
smaller profile. icu is overkill for imv.

I've got some suggestions for the patch:

Instead of defining new `grapheme_*` functions to be called by
`next_char` and `prev_char`, I think we should replace the `next_char`
and `prev_char` functions entirely with the #ifdefs depending on
whether imv is using icu or grapheme. I also think you could simplify
the prev/next functions a bit beyond what you have currently. The logic
could be something like...

static size_t next_char(const char *buffer, size_t position):
  return position + grapheme_bytelen(buffer + position)

static size_t prev_char(const char *buffer, size_t position):
  size_t newPosition = 0
  do {
    const size_t step = grapheme_bytelen(buffer + newPosition)
    if newPosition + step >= position:
      break
    newPosition += step
  } while (step > 0)
  return newPosition

That ought to work, since `next_char` only needs to shift one
grapheme cluster forwards. `prev_char` can just search through the
string for the last grapheme cluster before it reaches the current
position. Plus you wouldn't need to define any additional functions.

Harry
Reply to thread Export thread (mbox)