Cem Keylan: 1 Add support for libgrapheme as an icu replacement 3 files changed, 86 insertions(+), 1 deletions(-)
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~exec64/imv-devel/patches/27148/mbox | git am -3
Learn more about email & git
Hello, I have patched imv to add libgrapheme[1] as an option for a unicode library. I think that icu is a rather large library just to use for cursor placement. This change adds a 'unicode' option to meson_options.txt, it still defaults to icu, but it can be switched to grapheme. [1]: http://git.suckless.org/libgrapheme/ --- meson.build | 11 +++++++- meson_options.txt | 8 ++++++ src/console.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 7cf64b5..b2849aa 100644 --- a/meson.build +++ b/meson.build @@ -38,6 +38,15 @@ else target_single_ws = false endif +_unicode = get_option('unicode') +if _unicode == 'icu' + unicode_lib = dependency('icu-io') + add_project_arguments('-DIMV_USE_ICU', language: 'c') +else + unicode_lib = cc.find_library('grapheme') + add_project_arguments('-DIMV_USE_GRAPHEME', language: 'c') +endif + gl_dep = dependency('gl', required: false) if not gl_dep.found() # libglvnd fallback for pure-wayland systems @@ -49,7 +58,7 @@ deps_for_imv = [ gl_dep, dependency('threads'), dependency('xkbcommon'), - dependency('icu-io'), + unicode_lib, dependency('inih', fallback : ['inih', 'inih_dep']), m_dep, ] diff --git a/meson_options.txt b/meson_options.txt index 389b7fd..c13ef7a 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -8,6 +8,14 @@ option('windows', description : 'window system to use' ) +# Unicode backend - default is ICU +option('unicode', + type: 'combo', + value: 'icu', + choices : ['icu', 'grapheme'], + description : 'unicode library to use' +) + option('test', type : 'feature', description : 'enable tests' diff --git a/src/console.c b/src/console.c index 073274f..b36b444 100644 --- a/src/console.c +++ b/src/console.c @@ -6,8 +6,15 @@ #include <ctype.h> #include <stdlib.h> #include <string.h> + +#ifdef IMV_USE_ICU #include <unicode/utext.h> #include <unicode/ubrk.h> +#endif + +#ifdef IMV_USE_GRAPHEME +#include <grapheme.h> +#endif struct 
imv_console { char *buffer; @@ -22,10 +29,54 @@ struct imv_console { char *history_before; /* contents of line before history was opened */ }; +#ifdef IMV_USE_GRAPHEME +static size_t grapheme_strlen(char *str) +{ + size_t len = 0, ret = 0; + char *s = strdup(str); + while (*s != '\0') { + len = grapheme_bytelen(s); + ret += len; + s += len; + } + return ret; +} + +static int32_t grapheme_following(char *str, size_t strlength, int32_t offset) +{ + int32_t result; + char *s = strdup(str); + s += offset; + if(!((int)offset<(int)strlength)||*s=='\0') + return -1; + result = (int32_t) grapheme_bytelen(s); + return ((result + offset) > strlength) ? -2 : result + offset; +} + +static int32_t grapheme_preceding(char *str, size_t strlength, int32_t offset) +{ + size_t len; + int32_t result = 0; + if (offset<=0) + return -1; + char *s = strdup(str); + while (*s != '\0' && result < strlength) { + len = grapheme_bytelen(s); + s += len; + if ((result + (int32_t)len) < offset) + result += (int32_t)len; + else + return result; + } + return -2; +} +#endif + /* Iterates forwards over characters in a UTF-8 string */ static size_t next_char(char *buffer, size_t position) { size_t result = position; + #ifdef IMV_USE_ICU UErrorCode status = U_ZERO_ERROR; UText *ut = utext_openUTF8(NULL, buffer, -1, &status); @@ -41,6 +92,14 @@ static size_t next_char(char *buffer, size_t position) utext_close(ut); assert(U_SUCCESS(status)); + #elif defined (IMV_USE_GRAPHEME) + size_t length = grapheme_strlen(buffer); + int boundary = grapheme_following(buffer, length, position); + if (!(boundary < 0)) + result = (size_t) boundary; + + assert(boundary != -2); + #endif return result; } @@ -48,6 +107,7 @@ static size_t next_char(char *buffer, size_t position) static size_t prev_char(char *buffer, size_t position) { size_t result = position; + #ifdef IMV_USE_ICU UErrorCode status = U_ZERO_ERROR; UText *ut = utext_openUTF8(NULL, buffer, -1, &status); @@ -63,6 +123,14 @@ static size_t prev_char(char *buffer, 
size_t position) utext_close(ut); assert(U_SUCCESS(status)); + #elif defined (IMV_USE_GRAPHEME) + size_t length = grapheme_strlen(buffer); + int boundary = grapheme_preceding(buffer, length, position); + if (!(boundary < 0)) + result = (size_t) boundary; + + assert(boundary != -2); + #endif return result; } -- 2.34.1
Hi Cem,

Thanks for the patch. I wasn't aware of libgrapheme before and I like its smaller profile. icu is overkill for imv.

I've got some suggestions for the patch:

Instead of defining new `grapheme_*` functions to be called by `next_char` and `prev_char`, I think we should replace the `next_char` and `prev_char` functions entirely with the #ifdefs depending on whether imv is using icu or grapheme.

I also think you could simplify the prev/next functions a bit beyond what you have currently. The logic could be something like...

static size_t next_char(const char *buffer, size_t position):
    return position + grapheme_bytelen(buffer + position)

static size_t prev_char(const char *buffer, size_t position):
    size_t newPosition = 0
    do {
        const size_t step = grapheme_bytelen(buffer + newPosition)
        if newPosition + step >= position:
            break
        newPosition += step
    } while (step > 0)
    return newPosition

That ought to work, since `next_char` only needs to shift one grapheme cluster forwards. `prev_char` can just search through the string for the last grapheme cluster before it reaches the current position. Plus you wouldn't need to define any additional functions.

Harry