Hello, I have patched imv to add libgrapheme[1] as an option for a unicode
library. I think that icu is a rather large library just to use for
cursor placement.
This change adds a 'unicode' option to meson_options.txt. It still
defaults to icu, but it can be switched to grapheme.
[1]: http://git.suckless.org/libgrapheme/
---
meson.build | 11 +++++++-
meson_options.txt | 8 ++++++
src/console.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 86 insertions(+), 1 deletion(-)
diff --git a/meson.build b/meson.build
index 7cf64b5..b2849aa 100644
--- a/meson.build
+++ b/meson.build
@@ -38,6 +38,15 @@ else
target_single_ws = false
endif
+_unicode = get_option('unicode')
+if _unicode == 'icu'
+ unicode_lib = dependency('icu-io')
+ add_project_arguments('-DIMV_USE_ICU', language: 'c')
+else
+ unicode_lib = cc.find_library('grapheme')
+ add_project_arguments('-DIMV_USE_GRAPHEME', language: 'c')
+endif
+
gl_dep = dependency('gl', required: false)
if not gl_dep.found()
# libglvnd fallback for pure-wayland systems
@@ -49,7 +58,7 @@ deps_for_imv = [
gl_dep,
dependency('threads'),
dependency('xkbcommon'),
- dependency('icu-io'),
+ unicode_lib,
dependency('inih', fallback : ['inih', 'inih_dep']),
m_dep,
]
diff --git a/meson_options.txt b/meson_options.txt
index 389b7fd..c13ef7a 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -8,6 +8,14 @@ option('windows',
description : 'window system to use'
)
+# Unicode backend - default is ICU
+option('unicode',
+ type: 'combo',
+ value: 'icu',
+ choices : ['icu', 'grapheme'],
+ description : 'unicode library to use'
+)
+
option('test',
type : 'feature',
description : 'enable tests'
diff --git a/src/console.c b/src/console.c
index 073274f..b36b444 100644
--- a/src/console.c
+++ b/src/console.c
@@ -6,8 +6,15 @@
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
+
+#ifdef IMV_USE_ICU
#include <unicode/utext.h>
#include <unicode/ubrk.h>
+#endif
+
+#ifdef IMV_USE_GRAPHEME
+#include <grapheme.h>
+#endif
struct imv_console {
char *buffer;
@@ -22,10 +29,54 @@ struct imv_console {
char *history_before; /* contents of line before history was opened */
};
+#ifdef IMV_USE_GRAPHEME
+/*
+ * Returns the length of str in bytes, measured by walking the string one
+ * step at a time with grapheme_bytelen() until the terminating NUL is
+ * reached. str must be NUL-terminated.
+ */
+static size_t grapheme_strlen(const char *str)
+{
+  size_t total = 0;
+  /* Walk the caller's buffer directly: the previous strdup() copy was never
+   * freed (and could not be, since the pointer was advanced), so every call
+   * leaked the whole string. */
+  while (str[total] != '\0') {
+    const size_t len = grapheme_bytelen(str + total);
+    if (len == 0) {
+      /* No forward progress would loop forever; stop with what we have. */
+      break;
+    }
+    total += len;
+  }
+  return total;
+}
+
+/*
+ * Returns the byte offset of the boundary that follows offset in str, i.e.
+ * offset plus the value grapheme_bytelen() reports at that position.
+ * strlength is the length of str in bytes.
+ *
+ * Returns -1 when offset is not inside the string (negative, at or past
+ * strlength, or on the terminating NUL), and -2 when the reported step would
+ * end past strlength.
+ */
+static int32_t grapheme_following(const char *str, size_t strlength, int32_t offset)
+{
+  /* Validate offset before it is used to index str. */
+  if (offset < 0 || (size_t)offset >= strlength || str[offset] == '\0') {
+    return -1;
+  }
+
+  /* Read straight from the caller's buffer; duplicating it with strdup()
+   * leaked the copy on every return path. */
+  const size_t next = (size_t)offset + grapheme_bytelen(str + offset);
+  if (next > strlength) {
+    return -2;
+  }
+  return (int32_t)next;
+}
+
+/*
+ * Returns the byte offset of the boundary that precedes offset in str: the
+ * string is walked from the start in grapheme_bytelen() steps, and the start
+ * of the step that reaches or passes offset is returned. strlength is the
+ * length of str in bytes.
+ *
+ * Returns -1 when offset is zero or negative (nothing precedes it), and -2
+ * when the string ends before offset is reached.
+ */
+static int32_t grapheme_preceding(const char *str, size_t strlength, int32_t offset)
+{
+  if (offset <= 0) {
+    return -1;
+  }
+
+  /* Index the caller's buffer instead of a strdup() copy, which was leaked
+   * on every path out of this function. */
+  size_t start = 0;
+  while (start < strlength && str[start] != '\0') {
+    const size_t len = grapheme_bytelen(str + start);
+    if (len == 0) {
+      /* No forward progress; treat as "offset not reachable". */
+      break;
+    }
+    if (start + len >= (size_t)offset) {
+      return (int32_t)start;
+    }
+    start += len;
+  }
+  return -2;
+}
+#endif
+
/* Iterates forwards over characters in a UTF-8 string */
static size_t next_char(char *buffer, size_t position)
{
size_t result = position;
+ #ifdef IMV_USE_ICU
UErrorCode status = U_ZERO_ERROR;
UText *ut = utext_openUTF8(NULL, buffer, -1, &status);
@@ -41,6 +92,14 @@ static size_t next_char(char *buffer, size_t position)
utext_close(ut);
assert(U_SUCCESS(status));
+ #elif defined (IMV_USE_GRAPHEME)
+ size_t length = grapheme_strlen(buffer);
+ int boundary = grapheme_following(buffer, length, position);
+ if (!(boundary < 0))
+ result = (size_t) boundary;
+
+ assert(boundary != -2);
+ #endif
return result;
}
@@ -48,6 +107,7 @@ static size_t next_char(char *buffer, size_t position)
static size_t prev_char(char *buffer, size_t position)
{
size_t result = position;
+ #ifdef IMV_USE_ICU
UErrorCode status = U_ZERO_ERROR;
UText *ut = utext_openUTF8(NULL, buffer, -1, &status);
@@ -63,6 +123,14 @@ static size_t prev_char(char *buffer, size_t position)
utext_close(ut);
assert(U_SUCCESS(status));
+ #elif defined (IMV_USE_GRAPHEME)
+ size_t length = grapheme_strlen(buffer);
+ int boundary = grapheme_preceding(buffer, length, position);
+ if (!(boundary < 0))
+ result = (size_t) boundary;
+
+ assert(boundary != -2);
+ #endif
return result;
}
--
2.34.1
Hi Cem,
Thanks for the patch. I wasn't aware of libgrapheme before and I like its
smaller profile. icu is overkill for imv.
I've got some suggestions for the patch:
Instead of defining new `grapheme_*` functions to be called by
`next_char` and `prev_char`, I think we should provide two complete
versions of `next_char` and `prev_char`, selected with #ifdefs depending
on whether imv is using icu or grapheme. I also think you could simplify
the prev/next functions a bit beyond what you have currently. The logic
could be something like...
static size_t next_char(const char *buffer, size_t position):
return position + grapheme_bytelen(buffer + position)
static size_t prev_char(const char *buffer, size_t position):
size_t newPosition = 0
do {
const size_t step = grapheme_bytelen(buffer + newPosition)
if newPosition + step >= position:
break
newPosition += step
} while (step > 0)
return newPosition
That ought to work, since `next_char` only needs to shift one
grapheme cluster forwards. `prev_char` can just search through the
string for the last grapheme cluster before it reaches the current
position. Plus you wouldn't need to define any additional functions.
Harry