From 54cf38b0764c7b9bf656b3f502c3b29ed670c7e6 Mon Sep 17 00:00:00 2001 From: nick black Date: Fri, 10 Sep 2021 01:28:27 -0400 Subject: [PATCH] add ncstrwidth_valid() and documentation #2153 --- USAGE.md | 26 ++++++++++++++++++++------ doc/man/man3/notcurses_cell.3.md | 11 ++++++++++- include/notcurses/notcurses.h | 8 ++++++++ src/lib/notcurses.c | 24 ++++++++++++++++++------ 4 files changed, 56 insertions(+), 13 deletions(-) diff --git a/USAGE.md b/USAGE.md index 5198e9866..ec5ecbff8 100644 --- a/USAGE.md +++ b/USAGE.md @@ -1947,12 +1947,11 @@ nccell_cols(const nccell* c){ } #define NCSTYLE_MASK 0xffffu -#define NCSTYLE_ITALIC 0x0020u -#define NCSTYLE_UNDERLINE 0x0010u -#define NCSTYLE_UNDERCURL 0x0008u -#define NCSTYLE_BOLD 0x0004u -#define NCSTYLE_STRUCK 0x0002u -#define NCSTYLE_BLINK 0x0001u +#define NCSTYLE_ITALIC 0x0010u +#define NCSTYLE_UNDERLINE 0x0008u +#define NCSTYLE_UNDERCURL 0x0004u +#define NCSTYLE_BOLD 0x0002u +#define NCSTYLE_STRUCK 0x0001u #define NCSTYLE_NONE 0 // copy the UTF8-encoded EGC out of the cell, whether simple or complex. the @@ -2079,6 +2078,21 @@ nccells_double_box(struct ncplane* n, uint32_t attr, uint64_t channels, } ``` +It is sometimes useful to find the number of bytes and columns represented by +a UTF-8 string. `ncstrwidth_valid()` returns -1 if it encounters an invalid +character, and the number of columns otherwise. Even if there is an error, if +`validbytes` and/or `validwidth` are not `NULL`, the number of bytes and +columns (respectively) consumed before error are returned via these parameters. + +```c +// Returns the number of columns occupied by the valid prefix of a multibyte +// (UTF-8) string. If an invalid character is encountered, -1 will be returned, +// and the number of valid bytes and columns will be written into *|validbytes| +// and *|validwidth| (assuming them non-NULL). If the entire string is valid, +// *|validbytes| and *|validwidth| reflect the entire string. 
+int ncstrwidth_valid(const char* egcs, int* validbytes, int* validwidth); +``` + ### Cell channels API Helpers are provided to manipulate an `nccell`'s `channels` member. They are diff --git a/doc/man/man3/notcurses_cell.3.md b/doc/man/man3/notcurses_cell.3.md index a3c3a759c..50fb701ab 100644 --- a/doc/man/man3/notcurses_cell.3.md +++ b/doc/man/man3/notcurses_cell.3.md @@ -114,6 +114,8 @@ typedef struct nccell { **int ncstrwidth(const char* ***text***)**; +**int ncstrwidth_valid(const char* ***text***, int* ***validbytes***, int* ***validwidth***)**; + # DESCRIPTION Cells make up the framebuffer associated with each plane, with one cell per @@ -140,6 +142,11 @@ ought be considered invalidated by changes to the **nccell** or **egcpool**. The handle is **not** heap-allocated; do **not** attempt to **free(3)** it. A heap-allocated copy can be acquired with **nccell_strdup**. +**ncstrwidth_valid** returns the number of columns occupied by a valid UTF-8 +string, or -1 if an error is encountered. In either case, the number of valid +bytes and columns, respectively, consumed before error are written into +***validbytes*** and ***validwidth*** (assuming them to not be **NULL**). + # RETURN VALUES **nccell_load** and similar functions return the number of bytes loaded from the @@ -157,6 +164,7 @@ less than, equal to, or more than ***c2***, respectively. **nccell_cols** returns the number of columns occupied by ***c***, according to **wcwidth(3)***. **ncstrwidth** is an equivalent for strings. +**ncstrwidth_valid** returns the same value as **ncstrwidth**. # NOTES @@ -172,4 +180,5 @@ have been renamed to start with **nccell**. 
**notcurses_plane(3)**, **notcurses_output(3)**, **notcurses_visual(3)**, -**wcwidth(3)** +**wcwidth(3)**, +**utf8(7)** diff --git a/include/notcurses/notcurses.h b/include/notcurses/notcurses.h index ba636267e..b80c0d342 100644 --- a/include/notcurses/notcurses.h +++ b/include/notcurses/notcurses.h @@ -92,8 +92,16 @@ typedef enum { // Returns the number of columns occupied by a multibyte (UTF-8) string, or // -1 if a non-printable/illegal character is encountered. +// FIXME becomes a static inline in ABI3. API int ncstrwidth(const char* mbs); +// Returns the number of columns occupied by the valid prefix of a multibyte +// (UTF-8) string. If an invalid character is encountered, -1 will be returned, +// and the number of valid bytes and columns will be written into *|validbytes| +// and *|validwidth| (assuming them non-NULL). If the entire string is valid, +// *|validbytes| and *|validwidth| reflect the entire string. +API int ncstrwidth_valid(const char* egcs, int* validbytes, int* validwidth); + // Returns a heap-allocated copy of the user name under which we are running. 
API ALLOC char* notcurses_accountname(void);
diff --git a/src/lib/notcurses.c b/src/lib/notcurses.c
index b13da0223..ab8452254 100644
--- a/src/lib/notcurses.c
+++ b/src/lib/notcurses.c
@@ -3039,17 +3039,37 @@ int notcurses_ucs32_to_utf8(const uint32_t* ucs32, unsigned ucs32count,
 }
 
 int ncstrwidth(const char* mbs){
-  int cols = 0; // number of columns consumed thus far
+  return ncstrwidth_valid(mbs, NULL, NULL);
+}
+
+// Repeatedly calls utf8_egc_len() on |egcs|, summing the bytes and columns it
+// reports into *|validbytes| and *|validwidth| (either may be NULL). Returns
+// the total column count, or -1 as soon as utf8_egc_len() reports a negative
+// length; the outputs then cover only the prefix consumed before the error.
+int ncstrwidth_valid(const char* egcs, int* validbytes, int* validwidth){
+  int cols = 0; // number of columns consumed thus far
+  if(validwidth == NULL){
+    validwidth = &cols;
+  }
+  int bytes = 0; // number of bytes consumed thus far
+  if(validbytes == NULL){
+    validbytes = &bytes;
+  }
+  // Zero the accumulators actually in use: caller-supplied storage may be
+  // uninitialized, and the loop below only ever adds to it.
+  *validwidth = 0;
+  *validbytes = 0;
   do{
     int thesecols, thesebytes;
-    thesebytes = utf8_egc_len(mbs, &thesecols);
+    thesebytes = utf8_egc_len(egcs, &thesecols);
     if(thesebytes < 0){
       return -1;
     }
-    mbs += thesebytes;
-    cols += thesecols;
-  }while(*mbs);
-  return cols;
+    egcs += thesebytes;
+    *validbytes += thesebytes;
+    *validwidth += thesecols;
+  }while(*egcs);
+  return *validwidth;
 }
 
 void ncplane_pixelgeom(const ncplane* n, int* RESTRICT pxy, int* RESTRICT pxx,