From ba1c79b66eb755378befdf86bfc9521cdfbb255b Mon Sep 17 00:00:00 2001 From: nick black Date: Thu, 9 Dec 2021 22:48:50 -0500 Subject: [PATCH] add nccell_load_ucs32 --- NEWS.md | 1 + USAGE.md | 14 +++++++++ doc/man/man3/notcurses_cell.3.md | 12 +++++--- include/notcurses/notcurses.h | 51 +++++++++++++++++++++----------- 4 files changed, 56 insertions(+), 22 deletions(-) diff --git a/NEWS.md b/NEWS.md index c45a3fe72..af1b77230 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,6 +9,7 @@ rearrangements of Notcurses. is automatically enlarged to accommodate output at its right (no scrolling) or bottom (scrolling enabled) boundaries. * Added `notcurses_default_background()` and `notcurses_default_foreground()`. + * Added `nccell_load_ucs32()`. * 3.0.0 (2021-12-01) **"In the A"** * Made the ABI/API changes that have been planned/collected during 2.x diff --git a/USAGE.md b/USAGE.md index cc2ad68d5..7df8855e6 100644 --- a/USAGE.md +++ b/USAGE.md @@ -2135,6 +2135,20 @@ nccell_load_egc32(struct ncplane* n, nccell* c, uint32_t egc){ return nccell_load(n, c, gcluster); } +// Load the UCS-32 codepoint 'u' into the nccell 'c' by encoding it as UTF-8 and +// passing it to nccell_load_egc32(). Returns the number of bytes used, or -1 on error. +static inline int +nccell_load_ucs32(struct ncplane* n, nccell* c, uint32_t u){ + unsigned char utf8[WCHAR_MAX_UTF8BYTES] = {0}; // zeroed so bytes the converter leaves unwritten are NUL + if(notcurses_ucs32_to_utf8(&u, 1, utf8, sizeof(utf8)) < 0){ + return -1; + } + uint32_t utf8asegc; + _Static_assert(WCHAR_MAX_UTF8BYTES == sizeof(utf8asegc), "WCHAR_MAX_UTF8BYTES didn't equal sizeof(uint32_t)"); + memcpy(&utf8asegc, utf8, sizeof(utf8)); + return nccell_load_egc32(n, c, utf8asegc); +} + // return a pointer to the NUL-terminated EGC referenced by 'c'. this pointer // is invalidated by any further operation on the plane 'n', so...watch out! 
const char* nccell_extended_gcluster(const struct ncplane* n, const nccell* c); diff --git a/doc/man/man3/notcurses_cell.3.md b/doc/man/man3/notcurses_cell.3.md index c5857cc57..879b67319 100644 --- a/doc/man/man3/notcurses_cell.3.md +++ b/doc/man/man3/notcurses_cell.3.md @@ -74,6 +74,8 @@ typedef struct nccell { **int nccell_load_egc32(struct ncplane* ***n***, nccell* ***c***, uint32_t ***egc***);** +**int nccell_load_ucs32(struct ncplane* ***n***, nccell* ***c***, uint32_t ***u***);** + **char* nccell_extract(const struct ncplane* ***n***, const nccell* ***c***, uint16_t* ***stylemask***, uint64_t* ***channels***);** **uint32_t nccell_bchannel(const nccell* ***c***);** @@ -144,10 +146,12 @@ must be considered associated with **ncplane**s. Indeed, **ncplane_erase** destroys the backing storage for all a plane's cells, invalidating them. This association is formed at the time of **nccell_load**, **nccell_prime**, or **nccell_duplicate**. All of these functions first call **nccell_release**, as -do **nccell_load_egc32** and **nccell_load_char**. When done using a **nccell** -entirely, call **nccell_release**. **ncplane_destroy** will free up the memory -used by the **nccell**, but the backing egcpool has a maximum size of 16MiB, -and failure to release **nccell**s can eventually block new output. +do **nccell_load_egc32**, **nccell_load_char**, and **nccell_load_ucs32**. +When done using a **nccell** entirely, call **nccell_release**. +**ncplane_destroy** will free up the memory used by the **nccell**, but the +backing egcpool has a maximum size of 16MiB, and failure to release **nccell**s +can eventually block new output. Writing over an **ncplane**'s cells releases +them automatically. **nccell_extended_gcluster** provides a nul-terminated handle to the EGC. This ought be considered invalidated by changes to the **nccell** or **egcpool**. 
diff --git a/include/notcurses/notcurses.h b/include/notcurses/notcurses.h index 830d74566..fb2ecbd43 100644 --- a/include/notcurses/notcurses.h +++ b/include/notcurses/notcurses.h @@ -19,6 +19,7 @@ #ifdef __cplusplus extern "C" { #define RESTRICT +#define _Static_assert(...) #else #define RESTRICT restrict #endif @@ -552,6 +553,24 @@ ncchannels_set_bg_default(uint64_t* channels){ return *channels; } +// 0x0--0x10ffff can be UTF-8-encoded with only 4 bytes +#define WCHAR_MAX_UTF8BYTES 4 + +// Returns the number of columns occupied by the longest valid prefix of a +// multibyte (UTF-8) string. If an invalid character is encountered, -1 will be +// returned, and the number of valid bytes and columns will be written into +// *|validbytes| and *|validwidth| (assuming them non-NULL). If the entire +// string is valid, *|validbytes| and *|validwidth| reflect the entire string. +API int ncstrwidth(const char* egcs, int* validbytes, int* validwidth); + +// input functions like notcurses_get() return ucs32-encoded uint32_t. convert +// a series of uint32_t to utf8. result must be at least 4 bytes per input +// uint32_t (6 bytes per uint32_t will future-proof against Unicode expansion). +// the number of bytes used is returned, or -1 if passed illegal ucs32, or too +// small of a buffer. +API int notcurses_ucs32_to_utf8(const uint32_t* ucs32, unsigned ucs32count, + unsigned char* resultbuf, size_t buflen); + // An nccell corresponds to a single character cell on some plane, which can be // occupied by a single grapheme cluster (some root spacing glyph, along with // possible combining characters, which might span multiple columns). At any @@ -851,6 +870,20 @@ nccell_load_egc32(struct ncplane* n, nccell* c, uint32_t egc){ return nccell_load(n, c, gcluster); } +// Load the UCS-32 codepoint 'u' into the nccell 'c' by encoding it as UTF-8 and +// passing it to nccell_load_egc32(). Returns the number of bytes used, or -1 on error. 
+static inline int +nccell_load_ucs32(struct ncplane* n, nccell* c, uint32_t u){ + unsigned char utf8[WCHAR_MAX_UTF8BYTES] = {0}; // zeroed so bytes the converter leaves unwritten are NUL + if(notcurses_ucs32_to_utf8(&u, 1, utf8, sizeof(utf8)) < 0){ + return -1; + } + uint32_t utf8asegc; + _Static_assert(WCHAR_MAX_UTF8BYTES == sizeof(utf8asegc), "WCHAR_MAX_UTF8BYTES didn't equal sizeof(uint32_t)"); + memcpy(&utf8asegc, utf8, sizeof(utf8)); + return nccell_load_egc32(n, c, utf8asegc); +} + // These log levels consciously map cleanly to those of libav; Notcurses itself // does not use this full granularity. The log level does not affect the opening // and closing banners, which can be disabled via the notcurses_option struct's @@ -1885,21 +1918,6 @@ API ALLOC uint32_t* ncplane_as_rgba(const struct ncplane* n, ncblitter_e blit, unsigned* pxdimy, unsigned* pxdimx) __attribute__ ((nonnull (1))); -// Returns the number of columns occupied by the longest valid prefix of a -// multibyte (UTF-8) string. If an invalid character is encountered, -1 will be -// returned, and the number of valid bytes and columns will be written into -// *|validbytes| and *|validwidth| (assuming them non-NULL). If the entire -// string is valid, *|validbytes| and *|validwidth| reflect the entire string. -API int ncstrwidth(const char* egcs, int* validbytes, int* validwidth); - -// input functions like notcurses_get() return ucs32-encoded uint32_t. convert -// a series of uint32_t to utf8. result must be at least 4 bytes per input -// uint32_t (6 bytes per uint32_t will future-proof against Unicode expansion). -// the number of bytes used is returned, or -1 if passed illegal ucs32, or too -// small of a buffer. -API int notcurses_ucs32_to_utf8(const uint32_t* ucs32, unsigned ucs32count, - unsigned char* resultbuf, size_t buflen); - // Return the offset into 'availu' at which 'u' ought be output given the // requirements of 'align'. Return -INT_MAX on invalid 'align'. Undefined // behavior on negative 'availu' or 'u'. 
@@ -2013,9 +2031,6 @@ ncplane_putegc(struct ncplane* n, const char* gclust, size_t* sbytes){ API int ncplane_putegc_stained(struct ncplane* n, const char* gclust, size_t* sbytes) __attribute__ ((nonnull (1, 2))); -// 0x0--0x10ffff can be UTF-8-encoded with only 4 bytes -#define WCHAR_MAX_UTF8BYTES 4 - // generate a heap-allocated UTF-8 encoding of the wide string 'src'. ALLOC static inline char* ncwcsrtombs(const wchar_t* src){