From 8846e3cee2c18816a0da9f9952b1da6bb323968d Mon Sep 17 00:00:00 2001 From: nick black Date: Wed, 7 Jul 2021 05:54:47 -0400 Subject: [PATCH] ncdirect_putegc(), get true width in ncwidth #1899 --- NEWS.md | 3 + USAGE.md | 15 ++++- doc/man/man3/notcurses_direct.3.md | 6 ++ include/notcurses/direct.h | 8 +++ src/lib/direct.c | 21 +++++- src/poc/ncwidth.c | 105 ++++++++++++++++++++++------- 6 files changed, 132 insertions(+), 26 deletions(-) diff --git a/NEWS.md b/NEWS.md index ac288cbe1..1641a92a6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,6 +9,9 @@ rearrangements of Notcurses. * Documented `ncplane_move_yx()` in `notcurses_plane.3`, and removed the false comment that "passing -1 as a coordinate will hold that axis constant" from `USGAE.md` and `notcurses.h`. This has never been true. + * Added `ncdirect_putegc()` to perform Unicode segmentation. It returns + the number of columns consumed, and makes available the number of bytes + used by the EGC. * 2.3.8 (2021-07-04) * Marked all capability functions `__attribute__ ((pure))`. If you were diff --git a/USAGE.md b/USAGE.md index 23817fa61..aebe72ebe 100644 --- a/USAGE.md +++ b/USAGE.md @@ -440,8 +440,19 @@ int ncdirect_cursor_pop(struct ncdirect* n); // Formatted printing (plus alignment relative to the terminal). int ncdirect_printf_aligned(struct ncdirect* n, int y, ncalign_e align, - const char* fmt, ...) - __attribute__ ((format (printf, 4, 5))); + const char* fmt, ...); + +// Output the string |utf8| according to the channels |channels|. Note that +// ncdirect_putstr() does not explicitly flush output buffers, so it will not +// necessarily be immediately visible. +int ncdirect_putstr(struct ncdirect* nc, uint64_t channels, const char* utf8); + +// Output a single EGC (this might be several characters) from |utf8|, +// according to the channels |channels|. On success, the number of columns +// thought to have been used is returned, and if |sbytes| is not NULL, +// the number of bytes consumed will be written there. +int ncdirect_putegc(struct ncdirect* nc, uint64_t channels, + const char* utf8, int* sbytes); // Draw horizontal/vertical lines using the specified channels, interpolating // between them as we go. The EGC may not use more than one column. For a diff --git a/doc/man/man3/notcurses_direct.3.md b/doc/man/man3/notcurses_direct.3.md index 9f94d72cf..775faa7fd 100644 --- a/doc/man/man3/notcurses_direct.3.md +++ b/doc/man/man3/notcurses_direct.3.md @@ -72,6 +72,8 @@ notcurses_direct - minimal notcurses instances for styling text **int ncdirect_putstr(struct ncdirect* ***nc***, uint64_t ***channels***, const char* ***utf8***);** +**int ncdirect_putegc(struct ncdirect* ***nc***, uint64_t ***channels***, const char* ***utf8***, int* ***sbytes***);** + **int ncdirect_printf_aligned(struct ncdirect* ***n***, int ***y***, ncalign_e ***align***, const char* ***fmt***, ***...***);** **const char* ncdirect_detected_terminal(const struct ncdirect* ***n***);** @@ -225,6 +227,10 @@ to **ncdirect_stop**. **ncdirect_putstr** and **ncdirect_printf_aligned** return the number of bytes written on success. On failure, they return some negative number. +**ncdirect_putegc** returns the number of columns consumed on success, or -1 +on failure. If ***sbytes*** is not **NULL**, the number of bytes consumed +will be written to it. + **ncdirect_check_pixel_support** returns -1 on error, 0 if there is no pixel support, and 1 if pixel support is successfully detected. diff --git a/include/notcurses/direct.h b/include/notcurses/direct.h index d55ff7cd3..df6673444 100644 --- a/include/notcurses/direct.h +++ b/include/notcurses/direct.h @@ -113,6 +113,14 @@ API unsigned ncdirect_palette_size(const struct ncdirect* nc) API int ncdirect_putstr(struct ncdirect* nc, uint64_t channels, const char* utf8) __attribute__ ((nonnull (1, 3))); +// Output a single EGC (this might be several characters) from |utf8|, +// according to the channels |channels|. On success, the number of columns +// thought to have been used is returned, and if |sbytes| is not NULL, +// the number of bytes consumed will be written there. +API int ncdirect_putegc(struct ncdirect* nc, uint64_t channels, + const char* utf8, int* sbytes) + __attribute__ ((nonnull (1, 3))); + // Formatted printing (plus alignment relative to the terminal). Returns the // number of columns printed on success. API int ncdirect_printf_aligned(struct ncdirect* n, int y, ncalign_e align, diff --git a/src/lib/direct.c b/src/lib/direct.c index 02a37e6f9..bec438deb 100644 --- a/src/lib/direct.c +++ b/src/lib/direct.c @@ -46,7 +46,26 @@ int ncdirect_putstr(ncdirect* nc, uint64_t channels, const char* utf8){ if(activate_channels(nc, channels)){ return -1; } - return fprintf(nc->ttyfp, "%s", utf8); + return ncfputs(utf8, nc->ttyfp); +} + +int ncdirect_putegc(ncdirect* nc, uint64_t channels, const char* utf8, + int* sbytes){ + int cols; + int bytes = utf8_egc_len(utf8, &cols); + if(bytes < 0){ + return -1; + } + if(sbytes){ + *sbytes = bytes; + } + if(activate_channels(nc, channels)){ + return -1; + } + if(fprintf(nc->ttyfp, "%.*s", bytes, utf8) < 0){ + return -1; + } + return cols; } int ncdirect_cursor_up(ncdirect* nc, int num){ diff --git a/src/poc/ncwidth.c b/src/poc/ncwidth.c index 37bef1dc6..7466e7156 100644 --- a/src/poc/ncwidth.c +++ b/src/poc/ncwidth.c @@ -4,6 +4,7 @@ #include #include #include +#include static int add_wchar(wchar_t** wbuf, size_t* bufsize, size_t* used, wchar_t wc){ @@ -21,21 +22,27 @@ add_wchar(wchar_t** wbuf, size_t* bufsize, size_t* used, wchar_t wc){ return 0; } -int main(int argc, char **argv){ - if(!setlocale(LC_ALL, "")){ - return EXIT_FAILURE; - } - if(argc <= 1){ - for(int i = 0 ; i < 128 ; ++i){ - wchar_t w = i; - int width = wcwidth(w); - printf("0x%02x: %d%c\t", i, width, width < 0 ? '!' : ' '); - if(i % 4 == 3){ - printf("\n"); - } +static int +defaultout(void){ + for(int i = 0 ; i < 128 ; ++i){ + wchar_t w = i; + int width = wcwidth(w); + printf("0x%02x: %d%c\t", i, width, width < 0 ? '!' : ' '); + if(i % 4 == 3){ + printf("\n"); } - printf("\n"); - return EXIT_SUCCESS; + } + printf("\n"); + return 0; +} + +int main(int argc, char **argv){ + if(argc <= 1){ + return defaultout() ? EXIT_FAILURE : EXIT_SUCCESS; + } + struct ncdirect* n; + if((n = ncdirect_core_init(NULL, NULL, 0)) == NULL){ + return EXIT_FAILURE; } size_t bufsize = 0, used = 0; wchar_t* wbuf = NULL; @@ -52,7 +59,7 @@ int main(int argc, char **argv){ if(conv == (size_t)-1 || conv == (size_t)-2){ fprintf(stderr, "Invalid UTF-8: %s\n", arg); free(wbuf); - return EXIT_FAILURE; + goto err; } int width = wcwidth(w); printf("0x%05lx: %d %lc\t", (long)w, width, w); @@ -66,15 +73,63 @@ int main(int argc, char **argv){ totalb += conv; add_wchar(&wbuf, &bufsize, &used, w); } - printf("\n total width: %d total bytes: %zu wcswidth: %d\n\n", totalcols, totalb, wcswidth(wbuf, used)); - // FIXME this will be broken if totalcols > screen width - printf("%s\n", *argv); - for(int z = 0 ; z < totalcols ; ++z){ + int y, x, newy, newx; + putchar('\n'); + ncdirect_cursor_yx(n, &y, &x); + printf("%s", *argv); + fflush(stdout); + ncdirect_cursor_yx(n, &newy, &newx); + int realcols = (newx - x) + ncdirect_dim_x(n) * (newy - y); + printf("\n iterated wcwidth: %d total bytes: %zu wcswidth: %d true width: %d\n\n", + totalcols, totalb, wcswidth(wbuf, used), realcols); + ncdirect_cursor_yx(n, &y, &x); + // throw up a background color for invisible glyphs + uint64_t chan = CHANNELS_RGB_INITIALIZER(0xff, 0xff, 0xff, 0, 0x80, 0); + int expy, expx; + int misses = 0; + int scrolls = 0; + while(**argv){ + int sbytes; + int cols; + if((cols = ncdirect_putegc(n, chan, *argv, &sbytes)) < 0){ + goto err; + } + fflush(stdout); + ncdirect_cursor_yx(n, &newy, &newx); + if(newy != y){ + newx += ncdirect_dim_x(n) * (newy - y); + } + ncdirect_cursor_push(n); + if(x + cols != newx){ + ++misses; + for(i = 0 ; i < misses ; ++i){ + putchar('\v'); + } + printf("True width: %d wcwidth: %d [%.*s]", newx - x, cols, sbytes, *argv); + ncdirect_cursor_yx(n, &expy, &expx); + scrolls = (newy + misses) - expy; + if(scrolls > 1){ + ncdirect_cursor_up(n, scrolls - 1); + } + } + ncdirect_cursor_pop(n); + *argv += sbytes; + y = newy - (scrolls - 1); + x = newx; + } + for(i = 0 ; i < misses + 1 ; ++i){ + putchar('\n'); + } + ncdirect_set_fg_default(n); + ncdirect_set_bg_default(n); + for(int z = 0 ; z < realcols && z < ncdirect_dim_x(n) ; ++z){ putchar('0' + z % 10); } - putchar('\n'); - if(totalcols > 20){ - for(int z = 0 ; z < totalcols ; ++z){ + if(realcols < ncdirect_dim_x(n)){ + putchar('\n'); + } + if(realcols > 20){ + for(int z = 0 ; z < realcols && z < ncdirect_dim_x(n) ; ++z){ if(z % 10){ putchar(' '); }else{ @@ -85,5 +140,9 @@ int main(int argc, char **argv){ } } free(wbuf); - return EXIT_SUCCESS; + return ncdirect_stop(n) ? EXIT_FAILURE : EXIT_SUCCESS; + +err: + ncdirect_stop(n); + return EXIT_FAILURE; }