mirror of
https://github.com/dankamongmen/notcurses
synced 2025-03-10 01:29:05 -04:00
correctly handle ASCII + combining chars, add unit tests #36
This commit is contained in:
parent
94698a5982
commit
f67a97edfb
@ -240,6 +240,10 @@ cell_init(cell* c){
|
||||
// Breaks the UTF-8 string in 'gcluster' down, setting up the cell 'c'.
|
||||
int cell_load(struct ncplane* n, cell* c, const char* gcluster);
|
||||
|
||||
// Duplicate 'c' into 'targ'. Not intended for external use; exposed for the
|
||||
// benefit of unit tests.
|
||||
int cell_duplicate(struct ncplane* n, cell* targ, const cell* c);
|
||||
|
||||
// Release resources held by the cell 'c'.
|
||||
void cell_release(struct ncplane* n, cell* c);
|
||||
|
||||
|
@ -100,16 +100,23 @@ int main(int argc, char** argv){
|
||||
goto err;
|
||||
}
|
||||
sleep(1);
|
||||
const char s1[] = " Die Welt ist alles, was der Fall ist. ";
|
||||
const char str[] = " Wovon man nicht sprechen kann, darüber muss man schweigen. ";
|
||||
if(ncplane_cursor_move_yx(ncp, rows / 2, (cols - strlen(str) + 4) / 2)){
|
||||
goto err;
|
||||
}
|
||||
if(ncplane_fg_rgb8(ncp, 176, 121, 176)){
|
||||
goto err;
|
||||
}
|
||||
if(ncplane_bg_rgb8(ncp, 100, 100, 100)){
|
||||
goto err;
|
||||
}
|
||||
if(ncplane_cursor_move_yx(ncp, rows / 2 - 1, (cols - strlen(s1) + 4) / 2)){
|
||||
goto err;
|
||||
}
|
||||
if(ncplane_putstr(ncp, s1) != (int)strlen(s1)){
|
||||
goto err;
|
||||
}
|
||||
if(ncplane_cursor_move_yx(ncp, rows / 2, (cols - strlen(str) + 4) / 2)){
|
||||
goto err;
|
||||
}
|
||||
if(ncplane_putstr(ncp, str) != (int)strlen(str)){
|
||||
goto err;
|
||||
}
|
||||
|
@ -17,10 +17,10 @@ extern "C" {
|
||||
// recognizable as use for another cell.
|
||||
|
||||
// Bookkeeping for the pool that the egcpool_* functions store EGC bytes in.
// NOTE(review): this span comes from a rendered diff hunk (-17,10 +17,10)
// with no +/- markers, so the four members are listed twice: once with size_t
// counters and once with int counters. Presumably the size_t group is the
// removed side and the int group the added side -- confirm against the
// repository before treating either group as current.
typedef struct egcpool {
|
||||
// first listing of the members: size_t counters
char* pool; // ringbuffer of attached extension storage
|
||||
size_t poolsize; // total number of bytes in pool
|
||||
size_t poolused; // bytes actively used, grow when this gets too large
|
||||
size_t poolwrite; // next place to *look for* a place to write
|
||||
// second listing of the same members: int counters
char* pool; // ringbuffer of attached extension storage
|
||||
int poolsize; // total number of bytes in pool
|
||||
int poolused; // bytes actively used, grow when this gets too large
|
||||
int poolwrite; // next place to *look for* a place to write
|
||||
} egcpool;
|
||||
|
||||
static inline void
|
||||
@ -37,8 +37,8 @@ int egcpool_grow(egcpool* pool, size_t len);
|
||||
// consumed, not including any NUL terminator. Note that neither the number
|
||||
// of bytes nor columns is necessarily equivalent to the number of decoded code
|
||||
// points. Such are the ways of Unicode.
|
||||
static inline size_t
|
||||
utf8_gce_len(const char* gcluster, int* colcount){
|
||||
static inline int
|
||||
utf8_egc_len(const char* gcluster, int* colcount){
|
||||
size_t ret = 0;
|
||||
*colcount = 0;
|
||||
wchar_t wc;
|
||||
@ -64,8 +64,8 @@ utf8_gce_len(const char* gcluster, int* colcount){
|
||||
|
||||
// if we're inserting an EGC of |len| bytes, ought we proactively realloc?
|
||||
static inline bool
|
||||
egcpool_alloc_justified(const egcpool* pool, size_t len){
|
||||
const size_t poolfree = pool->poolsize - pool->poolused;
|
||||
egcpool_alloc_justified(const egcpool* pool, int len){
|
||||
const int poolfree = pool->poolsize - pool->poolused;
|
||||
// proactively get more space if we have less than 10% free. this doesn't
|
||||
// guarantee that we'll have enough space to insert the string -- we could
|
||||
// theoretically have every 10th byte free, and be unable to write even a
|
||||
@ -83,7 +83,7 @@ egcpool_alloc_justified(const egcpool* pool, size_t len){
|
||||
// columns is stored to '*cols'.
|
||||
static inline int
|
||||
egcpool_stash(egcpool* pool, const char* egc, size_t* ulen, int* cols){
|
||||
size_t len = utf8_gce_len(egc, cols) + 1; // count the NUL terminator
|
||||
int len = utf8_egc_len(egc, cols) + 1; // count the NUL terminator
|
||||
if(len <= 2){ // should never be empty, nor a single byte + NUL
|
||||
return -1;
|
||||
}
|
||||
@ -111,7 +111,7 @@ egcpool_stash(egcpool* pool, const char* egc, size_t* ulen, int* cols){
|
||||
// row. starting at pool->poolwrite, look for such a range of unused
|
||||
// memory. if we find it, write it out, and update used count. if we come
|
||||
// back to where we started, force a growth and try again.
|
||||
size_t curpos = pool->poolwrite;
|
||||
int curpos = pool->poolwrite;
|
||||
do{
|
||||
if(curpos == pool->poolsize){
|
||||
curpos = 0;
|
||||
@ -124,7 +124,7 @@ egcpool_stash(egcpool* pool, const char* egc, size_t* ulen, int* cols){
|
||||
}
|
||||
curpos = 0; // can this skip pool->poolwrite?
|
||||
}else{ // promising! let's see if there's enough space
|
||||
size_t need = len;
|
||||
int need = len;
|
||||
size_t trial = curpos;
|
||||
while(--need){
|
||||
if(pool->pool[++trial]){ // alas, not enough space here
|
||||
@ -154,7 +154,7 @@ egcpool_stash(egcpool* pool, const char* egc, size_t* ulen, int* cols){
|
||||
// we find a zero (our own NUL terminator). remove that number of bytes from
|
||||
// the usedcount.
|
||||
static inline void
|
||||
egcpool_release(egcpool* pool, size_t offset){
|
||||
egcpool_release(egcpool* pool, int offset){
|
||||
size_t freed = 1; // account for free(d) NUL terminator
|
||||
while(pool->pool[offset]){
|
||||
pool->pool[offset] = '\0';
|
||||
|
@ -555,9 +555,15 @@ term_movyx(int y, int x){
|
||||
// is it a single ASCII byte, wholly contained within the cell?
// Takes a NUL-terminated string and inspects only its first byte.
// NOTE(review): this span comes from a rendered diff hunk (-555,9 +555,15)
// with no +/- markers, so two bodies are shown back to back. Presumably the
// single return expression is the removed body and the staged checks after it
// are the added body -- confirm against the repository.
static inline bool
|
||||
simple_gcluster_p(const char* gcluster){
|
||||
// first body: true for the empty string or for any lead byte below 0x80,
// without looking at the byte that follows
return *gcluster == '\0' ||
|
||||
// FIXME need to ensure next character is not a nonspacer!
|
||||
(*(unsigned char*)gcluster < 0x80);
|
||||
// second body: the empty string counts as simple
if(*gcluster == '\0'){
|
||||
return true;
|
||||
}
|
||||
// a lead byte of 0x80 or above is not an ASCII byte
if(*(unsigned char*)gcluster >= 0x80){
|
||||
return false;
|
||||
}
|
||||
// we might be a simple ASCII, if the next character is *not* a nonspacing
|
||||
// combining character
|
||||
// the check described above is not written yet, so every non-empty string
// with an ASCII lead byte is reported as not simple
return false; // FIXME
|
||||
}
|
||||
|
||||
static inline bool
|
||||
@ -677,8 +683,7 @@ ncplane_cursor_stuck(const ncplane* n){
|
||||
return (n->x == n->lenx && n->y == n->leny);
|
||||
}
|
||||
|
||||
static int
|
||||
cell_duplicate(ncplane* n, cell* targ, const cell* c){
|
||||
int cell_duplicate(ncplane* n, cell* targ, const cell* c){
|
||||
cell_release(n, targ);
|
||||
targ->attrword = c->attrword;
|
||||
targ->channels = c->channels;
|
||||
@ -688,7 +693,7 @@ cell_duplicate(ncplane* n, cell* targ, const cell* c){
|
||||
}
|
||||
size_t ulen;
|
||||
int cols;
|
||||
// FIXME insert colcount into cell...
|
||||
// FIXME insert colcount into cell...if it's ever valid, anyway
|
||||
int eoffset = egcpool_stash(&n->pool, extended_gcluster(n, c), &ulen, &cols);
|
||||
if(eoffset < 0){
|
||||
return -1;
|
||||
@ -733,12 +738,14 @@ void cell_release(ncplane* n, cell* c){
|
||||
// bytes copied out of 'gcluster', or -1 on failure.
|
||||
int cell_load(ncplane* n, cell* c, const char* gcluster){
|
||||
cell_release(n, c);
|
||||
if(simple_gcluster_p(gcluster)){
|
||||
int bytes;
|
||||
int cols;
|
||||
if((bytes = utf8_egc_len(gcluster, &cols)) >= 0 && bytes <= 1){
|
||||
c->gcluster = *gcluster;
|
||||
return !!c->gcluster;
|
||||
}
|
||||
size_t ulen;
|
||||
int cols;
|
||||
// FIXME feed in already-calculated lengths from prior utf8_egc_len()!
|
||||
int eoffset = egcpool_stash(&n->pool, gcluster, &ulen, &cols);
|
||||
if(eoffset < 0){
|
||||
return -1;
|
||||
|
@ -26,7 +26,7 @@ TEST_F(EGCPoolTest, Initialized) {
|
||||
// Measures a single non-ASCII character and expects both a positive length
// and a positive column count.
// NOTE(review): rendered diff hunk (-26,7 +26,7) with no +/- markers; 'ulen'
// is declared twice because the utf8_gce_len line and the utf8_egc_len line
// are presumably the removed and added sides of a rename -- confirm.
TEST_F(EGCPoolTest, UTF8EGC) {
|
||||
const char* wstr = "☢";
|
||||
int c;
|
||||
auto ulen = utf8_gce_len(wstr, &c);
|
||||
auto ulen = utf8_egc_len(wstr, &c);
|
||||
// the returned length must be greater than zero
ASSERT_LT(0, ulen);
|
||||
// the column count written through the out-parameter must be greater than zero
EXPECT_LT(0, c);
|
||||
}
|
||||
@ -36,13 +36,17 @@ TEST_F(EGCPoolTest, UTF8EGC) {
|
||||
// Measures a precomposed letter, the same letter written as base + combining
// mark, and a plain ASCII letter; each must report the byte length noted
// beside its literal and exactly one column.
// NOTE(review): rendered diff hunk (-36,13 +36,17) with no +/- markers; the
// two-input declarations (utf8_gce_len, 'int c1, c2') and the three-input
// declarations (utf8_egc_len, 'int c1, c2, c3') are interleaved, presumably
// as the removed and added sides -- confirm against the repository.
TEST_F(EGCPoolTest, UTF8EGCCombining) {
|
||||
const char* w1 = "à"; // U+00E0, U+0000 (c3 a0)
|
||||
const char* w2 = "à"; // U+0061, U+0300, U+0000 (61 cc 80)
|
||||
int c1, c2;
|
||||
auto u1 = utf8_gce_len(w1, &c1);
|
||||
auto u2 = utf8_gce_len(w2, &c2);
|
||||
const char* w3 = "a"; // U+0061, U+0000 (61)
|
||||
int c1, c2, c3;
|
||||
auto u1 = utf8_egc_len(w1, &c1);
|
||||
auto u2 = utf8_egc_len(w2, &c2);
|
||||
auto u3 = utf8_egc_len(w3, &c3);
|
||||
// lengths: 2 for the precomposed form, 3 for base + combining mark, 1 for ASCII
ASSERT_EQ(2, u1);
|
||||
ASSERT_EQ(3, u2);
|
||||
ASSERT_EQ(1, u3);
|
||||
// every one of the three inputs is expected to occupy a single column
ASSERT_EQ(1, c1);
|
||||
ASSERT_EQ(1, c2);
|
||||
ASSERT_EQ(1, c3);
|
||||
}
|
||||
|
||||
TEST_F(EGCPoolTest, AddAndRemove) {
|
||||
|
@ -209,3 +209,50 @@ TEST_F(NcplaneTest, PerimeterBox) {
|
||||
// Calls ncplane_erase on the fixture plane n_. The test makes no assertions
// about the result.
TEST_F(NcplaneTest, EraseScreen) {
|
||||
ncplane_erase(n_);
|
||||
}
|
||||
|
||||
// we're gonna run both a composed latin a with grave, and then a latin a with
|
||||
// a combining nonspacing grave
|
||||
// Loads three strings into three separate cells on the fixture plane n_ and
// checks the value cell_load returns for each.
TEST_F(NcplaneTest, CellLoadCombining) {
|
||||
const char* w1 = "à"; // U+00E0, U+0000 (c3 a0)
|
||||
const char* w2 = "à"; // U+0061, U+0300, U+0000 (61 cc 80)
|
||||
const char* w3 = "a"; // U+0061, U+0000 (61)
|
||||
// one destination cell per input string
cell cell1 = CELL_TRIVIAL_INITIALIZER;
|
||||
cell cell2 = CELL_TRIVIAL_INITIALIZER;
|
||||
cell cell3 = CELL_TRIVIAL_INITIALIZER;
|
||||
auto u1 = cell_load(n_, &cell1, w1);
|
||||
auto u2 = cell_load(n_, &cell2, w2);
|
||||
auto u3 = cell_load(n_, &cell3, w3);
|
||||
// cell_load is documented as returning the number of bytes copied out of the
// string (or -1 on failure), so each result should match the byte count
// noted beside the corresponding literal
ASSERT_EQ(2, u1);
|
||||
ASSERT_EQ(3, u2);
|
||||
ASSERT_EQ(1, u3);
|
||||
// release whatever each cell holds before the test ends
cell_release(n_, &cell1);
|
||||
cell_release(n_, &cell2);
|
||||
cell_release(n_, &cell3);
|
||||
}
|
||||
|
||||
// Loads the same three strings as CellLoadCombining, then duplicates each
// loaded cell into a fresh cell and checks the value cell_duplicate returns.
TEST_F(NcplaneTest, CellDuplicateCombining) {
|
||||
const char* w1 = "à"; // U+00E0, U+0000 (c3 a0)
|
||||
const char* w2 = "à"; // U+0061, U+0300, U+0000 (61 cc 80)
|
||||
const char* w3 = "a"; // U+0061, U+0000 (61)
|
||||
// source cells, filled by cell_load below
cell cell1 = CELL_TRIVIAL_INITIALIZER;
|
||||
cell cell2 = CELL_TRIVIAL_INITIALIZER;
|
||||
cell cell3 = CELL_TRIVIAL_INITIALIZER;
|
||||
auto u1 = cell_load(n_, &cell1, w1);
|
||||
auto u2 = cell_load(n_, &cell2, w2);
|
||||
auto u3 = cell_load(n_, &cell3, w3);
|
||||
// the loads must succeed with the expected byte counts before the
// duplication step is worth checking (ASSERT_* stops the test on failure)
ASSERT_EQ(2, u1);
|
||||
ASSERT_EQ(3, u2);
|
||||
ASSERT_EQ(1, u3);
|
||||
// destination cells for the duplicates
cell cell4 = CELL_TRIVIAL_INITIALIZER;
|
||||
cell cell5 = CELL_TRIVIAL_INITIALIZER;
|
||||
cell cell6 = CELL_TRIVIAL_INITIALIZER;
|
||||
// each duplication is expected to return the same count that cell_load
// returned for its source cell
EXPECT_EQ(2, cell_duplicate(n_, &cell4, &cell1));
|
||||
EXPECT_EQ(3, cell_duplicate(n_, &cell5, &cell2));
|
||||
EXPECT_EQ(1, cell_duplicate(n_, &cell6, &cell3));
|
||||
// release the sources and the duplicates alike
cell_release(n_, &cell1);
|
||||
cell_release(n_, &cell2);
|
||||
cell_release(n_, &cell3);
|
||||
cell_release(n_, &cell4);
|
||||
cell_release(n_, &cell5);
|
||||
cell_release(n_, &cell6);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user