correctly handle ASCII + combining chars, add unit tests #36

This commit is contained in:
nick black 2019-11-25 21:11:27 -05:00
parent 94698a5982
commit f67a97edfb
No known key found for this signature in database
GPG Key ID: 5F43400C21CBFACC
6 changed files with 97 additions and 28 deletions

View File

@ -240,6 +240,10 @@ cell_init(cell* c){
// Breaks the UTF-8 string in 'gcluster' down, setting up the cell 'c'.
int cell_load(struct ncplane* n, cell* c, const char* gcluster);
// Duplicate 'c' into 'targ'. Not intended for external use; exposed for the
// benefit of unit tests.
int cell_duplicate(struct ncplane* n, cell* targ, const cell* c);
// Release resources held by the cell 'c'.
void cell_release(struct ncplane* n, cell* c);

View File

@ -100,16 +100,23 @@ int main(int argc, char** argv){
goto err;
}
sleep(1);
const char s1[] = " Die Welt ist alles, was der Fall ist. ";
const char str[] = " Wovon man nicht sprechen kann, darüber muss man schweigen. ";
if(ncplane_cursor_move_yx(ncp, rows / 2, (cols - strlen(str) + 4) / 2)){
goto err;
}
if(ncplane_fg_rgb8(ncp, 176, 121, 176)){
goto err;
}
if(ncplane_bg_rgb8(ncp, 100, 100, 100)){
goto err;
}
if(ncplane_cursor_move_yx(ncp, rows / 2 - 1, (cols - strlen(s1) + 4) / 2)){
goto err;
}
if(ncplane_putstr(ncp, s1) != (int)strlen(s1)){
goto err;
}
if(ncplane_cursor_move_yx(ncp, rows / 2, (cols - strlen(str) + 4) / 2)){
goto err;
}
if(ncplane_putstr(ncp, str) != (int)strlen(str)){
goto err;
}

View File

@ -17,10 +17,10 @@ extern "C" {
// recognizable as use for another cell.
typedef struct egcpool {
char* pool; // ringbuffer of attached extension storage
size_t poolsize; // total number of bytes in pool
size_t poolused; // bytes actively used, grow when this gets too large
size_t poolwrite; // next place to *look for* a place to write
char* pool; // ringbuffer of attached extension storage
int poolsize; // total number of bytes in pool
int poolused; // bytes actively used, grow when this gets too large
int poolwrite; // next place to *look for* a place to write
} egcpool;
static inline void
@ -37,8 +37,8 @@ int egcpool_grow(egcpool* pool, size_t len);
// consumed, not including any NUL terminator. Note that neither the number
// of bytes nor columns is necessarily equivalent to the number of decoded code
// points. Such are the ways of Unicode.
static inline size_t
utf8_gce_len(const char* gcluster, int* colcount){
static inline int
utf8_egc_len(const char* gcluster, int* colcount){
size_t ret = 0;
*colcount = 0;
wchar_t wc;
@ -64,8 +64,8 @@ utf8_gce_len(const char* gcluster, int* colcount){
// if we're inserting a EGC of |len| bytes, ought we proactively realloc?
static inline bool
egcpool_alloc_justified(const egcpool* pool, size_t len){
const size_t poolfree = pool->poolsize - pool->poolused;
egcpool_alloc_justified(const egcpool* pool, int len){
const int poolfree = pool->poolsize - pool->poolused;
// proactively get more space if we have less than 10% free. this doesn't
// guarantee that we'll have enough space to insert the string -- we could
// theoretically have every 10th byte free, and be unable to write even a
@ -83,7 +83,7 @@ egcpool_alloc_justified(const egcpool* pool, size_t len){
// columns is stored to '*cols'.
static inline int
egcpool_stash(egcpool* pool, const char* egc, size_t* ulen, int* cols){
size_t len = utf8_gce_len(egc, cols) + 1; // count the NUL terminator
int len = utf8_egc_len(egc, cols) + 1; // count the NUL terminator
if(len <= 2){ // should never be empty, nor a single byte + NUL
return -1;
}
@ -111,7 +111,7 @@ egcpool_stash(egcpool* pool, const char* egc, size_t* ulen, int* cols){
// row. starting at pool->poolwrite, look for such a range of unused
// memory. if we find it, write it out, and update used count. if we come
// back to where we started, force a growth and try again.
size_t curpos = pool->poolwrite;
int curpos = pool->poolwrite;
do{
if(curpos == pool->poolsize){
curpos = 0;
@ -124,7 +124,7 @@ egcpool_stash(egcpool* pool, const char* egc, size_t* ulen, int* cols){
}
curpos = 0; // can this skip pool->poolwrite?
}else{ // promising! let's see if there's enough space
size_t need = len;
int need = len;
size_t trial = curpos;
while(--need){
if(pool->pool[++trial]){ // alas, not enough space here
@ -154,7 +154,7 @@ egcpool_stash(egcpool* pool, const char* egc, size_t* ulen, int* cols){
// we find a zero (our own NUL terminator). remove that number of bytes from
// the usedcount.
static inline void
egcpool_release(egcpool* pool, size_t offset){
egcpool_release(egcpool* pool, int offset){
size_t freed = 1; // account for free(d) NUL terminator
while(pool->pool[offset]){
pool->pool[offset] = '\0';

View File

@ -555,9 +555,15 @@ term_movyx(int y, int x){
// is it a single ASCII byte, wholly contained within the cell?
static inline bool
simple_gcluster_p(const char* gcluster){
return *gcluster == '\0' ||
// FIXME need to ensure next character is not a nonspacer!
(*(unsigned char*)gcluster < 0x80);
if(*gcluster == '\0'){
return true;
}
if(*(unsigned char*)gcluster >= 0x80){
return false;
}
// we might be a simple ASCII, if the next character is *not* a nonspacing
// combining character
return false; // FIXME
}
static inline bool
@ -677,8 +683,7 @@ ncplane_cursor_stuck(const ncplane* n){
return (n->x == n->lenx && n->y == n->leny);
}
static int
cell_duplicate(ncplane* n, cell* targ, const cell* c){
int cell_duplicate(ncplane* n, cell* targ, const cell* c){
cell_release(n, targ);
targ->attrword = c->attrword;
targ->channels = c->channels;
@ -688,7 +693,7 @@ cell_duplicate(ncplane* n, cell* targ, const cell* c){
}
size_t ulen;
int cols;
// FIXME insert colcount into cell...
// FIXME insert colcount into cell...if it's ever valid, anyway
int eoffset = egcpool_stash(&n->pool, extended_gcluster(n, c), &ulen, &cols);
if(eoffset < 0){
return -1;
@ -733,12 +738,14 @@ void cell_release(ncplane* n, cell* c){
// bytes copied out of 'gcluster', or -1 on failure.
int cell_load(ncplane* n, cell* c, const char* gcluster){
cell_release(n, c);
if(simple_gcluster_p(gcluster)){
int bytes;
int cols;
if((bytes = utf8_egc_len(gcluster, &cols)) >= 0 && bytes <= 1){
c->gcluster = *gcluster;
return !!c->gcluster;
}
size_t ulen;
int cols;
// FIXME feed in already-calculated lengths from prior utf8_egc_len()!
int eoffset = egcpool_stash(&n->pool, gcluster, &ulen, &cols);
if(eoffset < 0){
return -1;

View File

@ -26,7 +26,7 @@ TEST_F(EGCPoolTest, Initialized) {
TEST_F(EGCPoolTest, UTF8EGC) {
const char* wstr = "";
int c;
auto ulen = utf8_gce_len(wstr, &c);
auto ulen = utf8_egc_len(wstr, &c);
ASSERT_LT(0, ulen);
EXPECT_LT(0, c);
}
@ -36,13 +36,17 @@ TEST_F(EGCPoolTest, UTF8EGC) {
TEST_F(EGCPoolTest, UTF8EGCCombining) {
const char* w1 = "à"; // U+00E0, U+0000 (c3 a0)
const char* w2 = ""; // U+0061, U+0300, U+0000 (61 cc 80)
int c1, c2;
auto u1 = utf8_gce_len(w1, &c1);
auto u2 = utf8_gce_len(w2, &c2);
const char* w3 = "a"; // U+0061, U+0000 (61)
int c1, c2, c3;
auto u1 = utf8_egc_len(w1, &c1);
auto u2 = utf8_egc_len(w2, &c2);
auto u3 = utf8_egc_len(w3, &c3);
ASSERT_EQ(2, u1);
ASSERT_EQ(3, u2);
ASSERT_EQ(1, u3);
ASSERT_EQ(1, c1);
ASSERT_EQ(1, c2);
ASSERT_EQ(1, c3);
}
TEST_F(EGCPoolTest, AddAndRemove) {

View File

@ -209,3 +209,50 @@ TEST_F(NcplaneTest, PerimeterBox) {
TEST_F(NcplaneTest, EraseScreen) {
ncplane_erase(n_);
}
// we're gonna run both a composed latin a with grave, and then a latin a with
// a combining nonspacing grave
TEST_F(NcplaneTest, CellLoadCombining) {
const char* w1 = "à"; // U+00E0, U+0000 (c3 a0)
const char* w2 = ""; // U+0061, U+0300, U+0000 (61 cc 80)
const char* w3 = "a"; // U+0061, U+0000 (61)
cell cell1 = CELL_TRIVIAL_INITIALIZER;
cell cell2 = CELL_TRIVIAL_INITIALIZER;
cell cell3 = CELL_TRIVIAL_INITIALIZER;
auto u1 = cell_load(n_, &cell1, w1);
auto u2 = cell_load(n_, &cell2, w2);
auto u3 = cell_load(n_, &cell3, w3);
ASSERT_EQ(2, u1);
ASSERT_EQ(3, u2);
ASSERT_EQ(1, u3);
cell_release(n_, &cell1);
cell_release(n_, &cell2);
cell_release(n_, &cell3);
}
TEST_F(NcplaneTest, CellDuplicateCombining) {
const char* w1 = "à"; // U+00E0, U+0000 (c3 a0)
const char* w2 = ""; // U+0061, U+0300, U+0000 (61 cc 80)
const char* w3 = "a"; // U+0061, U+0000 (61)
cell cell1 = CELL_TRIVIAL_INITIALIZER;
cell cell2 = CELL_TRIVIAL_INITIALIZER;
cell cell3 = CELL_TRIVIAL_INITIALIZER;
auto u1 = cell_load(n_, &cell1, w1);
auto u2 = cell_load(n_, &cell2, w2);
auto u3 = cell_load(n_, &cell3, w3);
ASSERT_EQ(2, u1);
ASSERT_EQ(3, u2);
ASSERT_EQ(1, u3);
cell cell4 = CELL_TRIVIAL_INITIALIZER;
cell cell5 = CELL_TRIVIAL_INITIALIZER;
cell cell6 = CELL_TRIVIAL_INITIALIZER;
EXPECT_EQ(2, cell_duplicate(n_, &cell4, &cell1));
EXPECT_EQ(3, cell_duplicate(n_, &cell5, &cell2));
EXPECT_EQ(1, cell_duplicate(n_, &cell6, &cell3));
cell_release(n_, &cell1);
cell_release(n_, &cell2);
cell_release(n_, &cell3);
cell_release(n_, &cell4);
cell_release(n_, &cell5);
cell_release(n_, &cell6);
}