[EGCcore] be more aggressive about ZWJs

This commit is contained in:
nick black 2021-11-02 13:09:01 -04:00
parent 8d08e14912
commit 130f1f6adb
No known key found for this signature in database
GPG Key ID: 5F43400C21CBFACC
2 changed files with 17 additions and 18 deletions

View File

@@ -110,27 +110,26 @@ utf8_egc_len(const char* gcluster, int* colcount){
if(prevw && !injoin && uc_is_grapheme_break(prevw, wc)){
break; // starts a new EGC, exit and do not claim
}
int cols;
if(uc_is_property_variation_selector(wc)){ // ends EGC
ret += r;
break;
}
int cols = wcwidth(wc);
if(cols < 0){
injoin = false;
if(iswspace(wc)){ // newline or tab
return ret + 1;
}
cols = 1;
if(wc == L'\u200d'){ // ZWJ is iswcntrl, so check it first
injoin = true;
cols = 0;
}else if(iswcntrl(wc)){
logerror("prohibited or invalid Unicode: 0x%x\n", wc);
return -1;
}
}else if(injoin){
}else if(wc == L'\u200d' || injoin){ // ZWJ is iswcntrl, so check it first
injoin = true;
cols = 0;
injoin = false;
}else{
cols = wcwidth(wc);
if(cols < 0){
injoin = false;
if(iswspace(wc)){ // newline or tab
return ret + 1;
}
cols = 1;
if(iswcntrl(wc)){
logerror("prohibited or invalid Unicode: 0x%x\n", wc);
return -1;
}
}
}
*colcount += cols;
ret += r;

View File

@@ -73,7 +73,7 @@ TEST_CASE("Cell") {
CHECK(2 == ncstrwidth("\U0001F471"));
#ifndef __APPLE__ // FIXME
CHECK(2 == ncstrwidth("\U0001F471\u200D"));
CHECK(3 == ncstrwidth("\U0001F471\u200D\u2640")); // *not* a single EGC!
CHECK(2 == ncstrwidth("\U0001F471\u200D\u2640"));
#endif
}