Fix UTF-16 to UTF-8 conversion on non-Windows

Code points U+0800 to U+FFFF encode as one UTF-16 code unit, but three bytes
in UTF-8. The old buffer allowed only two bytes per code unit, so filenames
containing more characters in this range than in the ASCII range would be
cut off.

This range includes all BMP Japanese characters.
This commit is contained in:
Muh Muhten
2020-04-08 02:53:08 -04:00
5 changed files with 24 additions and 17 deletions
+4 -3
View File
@@ -152,10 +152,11 @@ char* strcopy_UTF16toUTF8(const utf16char_t *src)
return NULL;
src_len = utf16_strlen(src);
dst_len = src_len * 2;
// UTF-8 can use up to 3 bytes per UTF-16 code unit, or four for a surrogate pair
dst_len = src_len * 3;
// Allocate memory for string
dst = calloc(dst_len, sizeof(char)); // twice the size, as UTF-8 will use up to two bytes for converted UTF16 chars afaik
dst = calloc(dst_len, sizeof(char));
if (!dst)
return NULL;
@@ -206,4 +207,4 @@ oschar_t* os_AppendUTF16StrToPath(const oschar_t *src, const utf16char_t *add)
free(_add);
return new_path;
}
}