Fix UTF-16 to UTF-8 conversion on non-Windows

Code points U+0800 to U+FFFF encode as one UTF-16 code unit but as three bytes in UTF-8. The destination buffer was sized at only two bytes per UTF-16 code unit, so filenames containing more characters in this range than in the ASCII range would be cut off. This range includes all BMP Japanese characters.
2026-07-03 16:59:04 +00:00 · 2020-04-08 02:53:08 -04:00
parent ce4f09ba0e 501c1714b3
commit 8cbeb744d7
5 changed files with 24 additions and 17 deletions
@@ -152,10 +152,11 @@ char* strcopy_UTF16toUTF8(const utf16char_t *src)
 		return NULL;

 	src_len = utf16_strlen(src);
-	dst_len = src_len * 2;
+	// UTF-8 can use up to 3 bytes per UTF-16 code unit, or four for a surrogate pair
+	dst_len = src_len * 3;

 	// Allocate memory for string
-	dst = calloc(dst_len, sizeof(char)); // twice the size, as UTF-8 will use up to two bytes for converted UTF16 chars afaik
+	dst = calloc(dst_len, sizeof(char));
 	if (!dst)
 		return NULL;

@@ -206,4 +207,4 @@ oschar_t* os_AppendUTF16StrToPath(const oschar_t *src, const utf16char_t *add)

 	free(_add);
 	return new_path;
-}
+}