1. 手动实现UTF-8与Unicode编码转换:
/**
 * Hand-written conversion between UTF-8 byte strings and wchar_t strings.
 *
 * Both functions work on explicit lengths (no terminator is required in the
 * input and none is appended to the output) and return -1 on any failure.
 */
class StringConverter {
public:
    static int UnicodeToUtf8(char* output, int& outputSize, const wchar_t* input, int inputSize);
    static int Utf8ToUnicode(wchar_t* output, int outputSize, const char* input, int inputSize);
};

namespace {

// Largest int value, computed without <climits> (UINT_MAX / 2 on two's-complement targets).
const int kIntMax = static_cast<int>(static_cast<unsigned int>(-1) >> 1);

// True when wchar_t holds UTF-16 code units, so code points above U+FFFF need a surrogate pair.
const bool kWideIsUtf16 = sizeof(wchar_t) == 2;

// Reads one code point from the start of `input` (`remaining` >= 1 units available).
// A high/low surrogate pair is combined into a single code point.
// Returns the number of wchar_t units consumed (1 or 2), or 0 for a lone
// surrogate or a value outside the Unicode range.
int ReadWideCodePoint(const wchar_t* input, int remaining, char32_t& codePoint) {
    const char32_t unit = static_cast<char32_t>(input[0]);
    if (unit >= 0xD800 && unit <= 0xDBFF) {
        if (remaining < 2) {
            return 0;
        }
        const char32_t low = static_cast<char32_t>(input[1]);
        if (low < 0xDC00 || low > 0xDFFF) {
            return 0;
        }
        codePoint = 0x10000 + ((unit - 0xD800) << 10) + (low - 0xDC00);
        return 2;
    }
    if ((unit >= 0xDC00 && unit <= 0xDFFF) || unit > 0x10FFFF) {
        return 0;
    }
    codePoint = unit;
    return 1;
}

// Number of UTF-8 bytes needed to encode a valid code point.
int Utf8SequenceLength(char32_t codePoint) {
    if (codePoint <= 0x7F) return 1;
    if (codePoint <= 0x7FF) return 2;
    if (codePoint <= 0xFFFF) return 3;
    return 4;
}

// Writes the UTF-8 encoding of a valid code point at `out`; returns the position after it.
char* AppendUtf8(char32_t codePoint, char* out) {
    if (codePoint <= 0x7F) {
        *out++ = static_cast<char>(codePoint);
    } else if (codePoint <= 0x7FF) {
        *out++ = static_cast<char>(0xC0 | (codePoint >> 6));
        *out++ = static_cast<char>(0x80 | (codePoint & 0x3F));
    } else if (codePoint <= 0xFFFF) {
        *out++ = static_cast<char>(0xE0 | (codePoint >> 12));
        *out++ = static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F));
        *out++ = static_cast<char>(0x80 | (codePoint & 0x3F));
    } else {
        *out++ = static_cast<char>(0xF0 | (codePoint >> 18));
        *out++ = static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F));
        *out++ = static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F));
        *out++ = static_cast<char>(0x80 | (codePoint & 0x3F));
    }
    return out;
}

// Decodes one UTF-8 sequence from the start of `input` (`remaining` >= 1 bytes available).
// Returns the number of bytes consumed (1-4), or 0 when the sequence is
// truncated, has a bad lead/continuation byte, is overlong, or encodes a
// surrogate or a value above U+10FFFF.
int DecodeUtf8Sequence(const char* input, int remaining, char32_t& codePoint) {
    const unsigned char lead = static_cast<unsigned char>(input[0]);
    if (lead <= 0x7F) {
        codePoint = lead;
        return 1;
    }

    int length = 0;
    char32_t value = 0;
    char32_t smallest = 0;  // smallest code point that legitimately needs `length` bytes
    if ((lead & 0xE0) == 0xC0) {
        length = 2; value = lead & 0x1F; smallest = 0x80;
    } else if ((lead & 0xF0) == 0xE0) {
        length = 3; value = lead & 0x0F; smallest = 0x800;
    } else if ((lead & 0xF8) == 0xF0) {
        length = 4; value = lead & 0x07; smallest = 0x10000;
    } else {
        return 0;  // stray continuation byte or invalid lead byte
    }

    if (remaining < length) {
        return 0;  // sequence runs past the end of the input
    }
    for (int k = 1; k < length; ++k) {
        const unsigned char next = static_cast<unsigned char>(input[k]);
        if ((next & 0xC0) != 0x80) {
            return 0;
        }
        value = (value << 6) | (next & 0x3F);
    }
    if (value < smallest || value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF)) {
        return 0;
    }
    codePoint = value;
    return length;
}

// Number of wchar_t units needed to store a valid code point on this platform.
int WideUnitsFor(char32_t codePoint) {
    return (kWideIsUtf16 && codePoint > 0xFFFF) ? 2 : 1;
}

}  // namespace

/**
 * Converts a wide (Unicode) string to UTF-8.
 *
 * The input is validated and measured first, so nothing is written to
 * `output` unless the whole result fits.
 *
 * @param output     Destination buffer for the UTF-8 bytes (not null-terminated).
 * @param outputSize In: capacity of `output` in bytes. Out: when the buffer is
 *                   too small it is overwritten with the number of bytes
 *                   required; otherwise it is left unchanged.
 * @param input      Source wide characters; surrogate pairs are combined.
 * @param inputSize  Number of wchar_t units in `input`.
 * @return Number of bytes written, or -1 on a null pointer, negative length,
 *         invalid input (lone surrogate, value above U+10FFFF), or a buffer
 *         that is too small.
 */
int StringConverter::UnicodeToUtf8(char* output, int& outputSize, const wchar_t* input, int inputSize) {
    if (output == nullptr || input == nullptr || inputSize < 0) {
        return -1;
    }

    // Pass 1: validate the input and compute the exact output size.
    int requiredSize = 0;
    for (int i = 0; i < inputSize;) {
        char32_t codePoint = 0;
        const int consumed = ReadWideCodePoint(input + i, inputSize - i, codePoint);
        if (consumed == 0) {
            return -1;
        }
        const int encodedLength = Utf8SequenceLength(codePoint);
        if (requiredSize > kIntMax - encodedLength) {
            return -1;  // result length would not fit in the int return value
        }
        requiredSize += encodedLength;
        i += consumed;
    }

    if (outputSize < requiredSize) {
        outputSize = requiredSize;
        return -1;
    }

    // Pass 2: encode. Every unit was validated above, so `consumed` is never 0 here.
    char* cursor = output;
    for (int i = 0; i < inputSize;) {
        char32_t codePoint = 0;
        i += ReadWideCodePoint(input + i, inputSize - i, codePoint);
        cursor = AppendUtf8(codePoint, cursor);
    }
    return static_cast<int>(cursor - output);
}

/**
 * Converts a UTF-8 string to a wide (Unicode) string.
 *
 * The input is validated and measured first, so nothing is written to
 * `output` unless the whole result fits. Code points above U+FFFF are stored
 * as a surrogate pair when wchar_t is 16 bits wide and as a single unit
 * otherwise.
 *
 * @param output     Destination buffer for the wide characters (not null-terminated).
 * @param outputSize Capacity of `output` in wchar_t units.
 * @param input      Source UTF-8 bytes.
 * @param inputSize  Number of bytes in `input`.
 * @return Number of wchar_t units written, or -1 on a null pointer, negative
 *         length, malformed or truncated UTF-8, or a buffer that is too small.
 */
int StringConverter::Utf8ToUnicode(wchar_t* output, int outputSize, const char* input, int inputSize) {
    if (output == nullptr || input == nullptr || inputSize < 0) {
        return -1;
    }

    // Pass 1: validate the input and count the output units. The count can
    // never exceed inputSize, so it cannot overflow.
    int requiredSize = 0;
    for (int i = 0; i < inputSize;) {
        char32_t codePoint = 0;
        const int consumed = DecodeUtf8Sequence(input + i, inputSize - i, codePoint);
        if (consumed == 0) {
            return -1;
        }
        requiredSize += WideUnitsFor(codePoint);
        i += consumed;
    }

    if (outputSize < requiredSize) {
        return -1;
    }

    // Pass 2: decode. Each sequence is consumed by a single helper call, so
    // the index is advanced exactly once per code point.
    wchar_t* cursor = output;
    for (int i = 0; i < inputSize;) {
        char32_t codePoint = 0;
        i += DecodeUtf8Sequence(input + i, inputSize - i, codePoint);
        if (WideUnitsFor(codePoint) == 2) {
            const char32_t offset = codePoint - 0x10000;
            *cursor++ = static_cast<wchar_t>(0xD800 + (offset >> 10));
            *cursor++ = static_cast<wchar_t>(0xDC00 + (offset & 0x3FF));
        } else {
            *cursor++ = static_cast<wchar_t>(codePoint);
        }
    }
    return static_cast<int>(cursor - output);
}
2. 利用Visual C++库函数进行转换:
/**
 * Converts a null-terminated wide string to UTF-8 with WideCharToMultiByte.
 *
 * @param unicode Null-terminated wide string to convert.
 * @return A null-terminated UTF-8 string allocated with new[]; the caller
 *         releases it with delete[]. An empty string is returned when
 *         `unicode` is null or the conversion fails.
 */
char* UnicodeToUtf8(const wchar_t* unicode) {
    // Passing -1 as the source length makes the API process the terminator
    // too, so the reported size already includes it; 0 means failure.
    int len = 0;
    if (unicode != nullptr) {
        len = WideCharToMultiByte(CP_UTF8, 0, unicode, -1, nullptr, 0, nullptr, nullptr);
    }

    // Value-initialised, so the buffer is a valid empty string before conversion.
    char* str = new char[len + 1]();
    if (len > 0 && WideCharToMultiByte(CP_UTF8, 0, unicode, -1, str, len, nullptr, nullptr) == 0) {
        str[0] = '\0';  // conversion failed: do not hand back partial output
    }
    return str;
}

/**
 * Converts a null-terminated UTF-8 string to a wide string with MultiByteToWideChar.
 *
 * @param utf8 Null-terminated UTF-8 string to convert.
 * @return A null-terminated wide string allocated with new[]; the caller
 *         releases it with delete[]. An empty string is returned when `utf8`
 *         is null or the conversion fails.
 */
wchar_t* Utf8ToUnicode(const char* utf8) {
    // Passing -1 as the source length makes the API process the terminator
    // too, so the reported size already includes it; 0 means failure.
    int len = 0;
    if (utf8 != nullptr) {
        len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, nullptr, 0);
    }

    // Value-initialisation zeroes every wchar_t element, not just the first
    // len + 1 bytes of the buffer.
    wchar_t* wstr = new wchar_t[len + 1]();
    if (len > 0 && MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wstr, len) == 0) {
        wstr[0] = L'\0';  // conversion failed: do not hand back partial output
    }
    return wstr;
}