1、判斷字符串是不是UTF-8編碼
/**
 * Checks whether a NUL-terminated byte string is well-formed UTF-8.
 *
 * Validation follows RFC 3629: sequences are 1 to 4 bytes long, and overlong
 * encodings, UTF-16 surrogate code points (U+D800..U+DFFF), code points above
 * U+10FFFF and the obsolete 5/6-byte forms are all rejected. A sequence cut
 * short by the terminating NUL is rejected as well.
 *
 * @param str NUL-terminated input; may be null.
 * @return true if every byte up to the terminator belongs to a well-formed
 *         sequence (pure ASCII and the empty string qualify); false for
 *         malformed input or a null pointer.
 */
bool is_str_utf8(const char* str) {
    if (!str) {
        return false;
    }

    const unsigned char* cursor = reinterpret_cast<const unsigned char*>(str);
    while (*cursor != 0) {
        const unsigned char lead = *cursor++;
        if (lead < 0x80) {
            continue;  // 0xxxxxxx: single-byte ASCII
        }

        // Number of continuation bytes and the range allowed for the FIRST of
        // them. Narrowing that first range is what excludes overlong forms,
        // surrogates and values beyond U+10FFFF.
        unsigned int trailing = 0;
        unsigned char low = 0x80;
        unsigned char high = 0xBF;

        if (lead >= 0xC2 && lead <= 0xDF) {
            trailing = 1;  // 0xC0/0xC1 would only encode overlong ASCII
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            trailing = 2;
            if (lead == 0xE0) {
                low = 0xA0;   // below this is an overlong 3-byte form
            } else if (lead == 0xED) {
                high = 0x9F;  // above this is a UTF-16 surrogate
            }
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            trailing = 3;
            if (lead == 0xF0) {
                low = 0x90;   // below this is an overlong 4-byte form
            } else if (lead == 0xF4) {
                high = 0x8F;  // above this exceeds U+10FFFF
            }
        } else {
            // Stray continuation byte (0x80..0xBF), overlong lead (0xC0/0xC1)
            // or a lead that is never valid in UTF-8 (0xF5..0xFF).
            return false;
        }

        for (unsigned int k = 0; k < trailing; ++k) {
            const unsigned char next = *cursor;
            // The terminating NUL (0x00) is below `low`, so a truncated
            // sequence fails here without reading past the end of the string.
            if (next < low || next > high) {
                return false;
            }
            ++cursor;
            low = 0x80;   // every later continuation byte is plain 10xxxxxx
            high = 0xBF;
        }
    }
    return true;
}
2、判斷字符串是不是GBK編碼
/**
 * Checks whether a NUL-terminated byte string is structurally valid GBK.
 *
 * Bytes below 0x80 are single-byte characters. Any other character must be a
 * two-byte pair: a lead byte in 0x81..0xFE followed by a trail byte in
 * 0x40..0x7E or 0x80..0xFE. Only the byte structure is checked, not whether
 * each pair is an assigned code point, so text in another encoding can pass
 * too (every pure-ASCII string does).
 *
 * @param str NUL-terminated input; may be null.
 * @return true if the whole string matches the GBK byte structure (the empty
 *         string qualifies); false for a bad lead/trail byte, a lead byte cut
 *         off by the terminator, or a null pointer.
 */
bool is_str_gbk(const char* str) {
    if (!str) {
        return false;
    }

    const unsigned char* cursor = reinterpret_cast<const unsigned char*>(str);
    while (*cursor != 0) {
        const unsigned char lead = *cursor++;
        if (lead < 0x80) {
            continue;  // single-byte (ASCII) character
        }
        if (lead < 0x81 || lead > 0xFE) {
            return false;  // 0x80 and 0xFF never start a GBK pair
        }

        const unsigned char trail = *cursor;
        // 0x7F is excluded from the trail range; the terminating NUL is below
        // 0x40, so a dangling lead byte is rejected here as well.
        if (trail < 0x40 || trail > 0xFE || trail == 0x7F) {
            return false;
        }
        ++cursor;
    }
    return true;
}
3、字符串由GBK編碼轉換成UTF-8編碼
void ConvertGBKToUtf8(CString &strGBK) { int len=MultiByteToWideChar(CP_ACP, 0, (LPCTSTR)strGBK, -1, NULL,0); wchar_t * wszUtf8 = new wchar_t [len]; memset(wszUtf8, 0, len); MultiByteToWideChar(CP_ACP, 0, (LPCTSTR)strGBK, -1, wszUtf8, len); len = WideCharToMultiByte(CP_UTF8, 0, wszUtf8, -1, NULL, 0, NULL, NULL); char *szUtf8=new char[len + 1]; memset(szUtf8, 0, len + 1); WideCharToMultiByte (CP_UTF8, 0, wszUtf8, -1, szUtf8, len, NULL,NULL); strGBK = szUtf8; delete[] szUtf8; delete[] wszUtf8; } string GBKToUTF8(const char* strGBK) { int len = MultiByteToWideChar(CP_ACP, 0, strGBK, -1, NULL, 0); wchar_t* wstr = new wchar_t[len+1]; memset(wstr, 0, len+1); MultiByteToWideChar(CP_ACP, 0, strGBK, -1, wstr, len); len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL); char* str = new char[len+1]; memset(str, 0, len+1); WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, len, NULL, NULL); string strTemp = str; if(wstr) delete[] wstr; if(str) delete[] str; return strTemp; }
4、字符串由UTF-8編碼轉換成GBK編碼
string UtfToGbk(const char* utf8) { int len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0); wchar_t* wstr = new wchar_t[len+1]; memset(wstr, 0, len+1); MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wstr, len); len = WideCharToMultiByte(CP_ACP, 0, wstr, -1, NULL, 0, NULL, NULL); char* str = new char[len+1]; memset(str, 0, len+1); WideCharToMultiByte(CP_ACP, 0, wstr, -1, str, len, NULL, NULL); if(wstr) delete[] wstr; return str; } bool Utf82gbk(std::string &gbkStr, std::string &srcStr) { //首先先將utf-8編碼轉換爲unicode編碼 if(NULL==setlocale(LC_ALL,"zh_CN.utf8"))//設置轉換爲unicode前的碼,當前爲utf8編碼 { printf("Bad Parameter\n"); return false; } int unicodeLen=mbstowcs(NULL,srcStr.c_str(),0);//計算轉換後的長度 if(unicodeLen<=0) { printf("Can not Transfer!!!\n"); return false; } wchar_t *unicodeStr=(wchar_t *)calloc(sizeof(wchar_t),unicodeLen+1); mbstowcs(unicodeStr,srcStr.c_str(),srcStr.size());//將gbk轉換爲unicode //將unicode編碼轉換爲gbk編碼 if(NULL==setlocale(LC_ALL,"zh_CN.gbk"))//設置unicode轉換後的碼,當前爲gbk { printf("Bad Parameter\n"); return false; } int gbkLen = wcstombs(NULL,unicodeStr,0);//計算轉換後的長度 if(gbkLen<=0) { printf("Can not Transfer!!!\n"); return false; } char gbkbuf[1024*10]; wcstombs(gbkbuf,unicodeStr,gbkLen); gbkbuf[gbkLen]=0;//添加結束符 gbkStr = gbkbuf; free(unicodeStr); return true; } string UTF8ToGBK(const std::string& strUTF8) { int len = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, NULL, 0); WCHAR* wszGBK = new WCHAR[len+1]; memset(wszGBK, 0, len * 2 + 2); MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)(LPCTSTR)strUTF8.c_str(), -1, wszGBK, len); len = WideCharToMultiByte(CP_ACP, 0, wszGBK, -1, NULL, 0, NULL, NULL); char *szGBK = new char[len + 1]; memset(szGBK, 0, len + 1); WideCharToMultiByte(CP_ACP,0, wszGBK, -1, szGBK, len, NULL, NULL); std::string strTemp(szGBK); delete[]szGBK; delete[]wszGBK; return strTemp; }