字符集研究之不同字符集的轉換方式

時間 2019-12-14

標籤字符集研究之不一樣轉換方式简体版

原文原文鏈接

作者：朱金燦

來源：http://blog.csdn.net/clever101

在上篇文章中介紹了多字節字符集和unicode字符集，今天介紹下兩大字符集之間的轉換方式。

首先談談的是微軟對於unicode字符集的態度。在windows的開發體系下，unicode字符集被稱爲寬字節字符集，多字節字符集被稱爲窄字符集。微軟對unicode字符集是大力支持的，從下面幾點可以看出：從windows2000開始使用unicode進行開發；Windows CE 本身就是使用Unicode的一種操作系統，完全不支持ANSI版Windows API函數；新建的VC project默認使用的是unicode字符集(utf16)。那麼問題來了，作爲一個C++程序猿，是否該使用unicode字符集？

爲什麼使用Unicode字符集？

提高執行效率。比如Windows內核本身是基於unicode字符的，非unicode字符傳進去要先轉成unicode字符（《windows核心編程》有詳解）。在不同語言中可以方便交換數據，比如在英文版操作系統中輸入中文路徑，如果是非unicode字符同時又沒有安裝中文字符集，那麼就會出現亂碼。

爲什麼不使用Unicode字符集？因爲傳統的勢力非常強大，很多跨平臺的第三方庫都是基於多字節字符集進行開發，還有就是編程習慣，比如在Windows下開發，大家耳熟能詳的計算字符串長度的函數是strlen，誰會去用寬字節版的wcslen呢。

詳見我以前寫的文章：

《unicode字符集，用還是不用？》

最後談談多字節字符集和unicode字符集之間的轉換。

有兩種方式。一種是使用跨平臺的iconv庫，示例代碼如下：

#include <stdio.h>
#include <stdlib.h>
#include <string.h> // memset, strlen
#include <string>
using namespace std;

#include <iconv.h> // character-encoding conversion library

#define OUTLEN 255 // file path length; used as the byte size of the output buffer in main()

// Converts a byte sequence from one character encoding to another using iconv.
//
// Parameters:
//   from_charset - iconv name of the encoding of the input bytes
//   to_charset   - iconv name of the encoding to produce
//   inbuf        - input bytes (may be null only when inlen is 0)
//   inlen        - number of input bytes
//   outbuf       - buffer that receives the converted bytes
//   outlen       - capacity of outbuf in bytes, including room for a terminator
//
// Returns 0 on success and -1 on any failure (bad arguments, unknown
// encoding name, invalid input, or output that does not fit). The output
// buffer is zero-filled before converting and its last byte is never
// written, so outbuf always holds a NUL-terminated string afterwards.
int code_convert(const char *from_charset, const char *to_charset, char *inbuf, size_t inlen, char *outbuf, size_t outlen)
{
    if (from_charset == NULL || to_charset == NULL || outbuf == NULL || outlen == 0)
        return -1;
    if (inbuf == NULL && inlen != 0)
        return -1;

    memset(outbuf, 0, outlen);

    // iconv_open reports failure as (iconv_t)-1, not as a null descriptor.
    iconv_t cd = iconv_open(to_charset, from_charset);
    if (cd == (iconv_t)-1)
        return -1;

    // iconv advances the pointers and decrements the counters it is given,
    // so work on local copies.
    char *in = inbuf;
    char *out = outbuf;
    size_t in_left = inlen;
    size_t out_left = outlen - 1; // reserve the final byte for the terminator

    size_t rc = iconv(cd, &in, &in_left, &out, &out_left);
    if (rc != static_cast<size_t>(-1))
    {
        // Flush any pending shift sequence of a stateful target encoding.
        rc = iconv(cd, NULL, NULL, &out, &out_left);
    }

    // Release the descriptor on every path, including conversion failure.
    iconv_close(cd);
    return rc == static_cast<size_t>(-1) ? -1 : 0;
}
// Converts UTF-8 text (called "UNICODE" in the original comment) to GB2312.
//
// inbuf/inlen describe the input bytes; outbuf/outlen describe the output
// buffer and its capacity. Returns whatever code_convert returns.
int u2g(char *inbuf, size_t inlen, char *outbuf, size_t outlen)
{
    // Mutable arrays instead of string literals: a literal does not convert
    // to a non-const char* parameter in standard C++11 and later.
    char source_charset[] = "utf-8";
    char target_charset[] = "gb2312";
    return code_convert(source_charset, target_charset, inbuf, inlen, outbuf, outlen);
}
// Converts GB2312 text to UTF-8 (called "UNICODE" in the original comment).
//
// inbuf/inlen describe the input bytes; outbuf/outlen describe the output
// buffer and its capacity. Returns whatever code_convert returns.
int g2u(char *inbuf, size_t inlen, char *outbuf, size_t outlen)
{
    // Mutable arrays instead of string literals: a literal does not convert
    // to a non-const char* parameter in standard C++11 and later.
    char source_charset[] = "gb2312";
    char target_charset[] = "utf-8";
    return code_convert(source_charset, target_charset, inbuf, inlen, outbuf, outlen);
}

// Callback for SQL statement execution: prints each column of a result row
// as "name = value", one per line.
// NOTE(review): the signature looks like a sqlite3_exec callback — confirm
// against the caller, which is not part of this sample.
//
// Parameters:
//   pUsed       - unused
//   argc        - number of columns in the row
//   argv        - column values; a null entry is printed as "NULL"
//   ppszColName - column names
// Always returns 0.
static int _sql_callback(void* pUsed, int argc, char** argv, char** ppszColName)
{
    (void)pUsed;
    for (int col = 0; col < argc; ++col)
    {
        const char* value = (argv[col] != 0) ? argv[col] : "NULL";
        // "\n" rather than "/n": the latter printed a literal slash and 'n'
        // and left all columns on one line.
        printf("%s = %s\n", ppszColName[col], value);
    }
    return 0;
}

// Demo entry point: converts a GB2312 path string to UTF-8 with g2u and
// prints the result. Returns 0 on success, 1 if the conversion fails.
int main()
{
    // A mutable array, because g2u takes a non-const char* and a string
    // literal cannot bind to one in standard C++11 and later.
    char in_gb2312[] = "D://控制點庫//GCPDB.3sdb";

    char out[OUTLEN];

    // GB2312 -> UTF-8; do not print the buffer if the conversion failed.
    if (g2u(in_gb2312, strlen(in_gb2312), out, OUTLEN) != 0)
    {
        fprintf(stderr, "gb2312-->unicode conversion failed\n");
        return 1;
    }

    printf("gb2312-->unicode out=%s \n", out);
    return 0;
}

另一種方式是使用Windows API，示例代碼如下：

// Converts a NUL-terminated multi-byte string to UTF-8 using the Win32 API
// (requires <windows.h>).
//
// The source code page is CP_ACP when AreFileApisANSI() reports true and
// CP_OEMCP otherwise. The text is first widened with MultiByteToWideChar
// and then encoded as UTF-8 with WideCharToMultiByte.
//
// Parameters:
//   pszMbcs - NUL-terminated input string; null is treated as empty
// Returns the UTF-8 string, or an empty string if any conversion step fails.
std::string MbcsToUtf8( const char* pszMbcs )
{
    if (pszMbcs == 0)
        return std::string();

    const int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;

    // First call only measures; the count includes the terminating NUL
    // because the input length is given as -1.
    const int wideLen = MultiByteToWideChar(codepage, 0, pszMbcs, -1, NULL, 0);
    if (wideLen <= 0)
        return std::string();

    // Self-releasing buffers replace the new[]/delete pairs, which used
    // scalar delete and leaked the wide buffer when widening failed.
    std::wstring wide(static_cast<size_t>(wideLen), L'\0');
    if (MultiByteToWideChar(codepage, 0, pszMbcs, -1, &wide[0], wideLen) == 0)
        return std::string();

    const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (utf8Len <= 0)
        return std::string();

    std::string utf8(static_cast<size_t>(utf8Len), '\0');
    const int written = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &utf8[0], utf8Len, NULL, NULL);
    if (written <= 0)
        return std::string();

    // Drop the terminating NUL that the API counted and wrote.
    utf8.resize(static_cast<size_t>(written) - 1);
    return utf8;
}

參考文獻：

1. 使用SQLite3支持中文路徑