linux下C轉碼函數:iconv使用

在Linux下,如果需要轉換編碼格式,可以使用iconv系列函數。


頭文件:

iconv.h


常用函數:
/* Identifier for conversion method from one codeset to another.  */
typedef void *iconv_t;
/* Allocate descriptor for code conversion from codeset FROMCODE to
   codeset TOCODE.
   This function is a possible cancellation points and therefore not
   marked with __THROW.  */
extern iconv_t iconv_open (__const char *__tocode, __const char *__fromcode);
/* Convert at most *INBYTESLEFT bytes from *INBUF according to the
   code conversion algorithm specified by CD and place up to
   *OUTBYTESLEFT bytes in buffer at *OUTBUF.  */
extern size_t iconv (iconv_t __cd, char **__restrict __inbuf,
                     size_t *__restrict __inbytesleft,
                     char **__restrict __outbuf,
                     size_t *__restrict __outbytesleft);
/* Free resources allocated for descriptor CD for code conversion.
   This function is a possible cancellation points and therefore not
   marked with __THROW.  */
extern int iconv_close (iconv_t __cd);


注意事項
1)outlen和inlen的長度最好相等,iconv會轉換全部outlen長度的內容,若是inlen長度不夠,可能會形成訪問越界的問題。
2)當ASCII轉到GBK時,ASCII字符佔一位,中文等的書字符佔兩位;
3)當ASCII轉到UTF16BE時,ASCII字符佔兩位,前補零,因此UTF16BE格式不能用strlen取長度或用%s的printf。

4)當GBK或UTF16BE轉到ASCII時,非ASCII字符會被丟棄。


遺留問題:

1)iconv的結果經常是失敗,errno爲84(EILSEQ),表示某些字符無法轉換。取出outbuf來看,其實內容都已經轉換了,此時inlen和outlen的值都爲負數,無法表示轉換到了哪一個字符。

【解決】inleft和outleft一定要用類型size_t,不能用int,否則會報錯84,而且會將buf後面的內存段也寫亂。切記。

【緣由】size_t在64位系統上是8字節,而在32位系統上是4字節,int爲4字節。這樣致使取地址的時候越界。
size_t定義以下:

#ifndef __SIZE_TYPE__
#define __SIZE_TYPE__ long unsigned int
#endif
#if !(defined (__GNUG__) && defined (size_t))
typedef __SIZE_TYPE__ size_t;
#ifdef __BEOS__
typedef long ssize_t;
#endif /* __BEOS__ */
#endif /* !(defined (__GNUG__) && defined (size_t)) */




示例代碼:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <iconv.h>
#include <errno.h>
/* Smaller of a and b.  Function-like macro: an argument can be evaluated
   twice, so do not pass expressions with side effects.  */
#define MIN(a,b) ((a)>(b)?(b):(a))
/*
 * Write `len` bytes starting at `p` to stdout as two-digit uppercase hex
 * values, each followed by a single space, then terminate the line.
 * Nothing but the newline is printed when `len` is zero or negative.
 */
void dumprawmsg(char *p, int len)
{
    const char *cursor = p;
    int remaining;

    for (remaining = len; remaining > 0; remaining--)
    {
        /* Go through unsigned char so bytes >= 0x80 are not sign-extended. */
        unsigned char byte = (unsigned char)*cursor++;
        printf("%.2X ", byte);
    }
    printf ("\n");
}
/*
 * Convert `srclen` bytes at `src` from encoding `srctype` to encoding
 * `destype`, writing at most `deslen` bytes to `des`.
 *
 * When both encoding names are identical the bytes are copied verbatim,
 * truncated to the smaller of the two lengths.  This function never
 * appends a terminator of its own; the output only ends in NUL if the
 * converted input did.
 *
 * Progress and diagnostics are printed to stdout.
 *
 * Returns 0 on success.  An input that ends in an incomplete multibyte
 * sequence (EINVAL) is reported but still returns 0.  Returns -1 when an
 * argument is NULL or a length is negative, when the conversion cannot be
 * opened, when the input contains a sequence that is invalid in `srctype`,
 * or when `des` is too small for the converted text.
 */
int convmsg(char * src, char * des, int srclen, int deslen, const char *srctype, const char *destype)
{
    if (src == NULL || des == NULL || srctype == NULL || destype == NULL
        || srclen < 0 || deslen < 0)
    {
        /* A negative length would turn into a huge size_t below. */
        return -1;
    }

    if (strcmp(srctype, destype) == 0)
    {
        size_t ncopy = (size_t)(srclen < deslen ? srclen : deslen);
        /* memmove: the caller may pass overlapping buffers. */
        memmove(des, src, ncopy);
        return 0;
    }

    iconv_t conv = iconv_open (destype, srctype);
    if(conv == (iconv_t)-1)
    {
        printf("iconvopen err\n");
        return -1;
    }

    /* iconv() updates the byte counters through size_t pointers.  They must
       be real size_t objects: passing the address of an int cast to
       (size_t *) makes iconv read and write 8 bytes on LP64 systems and
       corrupts the neighbouring memory.  */
    size_t avail = (size_t)deslen;
    size_t insize = (size_t)srclen;
    char *wrptr = des;
    char *inptr = src;
    int rc = 0;

    while (insize > 0)
    {
        printf("avail:%zu\n", avail);

        /* Do the conversion.  A successful call consumes all the input,
           which ends the loop through its condition.  */
        if (iconv (conv, &inptr, &insize, &wrptr, &avail) != (size_t) -1)
        {
            continue;
        }

        if (errno == EINVAL)
        {
            /* Incomplete multibyte sequence at the end of the input.
               Nothing more can be converted from this buffer, so stop
               instead of retrying forever.  */
            printf("EINVAL\n");
        }
        else
        {
            /* Invalid input (EILSEQ) or output buffer exhausted (E2BIG). */
            printf("error\n");
            rc = -1;
        }
        break;
    }

    /* For stateful encodings, emit the sequence that returns the output to
       its initial shift state once all input has been converted.  */
    if (rc == 0 && insize == 0
        && iconv (conv, NULL, NULL, &wrptr, &avail) == (size_t) -1)
    {
        printf("error\n");
        rc = -1;
    }

    iconv_close (conv);
    return rc;
}
/*
 * Demo driver.  argv[1] names the source encoding and argv[2] the target
 * encoding.  Converts a fixed sample string and hex-dumps the output
 * buffer before and after the conversion.
 *
 * Returns 0 on success, -1 on missing arguments or a failed conversion.
 */
int main(int argc, char * argv[])
{
    if (argc < 3)
    {
        printf("need two type para\n");
        return -1;
    }
    printf("type in %s\n, type out %s\n", argv[1], argv[2]);
    char src[100] = "abcd 1234 其餘";
    char des[100] = {0};
    int srclen = 50;
    int deslen = 50;
    const char * srctype = argv[1];
    const char * destype = argv[2];
    /* Dump exactly the array.  It is twice `deslen`, so any write past the
       declared output length still shows up, without reading memory that
       does not belong to `des`.  */
    dumprawmsg(des, (int)sizeof des);
    int ret = convmsg(src, des, srclen, deslen, srctype, destype);
    dumprawmsg(des, (int)sizeof des);
    /* `des` stays NUL-terminated: at most `deslen` of its zero-initialised
       100 bytes are written.  */
    printf("des is : %s\n", des);
    return ret == 0 ? 0 : -1;
}
相關文章
相關標籤/搜索