NSUTF8StringEncoding 轉碼失敗解決方法

/// Returns a copy of the receiver's bytes in which every ill-formed UTF-8
/// sequence has been replaced by U+FFFD (REPLACEMENT CHARACTER), so that the
/// result can always be decoded with NSUTF8StringEncoding.
///
/// Well-formed sequences are copied through unchanged. A lead byte followed by
/// too few (or invalid) continuation bytes is replaced by a single U+FFFD that
/// covers the lead byte and the continuation bytes accepted so far; a byte that
/// cannot start a sequence (stray continuation byte, C0, C1, F5..FF) is replaced
/// by one U+FFFD on its own.
///
/// @return A new data object containing only well-formed UTF-8. The receiver
///         is not modified.
- (NSData *)UTF8Data{

    // U+FFFD encoded as UTF-8. Spelled out as bytes so the result does not
    // depend on the encoding this source file happens to be saved in.
    static const uint8_t kReplacement[] = {0xEF, 0xBF, 0xBD};

    const uint8_t *bytes = self.bytes;
    const NSUInteger total = self.length;

    // Output buffer; the input length is only an initial capacity hint.
    NSMutableData *resData = [[NSMutableData alloc] initWithCapacity:total];

    NSUInteger index = 0;
    while (index < total)
    {
        const uint8_t header = bytes[index];

        // Expected sequence length for this lead byte (0 = not a valid lead).
        NSUInteger len = 0;

        // Allowed range of the SECOND byte. It is narrower than 80..BF for a
        // few lead bytes, which is what rules out overlong forms, UTF-16
        // surrogates and code points above U+10FFFF.
        uint8_t secondMin = 0x80;
        uint8_t secondMax = 0xBF;

        if (header < 0x80)
        {
            // Single byte (ASCII).
            len = 1;
        }
        else if (header >= 0xC2 && header <= 0xDF)
        {
            // Two bytes. C0 and C1 are excluded: they could only encode
            // overlong forms of ASCII.
            len = 2;
        }
        else if (header >= 0xE0 && header <= 0xEF)
        {
            // Three bytes.
            len = 3;
            if (header == 0xE0)
            {
                secondMin = 0xA0;   // below A0 would be an overlong form
            }
            else if (header == 0xED)
            {
                secondMax = 0x9F;   // A0..BF would encode a surrogate (D800..DFFF)
            }
        }
        else if (header >= 0xF0 && header <= 0xF4)
        {
            // Four bytes. F5..F7 are excluded: they lie beyond U+10FFFF.
            len = 4;
            if (header == 0xF0)
            {
                secondMin = 0x90;   // below 90 would be an overlong form
            }
            else if (header == 0xF4)
            {
                secondMax = 0x8F;   // above 8F would exceed U+10FFFF
            }
        }

        // Not a valid lead byte: replace this one byte and move on.
        if (len == 0)
        {
            [resData appendBytes:kReplacement length:sizeof(kReplacement)];
            index++;
            continue;
        }

        // Count how many bytes of the sequence are actually present and valid
        // (the lead byte itself counts as the first one).
        NSUInteger validLen = 1;
        while (validLen < len && index + validLen < total)
        {
            const uint8_t next = bytes[index + validLen];
            const uint8_t lowest = (validLen == 1) ? secondMin : 0x80;
            const uint8_t highest = (validLen == 1) ? secondMax : 0xBF;
            if (next < lowest || next > highest)
            {
                break;
            }
            validLen++;
        }

        // The sequence is well-formed only if every expected byte was found.
        if (validLen == len)
        {
            [resData appendBytes:bytes + index length:len];
        }
        else
        {
            [resData appendBytes:kReplacement length:sizeof(kReplacement)];
        }

        // Skip what was consumed; the byte that broke the sequence (if any) is
        // re-examined as a potential lead byte on the next iteration.
        index += validLen;
    }

    return resData;
}

@end
相關文章
相關標籤/搜索