/**
 * Returns a copy of the receiver's bytes in which every byte sequence that is
 * not well-formed UTF-8 has been replaced by the replacement character, so the
 * result can always be decoded as UTF-8.
 *
 * Validation rules applied:
 *  - lead bytes 0x80-0xC1 and 0xF5-0xFF never start a sequence;
 *  - the first continuation byte is range-restricted for the lead bytes
 *    0xE0 (rejects overlong 3-byte forms), 0xED (rejects UTF-16 surrogates),
 *    0xF0 (rejects overlong 4-byte forms) and 0xF4 (rejects values above
 *    U+10FFFF);
 *  - every other continuation byte must have the form 10xxxxxx.
 *
 * An ill-formed or truncated sequence is replaced as a whole: the lead byte
 * plus the continuation bytes that were accepted before the failure produce a
 * single replacement character. Scanning then resumes at the offending byte.
 *
 * @return A newly created mutable data object holding the sanitized bytes.
 */
- (NSData *)UTF8Data {
    const uint8_t *bytes = self.bytes;
    // Read the length once; the receiver is not modified while scanning.
    const NSUInteger length = self.length;

    // Accumulates the sanitized output.
    NSMutableData *resData = [[NSMutableData alloc] initWithCapacity:length];
    // Replacement marker for invalid sequences (commonly one of: U+FFFD, a box, or '?').
    NSData *replacement = [@"�" dataUsingEncoding:NSUTF8StringEncoding];

    NSUInteger index = 0;
    while (index < length) {
        const uint8_t header = bytes[index];

        // Total number of bytes the sequence must have; 0 means "not a lead byte".
        NSUInteger len = 0;
        // Inclusive range allowed for the FIRST continuation byte. It is narrower
        // than 0x80-0xBF for a few lead bytes (see the method comment).
        uint8_t secondMin = 0x80;
        uint8_t secondMax = 0xBF;

        if ((header & 0x80) == 0) {
            // Single byte (ASCII).
            len = 1;
        } else if (header >= 0xC2 && header <= 0xDF) {
            // Two bytes. 0xC0 and 0xC1 are excluded: they could only encode overlong ASCII.
            len = 2;
        } else if ((header & 0xF0) == 0xE0) {
            // Three bytes.
            len = 3;
            if (header == 0xE0) {
                secondMin = 0xA0; // below 0xA0 would be an overlong encoding
            } else if (header == 0xED) {
                secondMax = 0x9F; // above 0x9F would encode a surrogate (U+D800-U+DFFF)
            }
        } else if (header >= 0xF0 && header <= 0xF4) {
            // Four bytes. 0xF5-0xF7 are excluded: they would exceed U+10FFFF.
            len = 4;
            if (header == 0xF0) {
                secondMin = 0x90; // below 0x90 would be an overlong encoding
            } else if (header == 0xF4) {
                secondMax = 0x8F; // above 0x8F would exceed U+10FFFF
            }
        }

        // Unrecognized lead byte: replace this single byte and move on.
        if (len == 0) {
            [resData appendData:replacement];
            index++;
            continue;
        }

        // Count how many bytes of the sequence are actually present and valid.
        NSUInteger validLen = 1;
        while (validLen < len && index + validLen < length) {
            const uint8_t next = bytes[index + validLen];
            const uint8_t lowest = (validLen == 1) ? secondMin : 0x80;
            const uint8_t highest = (validLen == 1) ? secondMax : 0xBF;
            if (next < lowest || next > highest) {
                break;
            }
            validLen++;
        }

        // The sequence is well-formed only if every required byte was accepted.
        if (validLen == len) {
            [resData appendBytes:bytes + index length:len];
        } else {
            [resData appendData:replacement];
        }

        // Skip the bytes consumed; a rejected byte is re-examined as a new lead byte.
        index += validLen;
    }

    return resData;
}

@end