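JavaScript 的字符串在內部使用 UTF-16 編碼,而 emoji 表情位於基本多文種平面(BMP)以外,在 UTF-16 中要用一對代理項(surrogate pair)表示,在 UTF-8 中則佔 4 個字節。MySQL 傳統的 utf8 字符集每個字符最多只能存 3 個字節,這就致使了 db 存儲 emoji 會有問題。因此最好的方式是,把 emoji 先轉成 `&#十進制碼點;` 這種只含 ASCII 字符的實體編碼,存到數據庫裏;要使用的時候,從 db 拿出來,再解碼回原來的 emoji 字符。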
原文章
// Emoji <-> decimal HTML entity helpers. They are written as standalone
// functions because callers invoke them directly, e.g. utf16toEntities(s).

/**
 * Encode emoji as decimal HTML entities.
 *
 * Every UTF-16 surrogate pair in the input (i.e. every character outside the
 * Basic Multilingual Plane) is replaced with `&#<decimal code point>;`.
 * All other characters, including lone surrogates, are left untouched.
 *
 * @param {string} str - Text that may contain emoji.
 * @returns {string} The text with each surrogate pair replaced by an entity.
 */
function utf16toEntities(str) {
  // High surrogate followed by low surrogate = one astral character.
  const surrogatePair = /[\ud800-\udbff][\udc00-\udfff]/g;
  return str.replace(surrogatePair, (pair) => `&#${pair.codePointAt(0)};`);
}

/**
 * Decode decimal HTML entities produced by utf16toEntities back into emoji.
 *
 * Only entities whose value is an astral code point (0x10000–0x10FFFF) are
 * decoded. Anything else (BMP entities such as `&#65;`, or values beyond the
 * Unicode range) is returned unchanged, because the surrogate-pair arithmetic
 * is meaningless for those values and the encoder never emits them.
 *
 * @param {string} strObj - Text containing `&#NNNNNN;` entities.
 * @returns {string} The text with astral entities replaced by the characters
 *   they denote.
 */
function entitiestoUtf16(strObj) {
  const decimalEntity = /&#(\d+);/g;
  return strObj.replace(decimalEntity, (entity, digits) => {
    const codePoint = Number(digits);
    const isAstral = codePoint >= 0x10000 && codePoint <= 0x10ffff;
    // String.fromCodePoint builds the surrogate pair for us.
    return isAstral ? String.fromCodePoint(codePoint) : entity;
  });
}
使用示例
// Usage example: round-trip a string containing emoji through the database.
const s = 'test emoji 👇👉👈🙌';

// Encode before saving. The result is
// 'test emoji &#128071;&#128073;&#128072;&#128588;',
// an ASCII-only entity string that can be stored in the db without problems.
const dbSaveStr = utf16toEntities(s);

// Later, read the stored record back from the database ...
const dbOutStr = 'test emoji &#128071;&#128073;&#128072;&#128588;';

// ... and turn the entities back into emoji for display.
const ret = entitiestoUtf16(dbOutStr); // 'test emoji 👇👉👈🙌'
下面是更通用的方式。因爲上面那種實體編碼只有在 web 端才能顯示,如果要存到 db,再由 C++ 讀取數據後在客戶端 app 展現,就行不通了,必須轉成 UTF-16 的 \uXXXX 轉義形式才行。
// http://www.2ality.com/2013/09/javascript-unicode.html
/**
 * Convert a Unicode code point to its UTF-16 escape-sequence text.
 *
 * Code points up to 0xFFFF produce one `\uXXXX` escape; higher code points
 * produce a lead/tail surrogate pair, e.g. 128512 -> `\uD83D\uDE00`
 * (a literal backslash followed by `u` and four upper-case hex digits).
 *
 * @param {number} codePoint - Integer in the range 0..0x10FFFF.
 * @returns {string} One or two `\uXXXX` escapes.
 * @throws {RangeError} If codePoint is not an integer in 0..0x10FFFF.
 */
function toUTF16(codePoint) {
  const TEN_BITS = 0x3ff;

  // Format one 16-bit code unit; zero-padded because \u needs exactly 4 digits.
  function u(codeUnit) {
    const hex = codeUnit.toString(16).toUpperCase();
    return `\\u${`0000${hex}`.slice(-4)}`;
  }

  if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > 0x10ffff) {
    throw new RangeError(`Invalid code point ${codePoint}`);
  }

  if (codePoint <= 0xffff) {
    return u(codePoint);
  }

  const offset = codePoint - 0x10000;
  // Shift right to get the most significant 10 bits.
  const leadSurrogate = 0xd800 + (offset >> 10);
  // Mask to get the least significant 10 bits.
  const tailSurrogate = 0xdc00 + (offset & TEN_BITS);
  return u(leadSurrogate) + u(tailSurrogate);
}
使用示例
// Using codePointAt / fromCodePoint, it is easy to go from an emoji
// to its decimal code point and back.

// Emoji to decimal representation
console.log('😀'.codePointAt(0)); // 128512

// Decimal to emoji
console.log(String.fromCodePoint(128512)); // 😀

// Going from an emoji to hexadecimal escapes is a little bit trickier.
// To convert from decimal to hexadecimal, we can use toUTF16.

// Decimal to hexadecimal (prints the text \uD83D\uDE00)
console.log(toUTF16(128512));

// Hexadecimal to emoji: the escape sequence written in a string literal
console.log('\uD83D\uDE00'); // 😀
判斷字符串是否爲 emoji 字符
/**
 * Matches one emoji sequence anywhere in a string.
 *
 * Structure of the pattern:
 *   1. a base symbol: a dingbat (U+2700–U+27BF), a pair of regional
 *      indicators (flag), or any UTF-16 surrogate pair;
 *   2. an optional variation selector (U+FE0E / U+FE0F);
 *   3. an optional combining mark or skin-tone modifier (U+1F3FB–U+1F3FF);
 *   4. zero or more further symbols joined with ZERO WIDTH JOINER (U+200D),
 *      each with the same optional suffixes.
 *
 * The pattern is not anchored and has no `g` flag, so `emojiReg.test(str)`
 * is true when `str` contains at least one match. Because any surrogate pair
 * is accepted as a base symbol, non-emoji astral characters match as well.
 */
const emojiReg = /(?:[\u2700-\u27bf]|(?:\ud83c[\udde6-\uddff]){2}|[\ud800-\udbff][\udc00-\udfff])[\ufe0e\ufe0f]?(?:[\u0300-\u036f\ufe20-\ufe23\u20d0-\u20f0]|\ud83c[\udffb-\udfff])?(?:\u200d(?:[^\ud800-\udfff]|(?:\ud83c[\udde6-\uddff]){2}|[\ud800-\udbff][\udc00-\udfff])[\ufe0e\ufe0f]?(?:[\u0300-\u036f\ufe20-\ufe23\u20d0-\u20f0]|\ud83c[\udffb-\udfff])?)*/;
文章來源