嗯,如題 是個蛋疼物
目前QQ的聊天記錄導出功能很讓人鬱悶
三種聊天記錄格式的導出
1 TXT 沒圖
2 BAK 只能再導入QQ使用
3 MHT 有圖有字,缺點是一旦聊天記錄不少,文件體積就會很大,幾乎全部的工具都不能正常打開
單純的把MHT轉換成HTML也不行,由於HTML也很大,加上圖片之類的資源 也會卡死
因此只能切開顯示。處理思路很簡單:把MHT當作超大的文本文件,按行順序處理,把圖片解碼後存入文件,然後分割HTML內容
代碼如下,只支持單個QQ羣導出的記錄:
1 import io; 2 import fsys.dlg; 3 import string; 4 import crypt.bin; 5 import fsys.path; 6 //將Base64的數據轉換成圖片
/*
Decode one MIME image part and save it into the image directory.
str  - accumulated text of a single part: its headers followed by the Base64 payload
path - directory that receives the decoded file
Parts without a "Content-Location: ....dat" header or without the "}.dat"
marker are skipped silently (this covers the non-image parts of the file).
*/
function base64images (str,path)
{
	if(!str) return;

	// File name announced by the part header, e.g. {GUID}.dat
	var wjm = string.match(str,"Content-Location\:(\S+.dat)");

	// Plain-text search ("@" prefix) for the end of the file name;
	// everything after it is the Base64 payload.
	var startpos,endpos = string.find(str,"@}.dat");

	if( wjm && endpos )
	{
		var bindata = crypt.bin.decodeBase64( string.trim( string.sub(str,endpos+1) ) );
		string.save(path ++ "\\" ++ wjm,bindata);
	}
}
// Split the record file
/*
Cut the extracted chat HTML into numbered page files.
file_path - the intermediate HTML file, read line by line
path      - output prefix; pages are written as path ++ <page number> ++ ".html"
From each line the first <tr>...</tr> row is taken and its image references
are pointed at the images/ sub-directory. A page is flushed once it holds
more than 500 rows; the remaining rows go into a final page.
*/
function split_html (file_path,path)
{
	var html_head = '<html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /><title>QQ Message</title><style type="text/css">body{font-size:12px; line-height:22px; margin:2px;}td{font-size:12px; line-height:22px;}</style></head><body><table width=100% cellspacing=0>';
	var html_foot = '</body></html>';

	// Writes one page: header, collected rows, closing markup, footer.
	var save_page = function(index,rows,tail){
		var out = io.open(path ++ index ++ ".html","w+");
		out.write(html_head);
		out.write(rows);
		out.write(tail);
		out.write(html_foot);
		out.close();
	}

	var src = io.open( file_path, "rt" );
	var rows = '';
	var count = 0;
	var page = 0;

	var raw = src.read();
	while(raw)
	{
		var row = string.match(raw,"\<tr.*?\</tr\>");
		if(row)
		{
			rows = rows ++ string.replace(row,'@<IMG src="{','<IMG src="images/{');
			count++;
			if( count>500 )
			{
				// Full page: link it to the page that follows.
				save_page(page,rows,'</table>'++'<a href="'++ page+1 ++'.html">Next page</a>');
				page++;
				count = 0;
				rows = '';
			}
		}
		raw = src.read();
	}

	// Whatever is left becomes the last page (no "Next page" link).
	save_page(page,rows,'</table>');
	src.close();
}
// Open the console
io.open();
// Let the user choose the exported QQ chat record
var mhtPath = fsys.dlg.open("QQ聊天記錄文件*.mht|*.mht||");
// Start processing the file
if(mhtPath)
{
	var path = io.splitpath(mhtPath);
	var out_dir = path.dir ++ path.name;
	var img_dir = out_dir ++ '\\images';
	var file = io.open( mhtPath, "rt" );
	var line_text = file ? file.read() : null;

	// Accept the file only if its first line contains the literal text "Tencent".
	// (string.indexAny would match ANY single character of the set, so nearly
	// every file passed the old check.)
	if( line_text && string.find(line_text,"@Tencent") )
	{
		fsys.createDir( out_dir );
		fsys.createDir( img_dir );
		// "w+" truncates: a second run must not append a duplicate of the chat.
		var html = io.open( out_dir ++ "\\tmp.html","w+");
		var i = 0; // number of lines processed
		var is_html = false;
		var tmp_image_data = '';

		while( line_text )
		{
			// Start of the chat HTML
			var html_start = string.find(line_text,"@<html xmlns");
			if( 1 == html_start )
			{
				is_html = true;
			}
			// End of the chat HTML
			var html_end = string.find(line_text,"@</table></body></html>");
			if( 1 == html_end )
			{
				// Write the closing line, then leave HTML mode
				html.write(line_text);
				html.write('\r\n');
				is_html = false;
			}

			if(is_html)
			{
				// Copy the chat text into the intermediate file
				html.write(line_text);
				html.write('\r\n');
			}

			// Outside the HTML part (and past the first 10 lines), collect the
			// lines of each MIME part; a boundary line flushes the previous part.
			if(false == is_html && i>10 )
			{
				if(string.find(line_text,"@------=_NextPart")){
					base64images(tmp_image_data,img_dir);
					tmp_image_data = line_text ++ '\r\n';
				}else{
					tmp_image_data = tmp_image_data ++ line_text ++ '\r\n';
				}
			}

			line_text = file.read();
			i++;
			io.print("已處理",i,'行數據');
		}
		// Flush the part that was still being collected at end of file
		base64images(tmp_image_data,img_dir);

		// Close the files before paginating the intermediate HTML
		file.close();
		html.close();
		split_html( out_dir ++ "\\tmp.html",out_dir ++ "\\");
	}
	else
	{
		if(file) file.close();
		io.print("您選擇的文件可能不是QQ導出的mht聊天記錄文件");
	}
}
else
{
	io.print("請正確的選擇QQ導出的mht聊天記錄文件");
}
execute("pause") // press any key to continue
io.close(); // close the console
二進制版本
PHP版本(可能有點小BUG,不確定;效率提高很多)
<?php
// Command-line driver: php cli.php <export.mht>
// Reads the MHT export in fixed-size pieces and hands the growing buffer to
// process(), which returns the unconsumed tail to be prepended to the next piece.
if (!isset($argv[1]) || !file_exists($argv[1])) {
    echo 'There isn\'t have this file.';
    exit;
}
ini_set('pcre.backtrack_limit', 100000000);
#ini_set('pcre.recursion_limit',100000000);
define("BASEDIR", dirname($argv[1]) . '/');
$is_table_end = false;
$page = 0;
mk_imgdir();

// Open the file that was validated above (it used to open a hard-coded "input.mht").
$handle = fopen($argv[1], "rb");
if (false === $handle) {
    echo 'There isn\'t have this file.';
    exit;
}

$contents = '';
while (!feof($handle)) {
    $contents .= fread($handle, 204800);
    $contents = process($contents);
}
fclose($handle);
// One more pass over whatever is still buffered.
process($contents);

/**
 * Process the buffered part of the MHT export.
 *
 * While the message table is still open, every complete <tr>...</tr> row in the
 * buffer is written to BASEDIR/messages/<8-digit page>.html, 200 rows per page.
 * Once the closing "</table></body></html>" has been seen, the Base64 image
 * parts are extracted and handed to put_images().
 *
 * Uses the globals $is_table_end (message part finished) and $page (index of
 * the next page file to write).
 *
 * @param string $contents buffered, not yet consumed input
 * @return string the tail to keep and prepend to the next piece read from the file
 */
function process($contents)
{
    $html_head = '<html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /><title>QQ Message</title><style type="text/css">body{font-size:12px; line-height:22px; margin:2px;}td{font-size:12px; line-height:22px;}</style></head><body><table width=100% cellspacing=0>';
    $html_foot = '</table></body></html>';
    global $is_table_end;
    global $page;

    if (false === $is_table_end) {
        if (false !== strpos($contents, $html_foot)) {
            // Message part is complete; from now on only images remain.
            $is_table_end = true;
        }
        $r = preg_match_all('|<tr.*?\</tr\>|ims', $contents, $matches, PREG_OFFSET_CAPTURE);
        if ($r) {
            $chunks = array_chunk($matches[0], 200);
            foreach ($chunks as $key => $val) {
                // Point image references at the two-level images/<c1>/<c2>/ layout.
                $rows = preg_replace(
                    '|<IMG src="{(\S)(\S)(\S+).dat|ims',
                    '<IMG src="../images/$1/$2/{$1$2$3.dat',
                    array_column($val, 0)
                );
                file_put_contents(
                    BASEDIR . 'messages/' . sprintf("%08d", $page + $key) . '.html',
                    $html_head . implode('', $rows)
                    . '<td><H1><a href="./' . sprintf("%08d", $page + $key + 1) . '.html">Next page</a></h1></td>'
                    . $html_foot
                );
            }
            // Advance past EVERY page written. Adding only the last chunk index
            // made the next call overwrite the final page of this call.
            $page += count($chunks);

            // Drop everything up to the end of the last complete row.
            $pos = end($val);
            $contents = substr($contents, $pos[1] + strlen($pos[0]));
        }
        // No complete row yet: keep the buffer so a row that straddles two
        // reads is completed by the next piece instead of being discarded.
    }

    if (true === $is_table_end) {
        // Image part
        $r = preg_match_all(
            '|Content-Type:image.*?:base64.*?Content-Location:(.*?)\.dat(.*?)(?:------=_)|ims',
            $contents,
            $matches,
            PREG_OFFSET_CAPTURE | PREG_SET_ORDER
        );
        if ($r) {
            $result = array();
            foreach ($matches as $val) {
                $result[] = array('name' => $val[1][0], 'contents' => $val[2][0]);
                // Write in batches of 150 to bound memory use.
                if (count($result) >= 150) {
                    put_images($result);
                    $result = array();
                }
            }
            put_images($result);
            // Keep the buffer from the START of the last match, as the original
            // did: that image is decoded again on the next call, which is
            // redundant but harmless.
            $contents = substr($contents, $val[0][1]);
        }
        // No complete image part yet: keep the buffer (see above).
    }
    return $contents;
}

68
/**
 * Decode Base64 image parts and write them to disk.
 *
 * Files are stored as BASEDIR/images/<c1>/<c2>/<name>.dat, where c1 and c2 are
 * the 2nd and 3rd characters of the name (the name starts with "{"). This is
 * the directory tree that mk_imgdir() creates under BASEDIR.
 *
 * @param array $data list of array('name' => ..., 'contents' => Base64 text)
 */
function put_images($data)
{
    foreach ($data as $val) {
        $name = trim($val['name']);
        $dir = BASEDIR . 'images/' . substr($name, 1, 1) . '/' . substr($name, 2, 1) . '/' . $name . '.dat';
        echo $dir . "\r\n";
        file_put_contents($dir, base64_decode(trim($val['contents'])));
    }
}

/**
 * Prepare the output directories below BASEDIR.
 *
 * An existing "images" or "messages" directory is renamed to "<name>_old".
 * A two-level images/<c1>/<c2> tree is then created for every pair of
 * characters from 0-9 and A-Z, followed by the "messages" directory.
 *
 * @return int always 0
 */
function mk_imgdir()
{
    $buckets  = str_split('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ');
    $images   = BASEDIR . 'images';
    $messages = BASEDIR . 'messages';

    if (file_exists($images)) {
        rename($images, $images . '_old');
    }
    foreach ($buckets as $outer) {
        $level1 = $images . '/' . $outer;
        mkdir($level1, 0777, true);
        foreach ($buckets as $inner) {
            mkdir($level1 . '/' . $inner, 0777, true);
        }
    }

    if (file_exists($messages)) {
        rename($messages, $messages . '_old');
    }
    mkdir($messages . '/', 0777, true);

    return 0;
}
<?php
// ZIP variant: php cli.php <export.mht>
// Splits the MHT export into paged HTML plus decoded images and stores
// everything in one ZIP archive instead of loose files.
ini_set('pcre.backtrack_limit', 1000000);
$is_table_end = false;
$page = 0;
$contents = '';
$output_zip = './test.zip';

# Validate the input file
if (!isset($argv[1]) || !file_exists($argv[1])) {
    echo 'There isn\'t have this file.';
    exit;
}

# Create the archive with its directory skeleton
make_output_target($output_zip);

$handle = fopen($argv[1], "rb");
if (false === $handle) {
    echo 'There isn\'t have this file.';
    exit;
}

# Process the file piece by piece; mht_process() returns the unconsumed tail
do {
    $contents .= fread($handle, 124416);
    $contents = mht_process($contents, $output_zip);
} while (!feof($handle));
fclose($handle);

/**
 * Process the buffered part of the MHT export and add the results to the ZIP.
 *
 * Message rows go to messages/<8-digit page>.html (200 rows per page); after
 * the closing "</table></body></html>" has been seen, the Base64 image parts
 * are decoded into images/<c1>/<c2>/<name>.dat.
 *
 * Uses the globals $is_table_end and $page (index of the next page to write).
 * The archive is opened and closed on every call.
 *
 * @param string $contents   buffered, not yet consumed input
 * @param string $output_zip path of the target archive
 * @return string the tail to keep and prepend to the next piece
 */
function mht_process($contents, $output_zip)
{
    global $is_table_end;
    global $page;

    $zip = new ZipArchive;
    if ($zip->open($output_zip, ZipArchive::CREATE) !== TRUE) {
        echo 'create image failed';
        exit;
    }

    # Message part
    if (false === $is_table_end) {
        $html_head = '<html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /><title>QQ Message</title><style type="text/css">body{font-size:12px; line-height:22px; margin:2px;}td{font-size:12px; line-height:22px;}</style></head><body><table width=100% cellspacing=0>';
        $html_foot = '</table></body></html>';

        # Once the closing markup is buffered, only images remain afterwards
        if (false !== strpos($contents, $html_foot)) {
            $is_table_end = true;
        }

        $r = preg_match_all('|<tr.*?\</tr\>|ims', $contents, $matches, PREG_OFFSET_CAPTURE);
        if ($r) {
            $chunks = array_chunk($matches[0], 200);
            foreach ($chunks as $key => $val) {
                $index = $page + $key;
                # Point image references at the images/<c1>/<c2>/ layout
                $rows = preg_replace(
                    '|<IMG src="{(\S)(\S)(\S+).dat|ims',
                    '<IMG src="../images/$1/$2/{$1$2$3.dat',
                    array_column($val, 0)
                );
                # The very first page has no predecessor, so it gets no "Prev page" link
                $prev = $index > 0
                    ? ' <H1><a href="./' . sprintf("%08d", $index - 1) . '.html">Prev page</a></h1>'
                    : '';
                $zip->addFromString(
                    'messages/' . sprintf("%08d", $index) . '.html',
                    $html_head . implode('', $rows)
                    . '<td>' . $prev
                    . ' <H1><a href="./' . sprintf("%08d", $index + 1) . '.html">Next page</a></h1></td>'
                    . $html_foot
                );
            }
            # Advance past EVERY page written. Adding only the last chunk index
            # made the next call overwrite the final page of this call.
            $page += count($chunks);

            # Drop everything up to the end of the last complete row
            $pos = end($val);
            $contents = substr($contents, $pos[1] + strlen($pos[0]));
        }
        # No complete row yet: keep the buffer so a row that straddles two
        # reads is completed by the next piece instead of being discarded.
    }

    # Image part
    if (true === $is_table_end) {
        $r = preg_match_all(
            '|Content-Type:image.*?:base64.*?Content-Location:(.*?)\.dat(.*?)(?:------=_)|ims',
            $contents,
            $matches,
            PREG_OFFSET_CAPTURE | PREG_SET_ORDER
        );
        if ($r) {
            foreach ($matches as $val) {
                $name = $val[1][0];
                $dir = 'images/' . substr($name, 1, 1) . '/' . substr($name, 2, 1) . '/' . $name . '.dat';
                $zip->addFromString($dir, base64_decode(trim($val[2][0])));
            }
            # Keep the buffer from the START of the last match, as the original
            # did: that image is added again on the next call and replaces
            # the same archive entry.
            $contents = substr($contents, $val[0][1]);
        }
        # No complete image part yet: keep the buffer (see above).
    }

    $zip->close();
    # Return the remainder; the caller appends the next piece to it
    return $contents;
}

/**
 * Create the archive and its empty directory skeleton:
 * images/, images/<c1>/<c2>/ for every pair from 0-9 and A-Z, and messages/.
 *
 * @param string $output path of the archive to create
 * @return int always 0
 */
function make_output_target($output = './test.zip')
{
    $zip = new ZipArchive;
    if ($zip->open($output, ZipArchive::CREATE) !== TRUE) {
        echo 'create images directory failed';
        exit;
    }

    $zip->addEmptyDir('images');
    $tmp = array('0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z');
    foreach ($tmp as $outer) {
        $base_path = 'images/' . $outer;
        $zip->addEmptyDir($base_path);
        foreach ($tmp as $inner) {
            $zip->addEmptyDir($base_path . '/' . $inner);
        }
    }
    $zip->addEmptyDir('messages');
    $zip->close();
    return 0;
}
使用方法
rem Convert input.mht; the script's console output is redirected to log.txt
"php.exe" cli.php input.mht > log.txt
pause
生成ZIP之後,可以用WinMount掛載查看,無需解壓,很方便。