c++ 讀取、輸出txt文件

下面這段話轉自:https://blog.csdn.net/lightlater/article/details/6326338

關於文本文件的文件頭

第一 ANSI文件的文件頭爲空,不須要處理;

第二 UNICODE(UTF-16 LE)文件的文件頭爲0xFF,0xFE共計兩個字節,讀取時須要偏移兩個字節再行讀取;

第三 UTF-8文件的文件頭爲0xEF,0xBB,0xBF共計三個字節,讀取時須要偏移三個字節後再行讀取。

 

1.ansi格式txt文件

 1 void readAnsiTXT(){
 2     string filename = "ansi.txt";
 3     ifstream fin(filename.c_str());
 4     if (!fin.is_open()){
 5         cout << "open failed!\n";
 6     }
 7     char ch;
 8     string msg = "";
 9     while (fin.get(ch)){
10         msg += ch;
11     }
12     cout << msg << "\n";
13 }

2.Unicode格式

轉載:http://www.javashuo.com/article/p-vzubwbix-nt.html

memset函數:https://baike.baidu.com/item/memset/4747579?fr=aladdin

setlocale函數:https://www.runoob.com/cprogramming/c-function-setlocale.html

void readUnicodeTXT(){
    string filename = "unicode.txt";
    ifstream fin;
    fin.open(filename, ios::binary);
    fin.seekg(2, ios::beg);
    wstring wstrLine;
    while (!fin.eof())
    {
        wchar_t wch;
        fin.read((char *)(&wch), 2);
        wstrLine.append(1, wch);
    }
    string str = ws2s(wstrLine);
    str.erase(str.size()-1, 1);//刪除結尾重複的一個字符
    cout << str << endl;
}

/// Converts a wide string to a multibyte string with wcstombs().
///
/// The C locale is temporarily switched to "chs" for the conversion and the
/// previous locale is restored before returning. If "chs" is not available the
/// setlocale() call has no effect and the conversion runs under the locale
/// that was already active.
///
/// @param ws  Wide string to convert; conversion stops at the first L'\0'.
/// @return    The multibyte text. If wcstombs() hits a character it cannot
///            convert, only the bytes it produced before that point (possibly
///            none) are returned.
std::string ws2s(const std::wstring& ws)
{
    // Puts the caller's locale back on every exit path, including exceptions
    // thrown by the string allocations below.
    struct LocaleRestorer {
        std::string saved;
        ~LocaleRestorer() { if (!saved.empty()) setlocale(LC_ALL, saved.c_str()); }
    };
    const char* current = setlocale(LC_ALL, nullptr);  // query only, no change
    LocaleRestorer restorer{current ? current : ""};
    setlocale(LC_ALL, "chs");

    // MB_CUR_MAX is the largest byte count of one character in the locale now
    // in effect, so this buffer can never truncate the output (+1 for '\0').
    std::string buffer(ws.size() * MB_CUR_MAX + 1, '\0');
    wcstombs(&buffer[0], ws.c_str(), buffer.size());

    // The buffer was zero-filled, so reading it as a C string yields exactly
    // the bytes that were written.
    return std::string(buffer.c_str());
}

3.utf8格式:

 1 void readUtf8TXT(){
 2     string str = "utf8.txt";
 3     wstring res=L"";
 4     std::locale loc("chs");
 5     std::wcout.imbue(loc);
 6     std::wifstream wif(str, ios::binary);
 7     codecvt_utf8<wchar_t, 0x10ffff, std::consume_header>* codecvToUnicode = new codecvt_utf8<wchar_t, 0x10ffff, std::consume_header>;
 8     if (wif.is_open()){
 9         wif.imbue(std::locale(wif.getloc(), codecvToUnicode));
10         wstring wline;
11         while (getline(wif, wline)){
12             wstring convert;
13             for (auto c : wline){
14                 if (c != L'\0' && c != L'?') convert += c;
15             }
16             res = res + convert;        
17         }
18         wif.close();
19     }    
20     for (wstring::iterator i = res.begin(); i != res.end(); i++){//將res中的'\r'換成'\n',不然輸出異常
21         if (*i == '\r'){
22             *i = '\n';
23         }
24     }
25     wcout << res << endl;
26 }

 

Windows下用std::wifstream讀取Unicode和UTF-8文本

轉載:http://101.132.192.87/2019/08/29/windows%e4%b8%8b%e7%94%a8stdwifstream%e8%af%bb%e5%8f%96unicode%e6%96%87%e6%9c%ac/

Windows下使用std::wifstream讀取Unicode文本的方法:

 1 std::locale loc("chs");                //windows下ok
 2     std::wcout.imbue(loc);
 3     // open as a byte stream
 4     std::wifstream wif("路徑", std::ios::binary);
 5     std::codecvt_utf16<wchar_t, 0x10ffff, std::consume_header>* codecvtToUnicode = new std::codecvt_utf16 < wchar_t, 0x10ffff, std::consume_header >;
 6     if (wif.is_open())
 7     {
 8         // apply BOM-sensitive UTF-16 facet
 9         wif.imbue(std::locale(wif.getloc(), codecvtToUnicode));
10         std::wstring wline;
11         while (std::getline(wif, wline))
12         {
13             std::wstring convert;
14             for (auto c : wline)
15             {
16                 if (c != L'\0' && c != L'?')
17                     convert += c;
18             }
19             wcout << convert << endl;
20         }
21         wif.close();
22         //delete codecvtToUnicode;     //new和delete,應該不用手動delete,在哪裏delete都會崩潰(親測)
23     }

Windows下使用std::wifstream讀取UTF-8文本的方法:

 1 std::locale loc("chs");                //windows下ok
 2     std::wcout.imbue(loc);
 3     // open as a byte stream
 4     std::wifstream wif("路徑", std::ios::binary);
 5     std::codecvt_utf8<wchar_t, 0x10ffff, std::consume_header>* codecvtToUnicode = new std::codecvt_utf8 < wchar_t, 0x10ffff, std::consume_header >;
 6     if (wif.is_open())
 7     {
 8         // apply BOM-sensitive UTF-8 facet
 9         wif.imbue(std::locale(wif.getloc(), codecvtToUnicode));
10         std::wstring wline;
11         while (std::getline(wif, wline))
12         {
13             std::wstring convert;
14             for (auto c : wline)
15             {
16                 if (c != L'\0' && c != L'?')
17                     convert += c;
18             }
19             wcout << convert << endl;
20         }
21         wif.close();
22         //delete codecvtToUnicode;     //new和delete,應該不用手動delete,在哪裏delete都會崩潰(親測)
23     }
相關文章
相關標籤/搜索