踏破鐵鞋無覓處,得來全不費工夫--刪除文本多餘空行ClipboardFormatter

從網上拷一些文章,老是有不少不少不少....的空行,
如下圖(CRLF就是換行符\r\n),怎麼去掉它們呢?

通常方法就是貼到文本編輯器中,
然後查找替換,查找替換,查找替換....
周而復始,不厭其煩....
一定有更好的辦法,

想象中...

如果有什麼辦法,讓電腦自動完成這件事,那該多好啊!

於是就有了本文的想法:
------寫一個程序自動刪除拷貝的文本里多餘的空行.
(電腦的過剩的計算資源不用白不用)


動手吧

要實現的程序是這樣工作的:
首先得取得剪切板的內容(
不要問剪切板是什麼?),

然後查找重複的關鍵字(\r\n\r\n),然後替換成一個(\r\n),

最後把替換完的文本內容,寫回剪切板中,
...後面,會考慮寫成托盤程序,

能自啓動

寫代碼了^_^

實現一個剪切板的類Clipboard,主要用於取得剪切板內容,和設置剪切板內容

//僅用於個人測試學習用途,作者不對使用本軟件與代碼產生的任何結果負責
//作者: wisepragma
//主頁:http://blog.csdn.net/wisepragma

#pragma once
#include <Windows.h> 
#include <tchar.h>
class Clipboard
{
private:
        TCHAR*          m_pBuffer;//動態內存,用於存剪切板文本拷貝
        int             m_bufSize;//拷貝的緩衝大小
        UINT            m_ClipFormat;//剪切板文本的格式,根據程序自動選擇
        HANDLE          m_hclip;//剪切板句柄,實際是剪切板內存指針
private:
        void destroy()
        { 
                if(m_pBuffer!=NULL) 
                { 
                        delete[]m_pBuffer;
                        m_pBuffer=NULL;
                }
        }
public:
        Clipboard()
        {
                m_pBuffer=NULL;
                m_ClipFormat=(  sizeof(TCHAR)==sizeof(WCHAR) )?CF_UNICODETEXT:CF_TEXT;//根據程序版本,設置剪切板的文本類型
        }
        ~Clipboard()
        { 
                destroy();
        }      
        UINT count()
        {
                m_bufSize=0;
                if(OpenClipboard(NULL))
                {
                        if(IsClipboardFormatAvailable(m_ClipFormat))
                        {		
                                HANDLE hclip=GetClipboardData(m_ClipFormat);

                                TCHAR* pClipBuffer=static_cast<TCHAR*>(GlobalLock(hclip) );
                                m_bufSize=lstrlen(pClipBuffer);
                                GlobalUnlock(hclip);
                                CloseClipboard();
                        }
                }
                return m_bufSize;
        }
        TCHAR* gettxt()
        {
                destroy();//釋放動態內存,避免屢次調用下內存泄漏
                if(OpenClipboard(NULL))
                {
                        if(IsClipboardFormatAvailable(m_ClipFormat))
                        {
                                HANDLE hclip=GetClipboardData(m_ClipFormat);
                                TCHAR* pClipBuffer=static_cast<TCHAR*>(GlobalLock(hclip) );//鎖定剪切板,取得剪切板字符串地址 
                                if(m_pBuffer!=NULL) delete[]m_pBuffer;
                                m_bufSize=lstrlen(pClipBuffer);//並計算此字符串長度
                                m_pBuffer=new TCHAR[m_bufSize+1];//申請動態內存,加1個空間,裝NULL結尾符
                                memcpy(m_pBuffer,pClipBuffer,m_bufSize*sizeof(TCHAR));//拷出剪切板,bugfixed:memcpy()以字節計數不是字
                                m_pBuffer[m_bufSize]=TEXT('\0');//組裝成一個以NULL結尾的字符串
                                GlobalUnlock(hclip);//解鎖剪切板,鎖與解鎖間動做要快
                                CloseClipboard();
                        }
                }
                return m_pBuffer;
        }
        bool settxt(TCHAR* str)
        { 
                HANDLE hClip;
                if(m_hclip!=NULL) GlobalFree( m_hclip);
                if( OpenClipboard(NULL) )
                {
                        EmptyClipboard();//寫入前必定要清空
                        int len=lstrlen(str);
                        m_hclip=GlobalAlloc(GMEM_MOVEABLE,(len+1)*sizeof(TCHAR));//BUBFIXED:GlobalAlloc()參數2以字節爲單位
                        TCHAR *pClipBuffer=static_cast<TCHAR*>(GlobalLock(m_hclip));
                        lstrcpy(pClipBuffer,str);
                        //寫入剪切板
                        GlobalUnlock(m_hclip);
                        hClip=SetClipboardData(m_ClipFormat,m_hclip);
                        //成功反回指向剪切板的句柄,失敗返回NULL
                        CloseClipboard();
                }
                return (hClip!=NULL);//寫入成功
        }
};


接下來,要來怎麼處理文本字符串
這個問題好像很簡單,不過後來發現我想得太複雜了...

舉例來說有一個字符串是這樣定義的

CString s="今天今天今天今天今天今天我有所思在遠道,一日不見兮,我心悄悄今天今天今天今天今天今天今天今天今天今天今天今天常言道:天道酬勤,靜以修身,儉以養德.今天今天今天事事通曉皆道理,人情達練即文章今天今天今天今天今天今天";

我們要把其中所有的,多個連續的"今天"替換成一個"今天",於是我想到從字串開頭逐個字遍歷,當找到"今天"後,以關鍵字"今天"的長度遍歷,直到找到非關鍵字"今天"記錄下關鍵字串的開頭與結尾,重複個數等信息,然後再逐個字遍歷,直到字串的結尾(描述好長)


// One entry of the singly linked list built by KeyWordReplacer while it scans
// the text: a stretch of non-keyword text followed by a run of keywords.
// The value (UINT)-1 is used as the "absent" marker for both index fields.
struct node 
{
        UINT iDifferentStart; // index where the non-keyword text begins; -1 when the keyword is found at the very start of the text
        UINT nSame;           // number of consecutive keyword occurrences counted for this entry
        UINT iSameStart;      // index of the first keyword of the run (i.e. where the non-keyword text ends); -1 when no keyword follows
        node *next;           // next entry; NULL marks the end of the list
};

class KeyWordReplacer
{
private:
        node *m_head;//單向鏈表記錄下索引等信息
         CString *m_sString;
        CString m_sKeyWord4Search;
        CString m_sKeyWord4Replace;

        CString m_sFormatText;
        UINT m_nString;
        UINT m_nKeyWord;

public:
        ~KeyWordReplacer()
        {
                destory();
        }
        void destory()
        {
                if(NULL!=m_head)
                {
                        while(m_head!=NULL)
                        {
                                node *idx=m_head;
                                m_head=m_head->next;
                                delete idx;
                        }
                }
                m_head=NULL;
        }
        KeyWordReplacer( CString  *sTxt, TCHAR *pSearchkeyWord,TCHAR *pReplaceKeyWord=NULL)
        {                
                //循環的工做原理:從m_sString中查找與關鍵字m_sKeyWord4Search相同的,記錄相同的個數
                //而後在返回的索引下查找與關鍵字m_sKeyWord4Search不一樣的
                //經過單向鏈表記錄下索引等信息
                m_sString=sTxt;
                m_sKeyWord4Search=pSearchkeyWord;
                m_sKeyWord4Replace=pReplaceKeyWord;
                m_nString=m_sString->GetLength();
                m_nKeyWord=m_sKeyWord4Search.GetLength();
                m_head=NULL;

                UINT ix=m_sString->Find(m_sKeyWord4Search);//ix用於從m_sString查找關鍵字m_sKeyWord4Search
                if( ix!=-1 )//沒找到返回-1,找到返回第一個關鍵字下標
                {
                        m_head=new node;
                        m_head->next=NULL;
                        m_head->nSame=0; //用於累計相同關鍵字的個數
                        if(ix==0) m_head->iDifferentStart=-1;//在開頭找到關鍵字, m_head->iDifferentStart以-1表示
                        else m_head->iDifferentStart=0;//在開頭沒找到關鍵字, m_head->iDifferentStart以0表示
                        m_head->iSameStart=ix;// m_head->iSameStart即iDifferentEnd


                        node *inx=m_head;
                        do{
                                UINT iz=ix;//iz用於記錄查找非關鍵字退出循環時刻,最後的索引
                                for(UINT iy=ix; iy<=m_nString-m_nKeyWord; iy+=m_nKeyWord)//iy用於從m_sString查找非m_sKeyWord4Search關鍵字
                                {
                                        iz=iy;

                                        if( m_sString->Mid(iy,m_nKeyWord)==m_sKeyWord4Search )
                                        {
                                                inx->nSame++; //累計相同關鍵字的個數
                                                if(inx->nSame==1) inx->iSameStart=iy;//第一次找到關鍵字即iDifferentEnd
                                                //_tprintf(_T("//[%d],%d,%s\n"),iy,inx->nSame,m_sString->Mid(iy,m_nKeyWord));
                                        }
                                        else
                                        {     //鏈表有兩種生長方式(new node<==m_head 和 m_head==>new node) //本程序使用此方式m_head==>new node
                                                inx->next=new node;
                                                inx=inx->next;
                                                inx->nSame=0;//初始化爲0,以便下一次累計相同關鍵字的個數
                                                inx->next=NULL;//重要!!!這是遍歷結束的標誌
                                                inx->iSameStart=-1;//在結尾沒找到關鍵字,iSameStart=-1表示
                                                inx->iDifferentStart=iy;//鏈表記錄非關鍵字的開始
                                                //_tprintf(_T("\n"));
                                                break;//找到不一樣的即立退出
                                        }
                                }					 
                                ix=m_sString->Find(m_sKeyWord4Search,iz);//查找關鍵字

                        }while( ix!=-1 &&  ix<m_nString-m_nKeyWord);

                }

        }
        CString &Replace(TCHAR *pReplaceKeyWord=NULL)
        {
                if( NULL!=pReplaceKeyWord ) m_sKeyWord4Replace=pReplaceKeyWord;
                if(NULL!=m_head)
                {
                        node *idx=m_head;
                        while(idx!=NULL )
                        {
                                _tprintf(_T("//[iDifferentStart:%d],iSameStart:%d,[nSame:%d],%s\n"),idx->iDifferentStart,idx->iSameStart, idx->nSame,m_sString->Mid(idx->iDifferentStart) );

                                if( idx->iSameStart == -1 )
                                {
                                        m_sFormatText+=m_sString->Mid( idx->iDifferentStart);
                                        //m_sFormatText+=m_sKeyWord4Replace;//註釋掉末尾不加入替換關鍵字
                                        _tprintf(_T("//....[sDifferent:%s]\n"),m_sString->Mid( idx->iDifferentStart) );
                                }
                                else 
                                {                                      
                                        if(idx->iDifferentStart != -1)
                                        {
                                                m_sFormatText+=m_sString->Mid(idx->iDifferentStart, idx->iSameStart - idx->iDifferentStart);
                                                m_sFormatText+=m_sKeyWord4Replace;
                                                _tprintf(_T("//....[sDifferent:%s]\n"), m_sString->Mid(idx->iDifferentStart, idx->iSameStart - idx->iDifferentStart) );
                                        }
                                }
                                idx=idx->next;
                        }
                }
                _tprintf(_T("字串:%s\n查找:%s\n替換成:%s\n結果:%s\n"),*m_sString,m_sKeyWord4Search,m_sKeyWord4Replace,m_sFormatText);

                return m_sFormatText;
        }
        TCHAR *getText()
        {
                return m_sFormatText.GetBuffer( m_sFormatText.GetLength() );
        }
};


int _tmain()
{	
        setlocale(LC_CTYPE,"CHS");//加上#include<locale.h>,[加上#include <tchar.h>]讓_tprintf()在ANSI,UNICODE下都能支持中文

        //CString s="我是菩提樹上菩提花,冷眼看人世千年塵沙,你流連樹下,回眸那一剎,天地間只剩你眉眼如畫,長亭十里憶你風袖迎晨霞,清酒一壺醉里弄琴琶,長亭十里憶你薄衫牽駿馬,梅雨一簾多少相思話";
        //CString s="今天今天今天今天今天今天今天今天今天";
        //CString s=_T("可是今天今天今天今天今天它真寫今天今天今天些什麼着");
        //CString s="今天今天今天今天今天今天我有所思在遠道,一日不見兮,我心悄悄今天今天今天今天今天今天今天今天今天今天今天今天常言道:天道酬勤,靜以修身,儉以養德.今天今天今天事事通曉皆道理,人情達練即文章今天今天今天今天今天今天";

        Clipboard brd;     
        CString sClipBoard=brd.gettxt(); 
        KeyWordReplacer crlf(&sClipBoard,_T("\r\n") );
        crlf.Replace(_T("\r\n"));
        //KeyWordReplacer crlf(&sClipBoard,_T("今天"),_T("▓") );
        if(NULL!=crlf.getText() ) 
        {

                brd.settxt( crlf.getText() );
        }
        else _tprintf(_T("沒有找到和替換\n"));

        getchar();
}





下面就來測試



結果是這樣的


可是當我把一個4MB左右的文本拿來作試驗的時候,發現了一個問題
那就是---效率實在太低了!!!
輾轉反側,上下求索,沒找到好的辦法.....

突然有一天,眼前一亮,CString 不是有一個Replace()函數嗎
然後就着NOTEPAD2替換着文本的換行


而後簡化了代碼:(有點黯然神傷)

bool bRemoveAllCRLF=false;
Clipboard brd;    
_tprintf(_T("...正在讀取剪切板...\n"));
CString sClipBoard=brd.gettxt(); 
_tprintf(_T("...正在查找並替換多餘空行...\n"));
int nReplace=0;
bool bKeyWordFound=false;
do{
	   if(bRemoveAllCRLF)
	   {
		   nReplace=sClipBoard.Replace(_T("\r\n"),_T(""));      
	   }
	   else
	   {
		   nReplace=sClipBoard.Replace(_T("\r\n\r\n"),_T("\r\n"));      
	   }
	   if( nReplace>0 )bKeyWordFound=true;
}while( nReplace>0);//重複替換,直到替換個數爲0
if(bKeyWordFound ) 
{            
		_tprintf(_T("...正在寫入剪切板...\n"));
		bool bSucceed=brd.settxt( sClipBoard.GetBuffer(sClipBoard.GetLength() ) );                  
	  if(bSucceed) _tprintf(_T("...刪除剪切板多餘空行成功...\n"));
}


雖然是重複地進行查找替換,可是它的效率不是一般地高,差距啊

接着把KeyWordReplacer類改進爲KeyWordRemover

//僅用於個人測試學習用途,作者不對使用本軟件與代碼產生的任何結果負責
//作者: wisepragma
//主頁:http://blog.csdn.net/wisepragma
#pragma once
#include <windows.h>
#include <tchar.h>
class KeyWordRemover
{
private:
        CString m_sText;
public:
	bool remove( TCHAR  *sTxt, TCHAR *pskeyWord,bool bRemoveAll=false)
	{                
        
                CString s1keyword(pskeyWord);//單重關鍵字
                CString s2keyword=s1keyword+pskeyWord;//雙重關鍵字
                m_sText=sTxt;
                int nReplace=0;
                bool bKeyWordFound=false;
                do{
                        if(bRemoveAll)
                        {
                                nReplace=m_sText.Replace(s1keyword,_T(""));    //s1keyword單重關鍵字 替換成 空即刪除關鍵字 , 這個比下面的耗時 
                        }
                        else
                        {
                                nReplace=m_sText.Replace(s2keyword,s1keyword);        //雙重關鍵字 替換成 單重關鍵字 即 縮減關鍵字  
                        }
                        if( nReplace>0 )bKeyWordFound=true;
                }while( nReplace>0);//直到替換個數爲0
              
                return bKeyWordFound;
	}
        TCHAR *gettxt()
        {
                return m_sText.GetBuffer( m_sText.GetLength() );
        }
};


搞定了!

作成WINDOWS托盤圖標式軟件是這個樣子的



代碼與軟件
ClipboardFormatter[bin]  
ClipboardFormatter[src]

================升級版=========================================
ClipboardFormatter V1.1[bin]
更新說明:

1.改進了KeyWordRemover,修改了remove()函數,增加了settxt()函數,見下面
2.Clipboard類settxt()中修正爲 HANDLE hClip=NULL;//bugfixed 未初始化引用,函數老是返回真
3.加入刪除空白字符如,空格,漢字空格,製表符,換行符功能

#pragma once
#include <windows.h>
#include <tchar.h>
class KeyWordRemover
{
private:
        CString m_sText;
public:
        void settxt(TCHAR  *sTxt)
        {
                m_sText=sTxt;
        }
	bool remove(  TCHAR *pskeyWord,bool bRemoveAll=false)
	{                
        
              CString s1keyword(pskeyWord);//單重關鍵字
                CString s2keyword=s1keyword+pskeyWord;//雙重關鍵字
               
                int nReplace=0;
              bool bKeyWordFound=false;
              do{
                        if(bRemoveAll)
                        {
                                nReplace=m_sText.Replace(s1keyword,_T(""));    //s1keyword單重關鍵字 替換成 空即刪除關鍵字 , 這個比下面的耗時 
                        }
                        else
                        {
                                nReplace=m_sText.Replace(s2keyword,s1keyword);        //雙重關鍵字 替換成 單重關鍵字 即 縮減關鍵字  
                        }
                        if( nReplace>0 )bKeyWordFound=true;
                }while( nReplace>0);//直到替換個數爲0
              
                return bKeyWordFound;
	}
        TCHAR *gettxt()
        {
                return m_sText.GetBuffer( m_sText.GetLength() );
        }
};
相關文章
相關標籤/搜索