個人主要文件夾分爲三個:分別爲客戶端,cppjieba分詞庫,服務器部分前端
==啓動時服務器:==mysql
==客戶端連入時服務器:==linux
==輸入「百度」==ios
==輸入「周杰倫」==git
==輸入「清華大學」==github
==輸入「微信」==web
==啓動時服務器:==sql
==客戶端連入時服務器:==編程
==輸入"baidu":==
==輸入"github":==
==輸入"word":==
///======================================= /// File: Client.cc /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-22 21:54:36 /// Dream: Don't forget your dreams! /// ====================================== #include "json/json.h" #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <errno.h> #include <sys/types.h> #include <sys/socket.h> #include <netinet/in.h> #include <arpa/inet.h> #include <sys/epoll.h> #include <string> #include <iostream> using std::cout; using std::endl; using std::string; #define ERR_EXIT(m) \ do { \ perror(m);\ exit(EXIT_FAILURE);\ }while(0) void do_service(int sockfd); size_t readn(int sockfd,char *buf,size_t count) { size_t nleft = count; char * pbuf = buf; while(nleft > 0) { int nread = read(sockfd,pbuf,nleft); if( nread == -1) { if(errno == EINTR) continue; return EXIT_FAILURE; }else if(nread == 0 ) break; pbuf += nread; nleft -= nread; } return (count - nleft); } size_t writen(int sockfd,const char *buf,size_t count) { size_t nleft = count; const char * pbuf = buf; while(nleft > 0) { int nwrite = write(sockfd,pbuf,nleft); if(nwrite == -1) { if(errno == EINTR) continue; return EXIT_FAILURE; } nleft -= nwrite; pbuf += nwrite; } return (count -nleft); } int main(int argc, const char *argv[]) { if(argc !=3) { printf("./client IP Port"); return -1; } int peerfd = socket(PF_INET, SOCK_STREAM, 0); if(peerfd == -1) ERR_EXIT("socket"); struct sockaddr_in addr; memset(&addr, 0, sizeof addr); addr.sin_family = AF_INET; addr.sin_addr.s_addr = inet_addr(argv[1]); addr.sin_port = htons(atoi(argv[2])); socklen_t len = sizeof addr; if(connect(peerfd, (struct sockaddr*)&addr, len) == -1) ERR_EXIT("Connect"); char buf[1024]; int length; memset(buf, 0, sizeof(buf)); readn(peerfd,(char*)&length,sizeof(int)); readn(peerfd, buf, length); cout<<buf<<endl; do_service(peerfd); return 0; } void jsonPara(const string strData) { Json::Reader reader; Json::Value DevsJson; reader.parse(strData,DevsJson); int siNum = 
DevsJson.size(); for(int i = 0;i< siNum;++i) { Json::Value Dev = DevsJson[i]; string Devstr = Dev.toStyledString(); cout << Devstr; } } void do_service(int sockfd) { char recvbuf[1024] = {0}; char sendbuf[1024] = {0}; while(1) { bzero(sendbuf,sizeof(sendbuf)); read(0,sendbuf,sizeof(sendbuf)-1); write(sockfd,sendbuf,strlen(sendbuf)); int len; bzero(recvbuf,sizeof(recvbuf)); readn(sockfd,(char*)&len,sizeof(int)); readn(sockfd,recvbuf,len); jsonPara(string(recvbuf)); } } #if 0 void JsonPara(const string strData) { Json::Reader reader; Json::Value DevsJson; reader.parse(strData,DevsJson); int siNum =DevsJson.size(); for(int i =0;i<siNum;++i) { Json::Value Dev =DevsJson[i]; string Devstr =Dev.toStyledString(); if(Devstr.c_str()) cout<<Devstr; } } #endif
///======================================= /// File: Acceptor.h /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-22 23:47:05 /// Dream: Don't forget your dreams! /// ====================================== #ifndef __WD_ACCEPTOR_H__ #define __WD_ACCEPTOR_H__ #include "Socket.h" #include "InetAddress.h" namespace wd { class Acceptor { public: Acceptor(int listenfd,const InetAddress & addr); void ready(); int accept(); int fd()const {return _listenSock.fd();} private: void setReuseAddr(bool on); void setReusePort(bool on); void bind(); void listen(); Socket _listenSock; InetAddress _addr; }; } #endif
///======================================= /// File: Acceptor.cc /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-05 23:52:12 /// Dream: Don't forget your dreams! /// ====================================== #include <iostream> #include "Acceptor.h" #include "SocketUtil.h" namespace wd { Acceptor::Acceptor(int listenfd,const InetAddress & addr) :_listenSock(listenfd) ,_addr(addr) {} void Acceptor::ready() { setReuseAddr(true); setReusePort(true); bind(); listen(); } int Acceptor::accept() { int peerfd=::accept(_listenSock.fd(),NULL,NULL); if(-1==peerfd) { perror("accept error!"); } return peerfd; } void Acceptor::setReuseAddr(bool flag) { int on=(flag?1:0); if(::setsockopt(_listenSock.fd() ,SOL_SOCKET ,SO_REUSEADDR ,&on ,static_cast<socklen_t>(size_t(on)))==-1) { perror("setsockopt reuseaddr error!"); ::close(_listenSock.fd()); exit(EXIT_FAILURE); } } void Acceptor::setReusePort(bool flag) { #ifdef SO_REUSEPORT int on =(flag?1:0); if(::setsockopt(_listenSock.fd() ,SOL_SOCKET ,SO_REUSEPORT ,&on ,static_cast<socklen_t>(sizeof(on)))==-1) { perror("setsockopt reuseport error!"); ::close(_listenSock.fd()); exit(EXIT_FAILURE); } #else if(flag) { fprintf(stderr,"SO_REUSEPORT is not supported!\n"); } #endif } void Acceptor::bind() { if(-1==::bind(_listenSock.fd() ,(const struct sockaddr*)_addr.getSockAddrPtr() ,sizeof(InetAddress))) { perror("bind error!"); ::close(_listenSock.fd()); exit(EXIT_FAILURE); } } void Acceptor::listen() { if(-1==::listen(_listenSock.fd(),10)) { perror("listen error!"); ::close(_listenSock.fd()); exit(EXIT_FAILURE); } } }
///=======================================
/// File:    Cache.h
/// Author:  wtptorres(1584292712@qq.com)
/// Date:    2019-06-25 19:52:40
/// Dream:   Don't forget your dreams!
/// ======================================

#ifndef __CACHE_H__
#define __CACHE_H__

#include <unordered_map>
#include <string>
using namespace std;

namespace wd
{

/// Query -> result cache backed by a hash table, with plain-text persistence.
class Cache
{
public:
    /// Stores (or overwrites) the result for a query.
    void addElement(string, string);

    /// Loads whitespace-separated "query result" pairs from a file.
    void readFromFile(string);

    /// Writes every entry to a file, one "query result" pair per line.
    void writeToFile(string);

    /// Copies into this cache every entry of the other cache whose key is
    /// not present here yet.
    void update(const Cache &);

    /// @return true when the query is cached.
    bool find(string querry);

    /// @return a reference to the value stored for key; as with the
    ///         underlying map, a missing key is inserted with an empty value.
    string &operator[](string key);

private:
    unordered_map<string, string> _hashTable;
};

};

#endif
///======================================= /// File: Cache.cc /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-05 20:01:25 /// Dream: Don't forget your dreams! /// ====================================== #include "Cache.h" #include <fstream> #include <utility> #include <iostream> using namespace std; using namespace wd; void Cache::addElement(string querry,string result) { _hashTable[querry]=result; } void Cache::readFromFile(string filePath) { ifstream ifs(filePath); if(!ifs){ cout<<" file open error!"<<endl; return; } string querry,result; while(ifs>>querry,!ifs.eof()) { ifs>>result; _hashTable[querry]=result; } } #if 0 void Cache::writeToFile(string filePath) { ofstream ofs(filePath); if(!ofs){ cout<<""<<endl; return; } for(auto &mypair:_hashTable) { ofs<<mypair.first<<" "; ofs<<mypair.second<<endl; } } void Cache::update(const Cache & cache) { for(auto &mypair:cache._hashTable) { auto cit =_hashTable.find(mypair.first); if(cit==_hashTable.end()) { _hashTable.insert(std::move(mypair)); } } } #endif void Cache::writeToFile(string filePath) { ofstream ofs(filePath); if(!ofs){ cout<<"file write error!"<<endl; return; } for(auto &mypair:_hashTable) { ofs<<mypair.first<<" "; ofs<<mypair.second<<endl; } } void Cache::update(const Cache & cache) { for(auto &mypair:cache._hashTable) { auto cit =_hashTable.find(mypair.first); if(cit==_hashTable.end()) { _hashTable.insert(std::move(mypair)); } } } bool Cache::find(string querry) { auto cit =_hashTable.find(querry); if(cit==_hashTable.end()) return false; return true; } string &Cache::operator[](string key) { return _hashTable[key]; } #if 0 int main() { cout<<"cache is correct!"<<endl; } #endif
///======================================= /// File: CacheManger.h /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-25 20:51:09 /// Dream: Don't forget your dreams! /// ====================================== #ifndef __CACHEMANGER_H__ #define __CACHEMANGER_H__ #include "Cache.h" #include <vector> #define THREADNUM 4 using std::vector; namespace wd { class CacheManger { public: CacheManger(string filePath); void init(string); Cache & getCache(size_t); void periodicUpdate(); private: string _cacheFilePath; vector<Cache>_cacheList; }; }; #endif
///======================================= /// File: CacheManger.cc /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-05 20:56:50 /// Dream: Don't forget your dreams! /// ====================================== #include "CacheManger.h" #include <iostream> #include <fstream> #include <utility> #include <iostream> using namespace std; using namespace wd; CacheManger::CacheManger(string cacheFilePath) { init(cacheFilePath); } void CacheManger::init(string cacheFilePath) { _cacheFilePath=cacheFilePath; _cacheList.reserve(THREADNUM); Cache tmp; tmp.readFromFile(_cacheFilePath); for(size_t i=0;i!=THREADNUM;++i) { _cacheList.push_back(std::move(tmp)); } } Cache & CacheManger::getCache(size_t number) { return _cacheList[number]; } void CacheManger::periodicUpdate() { auto cit=_cacheList.begin(); Cache lastWrite=*(cit ++); for(;cit<_cacheList.end();++cit) { lastWrite.update(*cit); } for(cit=_cacheList.begin()+1;cit!=_cacheList.end();++cit) { (*cit).update(lastWrite); } lastWrite.writeToFile(_cacheFilePath); }
///======================================= /// File: Condition.h /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-26 10:32:43 /// Dream: Don't forget your dreams! /// ====================================== #ifndef __WD_CONDITION_H__ #define __WD_CONDITION_H__ #include "Noncopyable.h" #include "MutexLock.h" #include <pthread.h> namespace wd { class Condition :Noncopyable { public: Condition(MutexLock &mutex) :_mutex(mutex) {pthread_cond_init(&_cond,NULL);} ~Condition() {pthread_cond_destroy(&_cond);} void wait() {pthread_cond_wait(&_cond,_mutex.getMutexLockPtr());} void notify() {pthread_cond_signal(&_cond);} void notifyAll() {pthread_cond_broadcast(&_cond);} private: pthread_cond_t _cond; MutexLock & _mutex; }; } #endif
///======================================= /// File: ConFiguration.h /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-26 10:32:43 /// Dream: Don't forget your dreams! /// ====================================== #ifndef __CONFIGURATION_H__ #define __CONFIGURATION_H__ #include "Noncopyable.h" #include <string> #include <map> #include <set> using std::string; using std::map; using std::set; class Configuration { public: Configuration(const string &filePath); map<string,string>& getConfigMap(); set<string>& getStopWordList(); Configuration &operator=(const Configuration & rhs); string getIp()const; unsigned short getPort() const; private: string _filePath; map<string,string> _configMap; set<string> _stopWordList; }; template<typename T> class Singleton { public: template<typename ...Args> static T *getInstance(Args ...args) { if(!_pInstance) _pInstance=new T(args ...); return _pInstance; } static void destroy() { if(_pInstance) delete _pInstance; } private: Singleton(); ~Singleton(); static T *_pInstance; }; template<typename T> T *Singleton<T>::_pInstance =NULL; #endif
///======================================= /// File: ConFiguration.cc /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-07 15:24:14 /// Dream: Don't forget your dreams! /// ====================================== #include "ConFiguration.h" #include <stdlib.h> #include <utility> #include <fstream> #include <iostream> using namespace std; using namespace wd; Configuration::Configuration(const string & filePath) :_filePath(std::move(filePath)) { ifstream ifs(_filePath); if(!ifs){ cout<<"file open error!"<<endl; } string key,value; while(ifs>>key,!ifs.eof()) { ifs>>value; _configMap.insert(std::make_pair(key,value)); } ifs.close(); } map<string,string>& Configuration::getConfigMap() { return _configMap; } set<string> & Configuration::getStopWordList() { string stopWordPath=_configMap["myStopWord"]; ifstream ifs(stopWordPath); if(!ifs){ cout<<"getStopWordList file open error!"<<endl; return _stopWordList; } string stopword; while(ifs>>stopword) { _stopWordList.insert(stopword); } return _stopWordList; } Configuration & Configuration::operator=(const Configuration & rhs) { _filePath=rhs._filePath; _configMap=rhs._configMap; _stopWordList=rhs._stopWordList; return *this; } string Configuration::getIp()const { auto cit =_configMap.find("myip"); if(cit ==_configMap.end()){ return ""; } return cit->second; } unsigned short Configuration::getPort() const { auto cit =_configMap.find("myport"); if(cit==_configMap.end()) return 0; else return atoi(cit->second.c_str()); }
///======================================= /// File: EpollPoller.h /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-25 11:03:36 /// Dream: Don't forget your dreams! /// ====================================== #ifndef __WD_EPOLLPOLLER_H__ #define __WD_EPOLLPOLLER_H__ #include "TcpConnection.h" #include "Noncopyable.h" #include "MutexLock.h" #include <sys/epoll.h> #include <vector> #include <map> #include <functional> namespace wd { class Acceptor; class EpollPoller :Noncopyable { public: typedef TcpConnection::TcpConnectionCallback EpollCallback; typedef std::function<void()> Functor; EpollPoller(Acceptor &acceptor); ~EpollPoller(); void loop(); void unloop(); void runInLoop(const Functor && cb); void doPendingFunctors(); void wakeup(); void setConnectionCallback(EpollCallback cb); void setMessageCallback(EpollCallback cb); void setCloseCallback(EpollCallback cb); private: void waitEpollfd(); void handleConnection(); void handleMessage(int peerfd); void handleRead(); Acceptor & _acceptor; int _epollfd; int _eventfd; int _listenfd; bool _isLooping; MutexLock _mutex; std::vector<Functor> _pendingFunctors; typedef std::vector<struct epoll_event>Eventlist; Eventlist _eventList; typedef std::map<int,TcpConnectionPtr> ConnectionMap; ConnectionMap _connMap; EpollCallback _onConnectionCb; EpollCallback _onMessageCb; EpollCallback _onCloseCb; }; } #endif
///======================================= /// File: EpollPoller.cc /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-07 15:42:54 /// Dream: Don't forget your dreams! /// ====================================== #include "EpollPoller.h" #include "SocketUtil.h" #include "Acceptor.h" #include <assert.h> #include <iostream> using namespace std; namespace wd { EpollPoller::EpollPoller(Acceptor & acceptor) :_acceptor(acceptor) ,_epollfd(createEpollFd()) ,_eventfd(createEventFd()) ,_listenfd(_acceptor.fd()) ,_isLooping(false) ,_eventList(1024) { addEpollFdRead(_epollfd,_listenfd); addEpollFdRead(_epollfd,_eventfd); } EpollPoller::~EpollPoller() { ::close(_epollfd); } void EpollPoller::loop() { _isLooping=true; while(_isLooping) { waitEpollfd(); } } void EpollPoller::unloop() { if(_isLooping) _isLooping=false; } void EpollPoller::setConnectionCallback(EpollCallback cb) { _onConnectionCb=cb; } void EpollPoller::setMessageCallback(EpollCallback cb) { _onMessageCb=cb; } void EpollPoller::setCloseCallback(EpollCallback cb) { _onCloseCb=cb; } void EpollPoller::waitEpollfd() { int nready; do { nready =::epoll_wait(_epollfd,&(*_eventList.begin()),_eventList.size(),10000); }while(-1==nready && errno ==EINTR); if(-1==nready){ perror("epoll wait error!"); exit(EXIT_FAILURE); }else if(0==nready){ cout<<"epoll_wait timeout!"<<endl; }else{ if(nready==static_cast<int>(_eventList.size())){ _eventList.resize(_eventList.size()*2); } for(int idx=0;idx!=nready;++idx)//正宗羅老師循環體(TwT) { if(_eventList[idx].data.fd ==_listenfd) { if(_eventList[idx].events & EPOLLIN) { handleConnection(); } }else if(_eventList[idx].data.fd ==_eventfd){ handleRead(); cout<<">>doPendingFunctors()"<<endl; doPendingFunctors(); }else{ if(_eventList[idx].events & EPOLLIN){ handleMessage(_eventList[idx].data.fd); } } } } } void EpollPoller::handleConnection() { int peerfd=_acceptor.accept(); addEpollFdRead(_epollfd,peerfd); TcpConnectionPtr conn(new TcpConnection(peerfd,this)); 
conn->setConnectionCallback(_onConnectionCb); conn->setMessageCallback(_onMessageCb); conn->setCloseCallback(_onCloseCb); std::pair<ConnectionMap::iterator,bool>ret; ret=_connMap.insert(std::make_pair(peerfd,conn)); assert(ret.second ==true); (void)ret; conn->handleConnectionCallback(); } void EpollPoller::handleMessage(int peerfd) { bool isClosed=isConnectionClosed(peerfd); ConnectionMap::iterator it =_connMap.find(peerfd); assert(it!=_connMap.end()); if(isClosed) { it->second->handleCloseCallback(); delEpollReadFd(_epollfd,peerfd); _connMap.erase(it); }else{ it->second->handleMessageCallback(); } } void EpollPoller::runInLoop(const Functor && cb) { { MutexLockGuard mlg(_mutex); _pendingFunctors.push_back(std::move(cb)); } wakeup(); } void EpollPoller::doPendingFunctors() { std::vector<Functor>tmp; { MutexLockGuard mlg(_mutex); tmp.swap(_pendingFunctors); } for(auto & functor:tmp) { functor(); } } void EpollPoller::handleRead() { uint64_t howmany; int ret=::read(_eventfd,&howmany,sizeof(howmany)); if(ret !=sizeof(howmany)) { perror("read error!"); } } void EpollPoller::wakeup() { uint64_t one =1; int ret =::write(_eventfd,&one,sizeof(one)); if(ret!=sizeof(one)) { perror("write error!"); } } }
///=======================================
/// File:    InetAddress.h
/// Author:  wtptorres(1584292712@qq.com)
/// Date:    2019-06-22 21:55:19
/// Dream:   Don't forget your dreams!
/// ======================================

#ifndef __WD_INETADDRESS_H__
#define __WD_INETADDRESS_H__

#include <netinet/in.h>
#include <string>

namespace wd
{

/// Value wrapper around an IPv4 socket address (struct sockaddr_in).
class InetAddress
{
public:
    /// Address with the given port and the wildcard IP (INADDR_ANY).
    InetAddress(short port);

    /// Address built from a textual IPv4 address and a port.
    InetAddress(const char *pIp, short port);

    /// Copy of an existing socket address.
    InetAddress(const struct sockaddr_in &addr);

    /// @return the IP address in textual form.
    std::string ip() const;

    /// @return the port in host byte order.
    unsigned short port() const;

    /// @return a pointer to the wrapped socket address.
    const struct sockaddr_in *getSockAddrPtr() const;

private:
    struct sockaddr_in _addr;
};

}

#endif
///======================================= /// File: InetAddress.cc /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-07 20:55:18 /// Dream: Don't forget your dreams! /// ====================================== #include "InetAddress.h" #include <stdio.h> #include <stdlib.h> #include <sys/socket.h> #include <arpa/inet.h> #include <string.h> namespace wd { InetAddress::InetAddress(short port) { ::memset(&_addr,0,sizeof(_addr)); _addr.sin_family=AF_INET; _addr.sin_port=htons(port); _addr.sin_addr.s_addr=INADDR_ANY; } InetAddress::InetAddress(const char * pIp,short port) { ::memset(&_addr,0,sizeof(_addr)); _addr.sin_family=AF_INET; _addr.sin_port=htons(port); _addr.sin_addr.s_addr=inet_addr(pIp); } InetAddress::InetAddress(const struct sockaddr_in & addr) :_addr(addr) {} const struct sockaddr_in * InetAddress::getSockAddrPtr()const { return & _addr; } std::string InetAddress::ip()const { return std::string(inet_ntoa(_addr.sin_addr)); } unsigned short InetAddress::port() const { return ntohs(_addr.sin_port); } }
///======================================= /// File: MutexLock.h /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-24 10:32:43 /// Dream: Don't forget your dreams! /// ====================================== #ifndef __WD_MUTEXLOCK_H__ #define __WD_MUTEXLOCK_H__ #include "Noncopyable.h" #include <pthread.h> namespace wd { class MutexLock :Noncopyable { public: MutexLock() {pthread_mutex_init(&_mutex,NULL);} ~MutexLock() {pthread_mutex_destroy(&_mutex);} void lock() {pthread_mutex_lock(&_mutex);} void unlock() {pthread_mutex_unlock(&_mutex);} pthread_mutex_t *getMutexLockPtr() {return &_mutex;} private: pthread_mutex_t _mutex; }; class MutexLockGuard { public: MutexLockGuard(MutexLock &mutex) :_mutex(mutex) { _mutex.lock(); } ~MutexLockGuard() { _mutex.unlock(); } private: MutexLock &_mutex; }; } #endif
///=======================================
/// File:    Mydict.h
/// Author:  wtptorres(1584292712@qq.com)
/// Date:    2019-06-06 11:12:19
/// Dream:   Don't forget your dreams!
/// ======================================

#ifndef __MYDICT_H__
#define __MYDICT_H__

#include <string>
#include <vector>
#include <map>
#include <utility>
#include <set>
#include <fstream>
#include <iostream>
#include <sstream>
using namespace std;

namespace wd
{

/// One candidate word produced by a lookup.
struct MyResult
{
    string _word;
    int _iFreq;  // word frequency
    int _iDist;  // minimum edit distance
};

/// In-memory dictionary (word, frequency) plus an index table that maps a key
/// to the set of dictionary positions listed for it.
class Mydict
{
public:
    /// Loads the dictionary from `dictDir` and the index table from
    /// `indexDir`. A file that cannot be opened is reported on stdout and
    /// contributes no entries beyond the two leading placeholders.
    Mydict(const string dictDir, const string indexDir)
    {
        ifstream ifs1(dictDir), ifs2(indexDir);
        if (!ifs1 || !ifs2)
            cout << "Mydict open file error!" << endl;

        string key;
        int value = 0;

        // The dictionary file starts with two entries whose key is blank, so
        // only their frequencies can be extracted. They are kept as
        // placeholders so that positions stay aligned with the index file.
        // `value` is reset before each read: when the stream is unusable the
        // placeholder gets 0 instead of an uninitialised int.
        for (int placeholder = 0; placeholder != 2; ++placeholder)
        {
            value = 0;
            ifs1 >> value;
            _dict.push_back(std::make_pair(string(""), value));
        }

        // Read word and frequency together so that a trailing word without a
        // frequency is dropped rather than stored with the previous value.
        while (ifs1 >> key >> value)
        {
            _dict.push_back(std::make_pair(key, value));
        }

        // Each index line is "<key> <position> <position> ...".
        string line;
        while (std::getline(ifs2, line))
        {
            istringstream iss(line);
            string ikey;
            int ivalue;
            iss >> ikey;
            set<int> tmp;
            while (iss >> ivalue)
            {
                tmp.insert(ivalue);
            }
            _index.insert(std::make_pair(ikey, tmp));
        }
    }

    /// @return the (word, frequency) entries in file order.
    vector<pair<string, int>> &getDict() { return _dict; }

    /// @return the key -> dictionary positions table.
    map<string, set<int>> &getIndexTable() { return _index; }

private:
    vector<pair<string, int>> _dict;
    map<string, set<int>> _index;
};

};

#endif
///=======================================
/// File:    Noncopyable.h
/// Author:  wtptorres(1584292712@qq.com)
/// Date:    2019-06-22 23:47:05
/// Dream:   Don't forget your dreams!
/// ======================================

#ifndef __WD_NONCOPYABLE_H__
#define __WD_NONCOPYABLE_H__

namespace wd
{

/// Base class for types that must not be copied or assigned. Construction and
/// destruction are only available to derived classes.
class Noncopyable
{
protected:
    Noncopyable() {}
    ~Noncopyable() {}

private:
    // Copying is unavailable to everyone, derived classes included.
    Noncopyable(const Noncopyable &) = delete;
    Noncopyable &operator=(const Noncopyable &) = delete;
};

}

#endif
///======================================= /// File: Socket.h /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-23 21:46:26 /// Dream: Don't forget your dreams! /// ====================================== #ifndef __WD_SOCKET_H__ #define __WD_SOCKET_H__ #include "Noncopyable.h" namespace wd { class InetAddress; class Socket :Noncopyable { public: Socket(int socket); Socket(); ~Socket(); void shutdownWrite(); int fd()const {return _sockfd;} void nonblock(); static InetAddress getLocalAddr(int socketfd); static InetAddress getPeerAddr(int sockfd); private: int _sockfd; }; } #endif
///======================================= /// File: Socket.cc /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-07 20:38:20 /// Dream: Don't forget your dreams! /// ====================================== #include "Socket.h" #include "SocketUtil.h" #include "InetAddress.h" namespace wd { Socket::Socket(int sockfd) :_sockfd(sockfd) {} Socket::Socket() :_sockfd(createSocketFd()) {} Socket::~Socket() { ::close(_sockfd); } void Socket::nonblock() { setNonblock(_sockfd); } void Socket::shutdownWrite() { if(-1==::shutdown(_sockfd,SHUT_WR)){ perror("shutdown write error!"); } } InetAddress Socket::getLocalAddr(int sockfd) { struct sockaddr_in addr; socklen_t len=sizeof(sockaddr_in); if(-1==::getsockname(sockfd,(struct sockaddr *)&addr,&len)){ perror("getsockname error!"); } return InetAddress(addr); } InetAddress Socket::getPeerAddr(int sockfd) { struct sockaddr_in addr; socklen_t len=sizeof(sockaddr_in); if(-1==::getpeername(sockfd,(struct sockaddr *)&addr,&len)){ perror("getpeername error!"); } return InetAddress(addr); } }
///=======================================
/// File:    SocketIO.h
/// Author:  wtptorres(1584292712@qq.com)
/// Date:    2019-06-22 17:10:23
/// Dream:   Don't forget your dreams!
/// ======================================

#ifndef __SOCKETIO_H__
#define __SOCKETIO_H__

#include <stdio.h>

namespace wd
{

/// Byte-count and line oriented read/write helpers for one socket
/// descriptor. The descriptor is borrowed, not owned.
class SocketIO
{
public:
    SocketIO(int sockfd);

    /// Reads up to count bytes into buf.
    /// @return the number of bytes read.
    size_t readn(char *buf, size_t count);

    /// Writes count bytes from buf.
    /// @return the number of bytes written.
    size_t writen(const char *buf, size_t count);

    /// Reads one '\n'-terminated line of at most max_len - 1 bytes into buf.
    size_t readline(char *buf, size_t max_len);

private:
    /// Looks at pending data without consuming it.
    size_t recv_peek(char *buf, size_t count);

    int _sockfd;
};

}

#endif
///======================================= /// File: SocketIO.cc /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-07 21:56:34 /// Dream: Don't forget your dreams! /// ====================================== #include "SocketIO.h" #include "SocketUtil.h" namespace wd { SocketIO::SocketIO(int sockfd) :_sockfd(sockfd) {} size_t SocketIO::readn(char *buf,size_t count) { size_t nleft =count; char *pbuf=buf; while(nleft>0) { int nread =::read(_sockfd,pbuf,nleft); if(-1==nread) { if(errno ==EINTR|| errno ==EAGAIN)//忽略中斷信號 continue; return EXIT_FAILURE; }else if(0==nread){ break; } pbuf =pbuf+nread; nleft=nleft-nread; } return (count -nleft); } size_t SocketIO::writen(const char * buf,size_t count) { size_t nleft =count; const char *pbuf=buf; while(nleft >0) { int nwrite=::write(_sockfd,pbuf,nleft); if(-1==nwrite) { if(errno ==EINTR) continue; return EXIT_FAILURE; } nleft =nleft -nwrite; pbuf =pbuf +nwrite; } return (count -nleft); } size_t SocketIO::recv_peek(char *buf,size_t count) { int nread; do{ nread=::recv(_sockfd,buf,count,MSG_PEEK); }while(-1==nread && errno ==EINTR); return nread; } size_t SocketIO::readline(char *buf,size_t maxlen) { size_t nleft =maxlen-1; char *pbuf=buf; size_t total=0; while(nleft>0) { size_t nread =recv_peek(pbuf,nleft); if(nread<=0) return nread; for(size_t idx =0;idx!=nread;++idx){//檢查換行符 if(pbuf[idx]=='\n'){ size_t nsize =idx +1; if(readn(pbuf,nsize)!=nsize) return EXIT_FAILURE; pbuf +=nsize; total +=nsize; *pbuf=0; return total; } } if(readn(pbuf,nread)!=nread) return EXIT_FAILURE; pbuf +=nread; nleft -=nread; total +=nread; } *pbuf=0; return maxlen-1; } }
///=======================================
/// File:    SocktUtil.h
/// Author:  wtptorres(1584292712@qq.com)
/// Date:    2019-06-05 22:01:30
/// Dream:   Don't forget your dreams!
/// ======================================

#ifndef __WD_SOCKERUTIL_H__
#define __WD_SOCKERUTIL_H__

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/eventfd.h>
#include <sys/epoll.h>

namespace wd
{

/// Creates an IPv4 TCP socket.
/// @return the descriptor, or -1 (after reporting the error) on failure.
inline int createSocketFd()
{
    int fd = ::socket(AF_INET, SOCK_STREAM, 0);
    if (-1 == fd)
    {
        perror("socket create error!");
    }
    return fd;
}

/// Adds O_NONBLOCK to the descriptor's status flags. Failures are reported
/// and leave the flags untouched.
inline void setNonblock(int fd)
{
    const int flags = ::fcntl(fd, F_GETFL, 0);
    if (-1 == flags)
    {
        // Do not feed the -1 error value back into F_SETFL as a flag set.
        perror("fcntl F_GETFL error!");
        return;
    }
    if (-1 == ::fcntl(fd, F_SETFL, flags | O_NONBLOCK))
    {
        perror("fcntl F_SETFL error!");
    }
}

/// Creates an epoll instance; exits the process on failure.
inline int createEpollFd()
{
    int efd = ::epoll_create1(0);
    if (-1 == efd)
    {
        perror("epoll create1 error!");
        exit(EXIT_FAILURE);
    }
    return efd;
}

/// Creates a non-blocking, close-on-exec eventfd with an initial count of 0.
/// @return the descriptor, or -1 (after reporting the error) on failure.
inline int createEventFd()
{
    int evtfd = ::eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    if (-1 == evtfd)
    {
        perror("eventfd create error!");
    }
    return evtfd;
}

/// Registers `fd` with the epoll instance `efd` for read events; exits the
/// process on failure.
inline void addEpollFdRead(int efd, int fd)
{
    struct epoll_event ev;
    ::memset(&ev, 0, sizeof(ev));  // keep the unused part of the data union defined
    ev.data.fd = fd;
    ev.events = EPOLLIN;
    int ret = epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev);
    if (-1 == ret)
    {
        perror("epoll ctl add error!");
        exit(EXIT_FAILURE);
    }
}

/// Removes `fd` from the epoll instance `efd`; exits the process on failure.
inline void delEpollReadFd(int efd, int fd)
{
    struct epoll_event ev;
    ::memset(&ev, 0, sizeof(ev));  // `events` was left uninitialised before
    ev.data.fd = fd;
    int ret = epoll_ctl(efd, EPOLL_CTL_DEL, fd, &ev);
    if (-1 == ret)
    {
        perror("epoll ctl delete error!");
        exit(EXIT_FAILURE);
    }
}

/// Previews pending data on `sockfd` without consuming it (MSG_PEEK),
/// retrying on EINTR.
/// @return the number of bytes peeked, 0 at end-of-stream, or
///         static_cast<size_t>(-1) when recv fails.
inline size_t recvPeek(int sockfd, void *buf, size_t len)
{
    ssize_t nread;
    do {
        nread = ::recv(sockfd, buf, len, MSG_PEEK);
    } while (nread == -1 && errno == EINTR);
    return static_cast<size_t>(nread);
}

/// Decides whether a connection is closed by previewing its data.
/// @return true when the peek reports end-of-stream or fails.
inline bool isConnectionClosed(int sockfd)
{
    char buf[1024];
    const size_t nread = recvPeek(sockfd, buf, sizeof(buf));
    if (static_cast<size_t>(-1) == nread)
    {
        perror("recvPeek error!");
        return true;
    }
    return (0 == nread);
}

}

#endif
#ifndef __WD_TASKQUEUE_H__ #define __WD_TASKQUEUE_H__ #include "MutexLock.h" #include "Condition.h" #include <queue> #include <functional> namespace wd { typedef std::function<void()>Task; class TaskQueue { public: TaskQueue(size_t queSize) :_queSize(queSize) ,_mutex() ,_notFull(_mutex) ,_notEmpty(_mutex) ,_flag(true) {} void push(Task &&task); Task pop(); bool empty()const { return _que.size()==0; } bool full()const {return _que.size()==_queSize;} void wakeup() { _flag=false; _notEmpty.notifyAll(); } private: size_t _queSize; std::queue<Task> _que; MutexLock _mutex; Condition _notFull; Condition _notEmpty; bool _flag; }; } #endif
#include "TaskQueue.h"

using namespace wd;

/// Appends `task` to the queue, blocking while the queue is full, then
/// signals one waiting consumer. Runs in the producer's thread.
void TaskQueue::push(Task &&task)
{
    MutexLockGuard autoLock(_mutex);
    while (full())
    {
        _notFull.wait();
    }
    _que.push(std::move(task));
    _notEmpty.notify();
}

/// Removes and returns the oldest task, blocking while the queue is empty and
/// still in service. Runs in the consumer's thread.
/// @return the task, or an empty Task once the queue has been shut down via
///         wakeup() (tasks still queued at that point are not handed out).
Task TaskQueue::pop()
{
    MutexLockGuard autoLock(_mutex);
    while (_flag && empty())
    {
        _notEmpty.wait();
    }

    if (!_flag)
    {
        // A default-constructed std::function states "no task" directly,
        // instead of relying on NULL converting to an empty function.
        return Task();
    }

    // Move the callable out of the queue rather than copying it.
    Task task = std::move(_que.front());
    _que.pop();
    _notFull.notify();
    return task;
}
///======================================= /// File: TcpConnection.h /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-06 17:15:33 /// Dream: Don't forget your dreams! /// ====================================== #ifndef __WD_TCPCONNECTION_H__ #define __WD_TCPCONNECTION_H__ #include "Noncopyable.h" #include "InetAddress.h" #include "Socket.h" #include "SocketIO.h" #include <string> #include <memory> #include <functional> namespace wd { class EpollPoller; class TcpConnection; typedef std::shared_ptr<TcpConnection> TcpConnectionPtr; class TcpConnection :Noncopyable ,public std::enable_shared_from_this<TcpConnection> { public: typedef std::function<void(const TcpConnectionPtr &)>TcpConnectionCallback; TcpConnection(int sockfd,EpollPoller *loop); ~TcpConnection(); std::string receive(); void send(const std::string &msg); void sendInLoop(const std::string &msg); void shutdown(); std::string toString(); void setConnectionCallback(TcpConnectionCallback cb); void setMessageCallback(TcpConnectionCallback cb); void setCloseCallback(TcpConnectionCallback cb); void handleConnectionCallback(); void handleMessageCallback(); void handleCloseCallback(); private: Socket _sockfd; SocketIO _sockIO; const InetAddress _localAddr; const InetAddress _peerAddr; bool _isShutdownWrite; EpollPoller * _loop; TcpConnectionCallback _onConnectionCb; TcpConnectionCallback _onMessageCb; TcpConnectionCallback _onCloseCb; }; } #endif
///======================================= /// File: TcpConnection.cc /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-07 22:22:22 /// Dream: Don't forget your dreams! /// ====================================== #include "TcpConnection.h" #include "EpollPoller.h" #include <string.h> #include <stdio.h> namespace wd { TcpConnection::TcpConnection(int sockfd,EpollPoller * loop) :_sockfd(sockfd) ,_sockIO(sockfd) ,_localAddr(wd::Socket::getLocalAddr(sockfd)) ,_peerAddr(wd::Socket::getPeerAddr(sockfd)) ,_isShutdownWrite(false) ,_loop(loop) {_sockfd.nonblock();} TcpConnection::~TcpConnection() { if(!_isShutdownWrite) { _isShutdownWrite=true; shutdown(); } printf("~TcpConnection()\n"); } std::string TcpConnection::receive() { char buf[65536]; memset(buf,0,sizeof(buf)); size_t ret =_sockIO.readline(buf,sizeof(buf)); if(0==ret){ return std::string(); }else{ return std::string(buf); } } void TcpConnection::send(const std::string &msg) { size_t len=msg.size(); _sockIO.writen((const char *)&len,sizeof(int)); _sockIO.writen(msg.c_str(),len); } void TcpConnection::shutdown() { if(!_isShutdownWrite) _sockfd.shutdownWrite(); _isShutdownWrite=true; } std::string TcpConnection::toString() { char str[100]; snprintf(str,sizeof(str),"%s:%d->%s:%d" ,_localAddr.ip().c_str() ,_localAddr.port() ,_peerAddr.ip().c_str() ,_peerAddr.port()); return std::string(str); } void TcpConnection::setConnectionCallback(TcpConnectionCallback cb) { _onConnectionCb =cb; } void TcpConnection::setMessageCallback(TcpConnectionCallback cb) { _onMessageCb =cb; } void TcpConnection::setCloseCallback(TcpConnectionCallback cb) { _onCloseCb =cb; } void TcpConnection::handleConnectionCallback() { if(_onConnectionCb){ _onConnectionCb(shared_from_this()); } } void TcpConnection::handleMessageCallback() { if(_onMessageCb){ _onMessageCb(shared_from_this()); } } void TcpConnection::handleCloseCallback() { if(_onCloseCb){ _onCloseCb(shared_from_this()); } } void TcpConnection::sendInLoop(const std::string & msg) { 
_loop->runInLoop(std::bind(&TcpConnection::send,this,msg)); } }
///======================================= /// File: TcpServer.h /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-06 20:15:21 /// Dream: Don't forget your dreams! /// ====================================== #ifndef __WD_TCPSERVER_H__ #define __WD_TCPSERVER_H__ #include "Acceptor.h" #include "EpollPoller.h" #include <string> using std::string; namespace wd { class TcpServer { public: typedef EpollPoller::EpollCallback TcpServerCallback; TcpServer(const string & ip,unsigned short port); TcpServer(unsigned short port); void start(); void stop(); void setConnectionCallback(TcpServerCallback cb); void setMessageCallback(TcpServerCallback cb); void setCloseCallback(TcpServerCallback cb); private: Acceptor _acceptor; EpollPoller _poller; TcpServerCallback _connectionCallback; TcpServerCallback _messageCallback; TcpServerCallback _closeCallback; }; } #endif
///======================================= /// File: TcpServer.cc /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-07 19:59:37 /// Dream: Don't forget your dreams! /// ====================================== #include "TcpServer.h" #include "InetAddress.h" #include "SocketUtil.h" #include <iostream> using namespace std; namespace wd { TcpServer::TcpServer(const string & ip,unsigned short port) :_acceptor(createSocketFd(),InetAddress(ip.c_str(),port)) ,_poller(_acceptor) {} void TcpServer::start() { _acceptor.ready(); _poller.setConnectionCallback(_connectionCallback); _poller.setMessageCallback(_messageCallback); _poller.setCloseCallback(_closeCallback); _poller.loop(); } void TcpServer::stop() { _poller.unloop(); } void TcpServer::setConnectionCallback(TcpServerCallback cb) {_connectionCallback=cb;} void TcpServer::setMessageCallback(TcpServerCallback cb) {_messageCallback=cb;} void TcpServer::setCloseCallback(TcpServerCallback cb) {_closeCallback=cb;} }
#ifndef __WD_THREAD_H__ #define __WD_THREAD_H__ #include "Noncopyable.h" #include <pthread.h> #include <functional> using std::function; namespace wd { class Thread; struct ThreadPtr { int _number; Thread *_pthread; }; class Thread :Noncopyable//此處不可複製,表達語義 { using ThreadCallback =function<void()>; public: Thread(ThreadCallback &&cb); ~Thread(); void start(int number); void join(); bool isRunning()const {return _isRunning;} private: static void * threadFunc(void *);//採用靜態成員函數,去除this的影響 pthread_t _pthid; bool _isRunning; ThreadCallback _cb; }; } #endif
#include "Thread.h"
#include <string.h>
#include <iostream>
#include <utility>
using namespace std;
using namespace wd;

// Per-thread storage: the number passed to Thread::start() for this thread.
__thread int t_number;

/// Stores the callback; the thread itself is not created until start().
/// A named rvalue reference is an lvalue, hence the explicit std::move.
Thread::Thread(ThreadCallback && cb)
:_pthid(0)
,_isRunning(false)
,_cb(std::move(cb))
{
    cout<<"Thread(cb)"<<endl;
}

/// Creates the underlying pthread.
/// @param number value stored in the new thread's t_number
/// On failure the error is reported and the object stays "not running", so a
/// later join() or destructor will not touch an invalid thread id.
void Thread::start(int number)
{
    ThreadPtr *threadPtr=new ThreadPtr();
    threadPtr->_number=number;
    threadPtr->_pthread=this;

    const int err=pthread_create(&_pthid,NULL,threadFunc,threadPtr);
    if(err!=0)
    {
        // The new thread never took ownership of the argument block.
        delete threadPtr;
        cerr<<"pthread_create error: "<<strerror(err)<<endl;
        return;
    }
    _isRunning=true;
}

/// Thread entry point: records the thread number, then runs the callback.
void *Thread::threadFunc(void *arg)
{
    ThreadPtr *threadPtr=static_cast<ThreadPtr*>(arg);
    Thread * pthread=threadPtr->_pthread;
    t_number=threadPtr->_number;
    // Release the argument block before running the task so that it is
    // freed even if the task never returns normally.
    delete threadPtr;

    if(pthread)
        pthread->_cb();
    return NULL;
}

/// Waits for the thread to finish; does nothing if it was never started.
void Thread::join()
{
    if(_isRunning)
    {
        pthread_join(_pthid,NULL);
        _isRunning=false;
    }
}

/// A thread that is still running is detached rather than joined.
/// NOTE(review): a detached thread keeps using this object's callback after
/// destruction; callers should join() first - confirm no caller relies on
/// the detach path.
Thread::~Thread()
{
    if(_isRunning)
    {
        pthread_detach(_pthid);
        _isRunning=false;
    }
    cout<<"~Thread()"<<endl;
}
#ifndef __WD_THREADPOLL_H__ #define __WD_THREADPOLL_H__ #include "TaskQueue.h" #include "Thread.h" #include <vector> #include <memory> #include <functional> using namespace std; using std::shared_ptr; using std::vector; namespace wd { class Threadpool { public: using Task=std::function<void()>; Threadpool(size_t threadNum,size_t queSize) :_threadNum(threadNum) ,_queSize(queSize) ,_taskQue(_queSize) ,_isExit(false) { _threads.reserve(_threadNum); } ~Threadpool(); void start(); void stop(); void addTask(Task && task); private: void threadFunc(); Task getTask(); size_t _threadNum; size_t _queSize; vector<shared_ptr<Thread>> _threads; TaskQueue _taskQue; bool _isExit; }; } #endif
#include "Threadpool.h"
#include "Thread.h"
#include <unistd.h>
#include <iostream>
using namespace std;
using namespace wd;

/// Creates _threadNum workers, then starts them numbered 0.._threadNum-1.
void Threadpool::start()
{
    for(size_t idx=0;idx<_threadNum;++idx)
    {
        _threads.push_back(
            std::make_shared<Thread>(std::bind(&Threadpool::threadFunc,this)));
    }

    int number=0;
    for(auto &pThread:_threads)
    {
        pThread->start(number);
        ++number;
    }
}

/// Waits (polling once per second) until the queue is empty, then sets the
/// exit flag, wakes the queue and joins every worker.
void Threadpool::stop()
{
    if(_isExit)
        return;

    while(!_taskQue.empty()){
        ::sleep(1);
        cout<<"Threadpool sleep 1 second!"<<endl;
    }

    _isExit=true;
    cout<<"Threadpool ->stop:_isExit="<<_isExit<<endl;

    // Release workers that are waiting inside the queue so they can see the
    // flag and leave their loop.
    _taskQue.wakeup();
    for(auto &pthread:_threads){
        pthread->join();
    }
}

Threadpool::~Threadpool()
{
    if(!_isExit){
        stop();
    }
}

/// Queues a task for execution by one of the workers.
void Threadpool::addTask(Task && task)
{
    _taskQue.push(std::move(task));
}

/// Takes the next task from the queue.
// The return type needs the class qualifier: the nested alias `Task` is not
// in scope before `Threadpool::` has been seen.
Threadpool::Task Threadpool::getTask()
{
    return _taskQue.pop();
}

/// Worker body: keep fetching and running tasks until the pool is stopped.
void Threadpool::threadFunc()
{
    while(!_isExit)
    {
        Task task=getTask();
        if(task){
            task();  // how long a task runs is not known in advance
        }
    }
}
///=======================================
/// File:    Timer.h
/// Author:  wtptorres(1584292712@qq.com)
/// Date:    2019-06-06 20:00:45
/// Dream:   Don't forget your dreams!
/// ======================================

#ifndef __WD_TIMER_H__
#define __WD_TIMER_H__

#include <atomic>
#include <functional>

namespace wd
{

/// Periodic timer built on a timerfd descriptor: start() blocks in a poll
/// loop and runs the callback on every expiration until stop() is called.
class Timer
{
public:
    using TimerCallback =std::function<void()>;

    /// @param initailTime  seconds until the first expiration
    /// @param intervalTime seconds between later expirations
    /// @param cb           callback run after each expiration
    Timer(int initailTime,int intervalTime,TimerCallback && cb);
    ~Timer();

    void start();
    void stop();

private:
    int _fd;
    int _initialTime;
    int _intervalTime;
    TimerCallback _cb;
    // stop() is called from a different thread than the one blocked in
    // start() (see TimerThread), so the flag must be atomic.
    std::atomic<bool> _isStarted;

    int createTimerFd();
    void setTimerfd(int initialTime, int intervalTime);
    void handleRead();
};

}

#endif
///======================================= /// File: Timer.cc /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-07 20:09:14 /// Dream: Don't forget your dreams! /// ====================================== #include "Timer.h" #include <unistd.h> #include <errno.h> #include <poll.h> #include <sys/timerfd.h> #include <iostream> using namespace std; using namespace wd; Timer::Timer(int initialTime,int intervalTime,TimerCallback && cb) :_fd(createTimerFd()) ,_initialTime(initialTime) ,_intervalTime(intervalTime) ,_cb(std::move(cb)) ,_isStarted(false) {} void Timer::start() { struct pollfd pfd; pfd.fd=_fd; pfd.events=POLLIN; setTimerfd(_initialTime,_intervalTime); _isStarted=true; while(_isStarted){ int nready=::poll(&pfd,1,5000); if(-1==nready &&errno ==EINTR){ continue; }else if(-1==nready){ perror(">>>poll error!"); exit(EXIT_FAILURE); }else if(0==nready){ cout<<">>>poll timeout!"<<endl; }else{ if(pfd.revents & POLLIN){ handleRead(); if(_cb){ _cb(); } } } } } void Timer::stop() { setTimerfd(0,0); if(_isStarted){ _isStarted=false; } } Timer::~Timer() { if(_isStarted){ stop(); } } int Timer::createTimerFd() { int fd=::timerfd_create(CLOCK_REALTIME,0); if(-1==fd){ perror(">>timerfd_create error!"); } return fd; } void Timer::setTimerfd(int initialTime,int intervalTime) { struct itimerspec value; value.it_value.tv_sec=initialTime; value.it_value.tv_nsec=0; value.it_interval.tv_sec=intervalTime; value.it_interval.tv_nsec=0; int ret=::timerfd_settime(_fd,0,&value,NULL); if(-1==ret){ perror(">>>timerfd_settime error!"); } } #if 0 void Timer::handleRead() { uint64_t howmany; int ret =::read(_fd,&howmany,sizeof(uint64_t)); if(ret!=sizeof(uint64_t)){ perror("read!"); } } #endif void Timer::handleRead() { uint64_t howmany; int ret=::read(_fd,&howmany,sizeof(uint64_t)); if(ret!=sizeof(uint64_t)){ perror(">>>read error!"); } }
///======================================= /// File: TimerThread.h /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-06 20:07:05 /// Dream: Don't forget your dreams! /// ====================================== #ifndef __WD_TIMERTHREAD_H__ #define __WD_TIMERTHREAD_H__ #include "Timer.h" #include "Thread.h" #include <functional> namespace wd { class TimerThread { public: using TimerCallback =std::function<void()>; TimerThread(int ,int ,TimerCallback && cb); ~TimerThread(); void start(); void stop(); private: Timer _timer; Thread _subThread; bool _isStarted; }; } #endif
///======================================= /// File: TimerThread.cc /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-07 17:12:51 /// Dream: Don't forget your dreams! /// ====================================== #include "TimerThread.h" using namespace wd; TimerThread::TimerThread(int initialTime,int intervalTime,TimerCallback &&cb) :_timer(initialTime,intervalTime,std::move(cb)) ,_subThread(std::bind(&Timer::start,&_timer)) ,_isStarted(false) {} void TimerThread::start() { _subThread.start(0); _isStarted =true; } void TimerThread::stop() { if(_isStarted){ _timer.stop(); _subThread.join(); _isStarted=false; } } TimerThread::~TimerThread() { if(_isStarted) stop(); }
///======================================= /// File: WebPage.h /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-23 10:35:35 /// Dream: Don't forget your dreams! /// ====================================== #ifndef __WEBPAGE_H__ #define __WEBPAGE_H__ #include "ConFiguration.h" #include "WordSegmentation.h" #include <iostream> #include <string> #include <vector> #include <map> #include <set> using namespace std; class WebPage { public: WebPage(string doc,Configuration & config,WordSegmentation & jieba); int getDocId(); const string & getDoc() const; map<string,int>& getWordMap();//獲取文檔的詞頻統計map const string & getTitle()const; const string & getUrl() const; WebPage(const WebPage &rhs); WebPage(WebPage && rhs); WebPage & operator =(WebPage && rhs); WebPage & operator=(const WebPage & rhs); string _docSummary; private: void processDoc(string & doc ,WordSegmentation&); string _doc; int _docId; string _docTitle; string _docUrl; string _docContent; map<string,int> _wordsMap; Configuration & _conf; }; #endif
///======================================= /// File: WebPage.cc /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-25 10:35:57 /// Dream: Don't forget your dreams! /// ====================================== #include "WebPage.h" #include <stdlib.h> #include <iostream> #include <utility> #include <algorithm> #include <functional> using namespace std; bool cmp(pair<string,int> a,pair<string,int> b) { if(a.second != b.second){ return a.second>b.second; }else{ return a.first.size()<b.first.size(); } } const string & WebPage::getUrl() const { return _docUrl; } const string & WebPage::getTitle() const { return _docTitle; } WebPage::WebPage(string doc,Configuration & config,WordSegmentation & jieba) :_doc(std::move(doc)) ,_conf(config) { auto n1 =_doc.find("<docid>");//在格式化字符串取得id auto cit1 =_doc.begin()+n1+7; auto n2 =_doc.find("</docid>");//在格式化字符串取得id auto cit2 =_doc.begin()+n1+n2; string docId(cit1,cit2); _docId =::atoi(docId.c_str()); n1 =_doc.find("<url>");//在格式化字符串取得url cit1 =_doc.begin()+n1+5; n2 =_doc.find("</url>");//在格式化字符串取得url cit2 =_doc.begin()+n1+n2; _docUrl.insert(_docUrl.end(),cit1,cit2); n1 =_doc.find("<title>");//在格式化字符串取得title n2 =_doc.find("</title>"); cit1 =_doc.begin()+n1+7; cit2 =_doc.begin()+n2; _docTitle.insert(_docTitle.end(),cit1,cit2); n1 =_doc.find("<content>");//在格式化字符串取得content n2 =_doc.find("</content>"); cit1 =_doc.begin()+n1+9; cit2 =_doc.begin()+n2; _docTitle.insert(_docContent.end(),cit1,cit2); processDoc(_docContent,jieba); } void WebPage::processDoc(string & doc,WordSegmentation & jieba) { vector<string> results =jieba.Cut(doc); for(auto myresult:results) { auto cit =_wordsMap.find(myresult); if(cit ==_wordsMap.end()) _wordsMap.insert(std::make_pair(myresult,1)); else ++cit->second; } } int WebPage::getDocId() { return _docId; } const string & WebPage::getDoc() const { return _doc; } map<string ,int> & WebPage::getWordsMap() { return _wordsMap; } WebPage & WebPage::operator=(WebPage && rhs) { _doc =std::move(rhs._doc); _docId 
=std::move(rhs._docId); _docTitle =std::move(rhs._docTitle); _docUrl=std::move(rhs._docUrl); _docSummary=std::move(rhs._docSummary); _docContent=std::move(rhs._docContent); _wordsMap =std::move(rhs._wordsMap); _conf =rhs._conf; return *this; } WebPage & WebPage::operator=(const WebPage & rhs) { _doc =rhs._doc; _docId =rhs._docId; _docTitle=rhs._docTitle; _docUrl=rhs._docUrl; _docSummary=rhs._docSummary; _docContent=rhs._docContent; _wordsMap=rhs._wordsMap; _conf=rhs._conf; return *this; } WebPage::WebPage(const WebPage & rhs) :_conf(rhs._conf) { _doc =rhs._doc; _docId =rhs._docId; _docTitle=rhs._docTitle; _docUrl=rhs._docUrl; _docSummary=rhs._docSummary; _docContent=rhs._docContent; _wordsMap=rhs._wordsMap; } WebPage::WebPage(WebPage && rhs) :_conf(std::ref((rhs._conf))) { _doc =std::move(rhs._doc); _docId=std::move(rhs._docId); _docUrl=std::move(rhs._docUrl); _docTitle=std::move(rhs._docTitle); _docContent=std::move(rhs._docContent); _docSummary=std::move(rhs._docSummary); _wordsMap=std::move(rhs._wordsMap); }
///======================================= /// File: WordQuery.h /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-23 10:54:39 /// Dream: Don't forget your dreams! /// ====================================== #ifndef __WORDQUERY_H__ #define __WORDQUERY_H__ #include "ConFiguration.h" #include "WordSegmentation.h" #include "WebPage.h" #include "TcpConnection.h" using namespace std; #include <unordered_map> #include <string> #include <vector> #include <iostream> #include <utility> namespace wd { class WordQuery { public: WordQuery(Configuration & conf); void doQuery(string &str,TcpConnnectionPtr & conn); private: void loadLibary(); map<string,int>getQueryWordsMap(vector<string> & queryWords); vector<pair<string,double>>getQueryWordsWeightVector(map<string,int>&); bool exexuteQuery(const vector<pair<string,double>>&,vector<pair<int,vector<double>>>&); string createJson(vector<pair<int,double>>&); string returnNoAnswer(); int sumApperaance(const string& word); void processQuery(string& line); Configuration & _conf; WordSegmentation _jieba; unordered map<int,WebPage> _pageLib; unordered map<int,pair<int,int>> _offsetLib; unordered map<string,set<pair<int,double>>> _inverIndexTable; }; }; #endif
///======================================= /// File: WordQuery.cc /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-23 10:54:39 /// Dream: Don't forget your dreams! /// ====================================== #include "WordQuery.h" #include "TcpConnection.h" #include "WebPage.h" #include "json/json.h" #include <math.h> #include <iostream> #include <fstream> #include <sstream> #include <algorithm> #include <map> #include <utility> #include <cctype> #define DEBUG 0 using namespace std; using namespace wd; WordQuery::WordQuery(Configuration& conf) :_conf(conf) ,_jieba() { loadLibary(); } void WordQuery::loadLibary() { map<string,string>& config = _conf.getConfigMap(); string pageLibPath = config["myPageLib"]; string offsetLibPath = config["myOffsetLib"]; string indexLibPath = config["myIndexLib"]; ifstream ifsPage(pageLibPath); ifstream ifsOffset(offsetLibPath); ifstream ifsIndex(indexLibPath); if(!ifsPage || !ifsOffset ||!ifsIndex) { cout << "loadLibary file open error" << endl; return; } int offset,len; while(ifsOffset >> offset) { string doc = ""; ifsOffset >> len; ifsPage.seekg(offset,std::ios_base::beg); int left = len; while(left > 0) { char tmp; ifsPage >> tmp; --left; doc += tmp; } WebPage tmpPage(doc,_conf,_jieba); int id = tmpPage.getDocId(); pair<int,int> myoffset = std::make_pair(offset,len); _offsetLib.insert(std::make_pair(id,myoffset)); _pageLib.insert(std::make_pair(id,tmpPage)); } string indexLine; while(std::getline(ifsIndex,indexLine)) { istringstream iss(indexLine); string word; iss >> word; int key; double value; while(iss >> key) { iss >> value; auto cit = _inverIndexTable.find(word); if(cit == _inverIndexTable.end()){ set<pair<int,double>> tmp; tmp.insert(std::make_pair(key,value)); _inverIndexTable.insert(std::make_pair(word,tmp)); }else{ cit->second.insert(std::make_pair(key,value)); } } } } void WordQuery::processQuery(string& line) { auto cit = line.begin(); for(;cit != line.end(); ++cit) { if(isspace(*cit)){ cit = line.erase(cit); 
--cit; } } } void WordQuery::doQuery(string& str,TcpConnectionPtr& conn) { #if 0 //DEBUG cout << "In WordQuery:: doQuery" << endl; #endif processQuery(str); //將str中的空白符去除 vector<string> queryWords = _jieba.Cut(str); for(auto word:queryWords) cout << ">>> cut word is " << word << endl; map<string,int> processedQueryWords = getQueryWordsMap(queryWords); //返回處理過的查詢詞(合併相同的查詢詞) vector<pair<string,double>> queryWordsWeightVec = getQueryWordsWeightVector(processedQueryWords); #if 0 //DEBUG for(auto a:queryWordsWeightVec) { cout << ">>>> in queryWordsWeightVec " << a.first << " " << a.second << endl; } #endif string firstQueryWord = queryWordsWeightVec.begin()->first; double moduleQuery = 0; for(auto queryPair:queryWordsWeightVec) moduleQuery += queryPair.second * queryPair.second; moduleQuery = ::sqrt(moduleQuery); #if DEBUG cout << ">>>>moduleQuery is " << moduleQuery << endl; #endif vector<pair<int,vector<double>>> resultVec; bool flag = executeQuery(queryWordsWeightVec,resultVec); if(flag){ #if DEBUG cout << "flag is true" << endl; #endif vector<pair<int,double>> cosinRes; for(auto myres:resultVec) //生成摘要並計算模 { double moduleRes = 0; double sum = 0; int docId = myres.first; auto pageIter = _pageLib.find(docId); WebPage& mypage = pageIter->second; string pagedoc = mypage.getDoc(); //一篇pagedoc就是一行 int pos = pagedoc.find(firstQueryWord); string docline(pagedoc.begin() + pos -50,pagedoc.begin() + pos +50); mypage._docSummary = docline; #if DEBUG cout << ">>>> SUMMARY IS " << docline << endl; #endif for(size_t i=0;i<myres.second.size();++i) { sum += myres.second[i]*(queryWordsWeightVec[i].second); moduleRes += (myres.second)[i] * (myres.second)[i]; } moduleRes = ::sqrt(moduleRes); cosinRes.push_back(std::make_pair(docId,sum/(moduleRes * moduleQuery))); } string jsonResult = createJson(cosinRes); conn->sendInLoop(jsonResult); #if DEBUG cout << ">>>JsonResult is " << jsonResult << endl; #endif }else{ #if DEBUG cout << "flag is false" << endl; #endif string jsonResult = 
returnNoAnswer(); conn->sendInLoop(jsonResult); } } string WordQuery::returnNoAnswer() { Json::FastWriter writerinfo; Json::Value obj,new_item; new_item["標題"] = "404,not found"; new_item["摘要"] = "親,I cannot find what you want. What a pity"; obj.append(new_item); string strEmail = writerinfo.write(obj); return strEmail; } bool cmp(pair<int,double> a,pair<int,double> b) { if(a.second!=b.second) return a.second > b.second; else return a.first <= b.first ; } string WordQuery::createJson(vector<pair<int,double>>& resultVec) { std::sort(resultVec.begin(),resultVec.end(),cmp); Json::FastWriter writerinfo; Json::Value arrayObj; auto cit = resultVec.begin(); for(;cit != resultVec.begin() + 10&& cit != resultVec.end(); ++cit) { auto pageIter = _pageLib.find(cit->first); WebPage& mypage = pageIter->second; Json::Value new_item; new_item["標題"] = mypage.getTitle(); new_item["url"] = mypage.getUrl(); new_item["摘要"] = mypage._docSummary; arrayObj.append(new_item); } string strEmail = writerinfo.write(arrayObj); return strEmail; } bool WordQuery::executeQuery(const vector<pair<string,double>>& queryWordsWeightVec,vector<pair<int,vector<double>>>& resultVec) { #if 1 //找到文章Id的集合 auto iter = queryWordsWeightVec.begin(); auto cit = _inverIndexTable.find(iter->first); set<int> docIdGather; if(cit == _inverIndexTable.end()) return false; set<pair<int,double>> articleGather = cit->second; //取出第一個查詢詞的全部文章 for(auto oneArticle:articleGather) //對查詢詞中的文章進行查詢,若其餘查詢詞也在其中,則爲結果文章 { bool flag = true; int docId = oneArticle.first; auto pageIter = _pageLib.find(docId); WebPage& tmp = pageIter->second; map<string,int>& wordsMap = tmp.getWordsMap(); for(auto myQueryWord: queryWordsWeightVec) { auto queryIter = wordsMap.find(myQueryWord.first); if(queryIter == wordsMap.end()){ //若是有一篇文章中不包含全部查詢詞,就返回false flag = false; } } if(flag){ vector<double> weightVec; docIdGather.insert(docId); } } #endif if(docIdGather.size() == 0) return false; else{ for(auto docId: docIdGather) //查找每一篇文章中對應查詢詞的權重,構成向量 { 
vector<double> tmp; for(auto myQueryWord:queryWordsWeightVec) { auto citer = _inverIndexTable.find(myQueryWord.first); for(auto mypair:citer->second) { if(mypair.first == docId){ tmp.push_back(mypair.second); break; } } } resultVec.push_back(std::make_pair(docId,tmp)); } return true; } } vector<pair<string,double> > WordQuery::getQueryWordsWeightVector(map<string,int>& processedQueryWords) { #if DEBUG cout << "In WordQuery getQueryWordsWeightVector() " << endl; #endif vector<pair<string,double>> tmp; double w_total = 0; for(auto& mypair:processedQueryWords) { int tf = mypair.second; int DF = sumApperaance(mypair.first) + 1; int N = _pageLib.size() + 1; #if DEBUG //DEBUG cout << " >> N is " << N << endl; cout << ">>>> DF is " << DF << endl; #endif double w = (double)tf * :: log10(N/DF)/ ::log10(2); #if DEBUG //DEBUG cout << ">>>>> w is " << w << endl; #endif w_total += w*w; tmp.push_back(std::make_pair(mypair.first,w)); } #if DEBUG cout << ">>> w_total is " << w_total << endl; #endif if(w_total) for(auto& mypair:tmp) //歸一化處理 { mypair.second /= (::sqrt(w_total)); #if DEBUG cout << "In the tmpVec w is " << mypair.second << endl; #endif } return tmp; } int WordQuery::sumApperaance(const string& word) { #if DEBUG cout << "In sumApperaance" << endl; #endif int cnt = 0; for(auto& mypage:_pageLib) { map<string,int>& oneArticle = mypage.second.getWordsMap(); auto cit = oneArticle.find(word); if(cit != oneArticle.end()) ++cnt; } return cnt; } map<string,int> WordQuery::getQueryWordsMap(vector<string>& queryWords) { #if DEBUG //DEBUG cout << "In the getQueryWordsMap" << endl; #endif map<string,int> tmp; for(auto word:queryWords) { auto cit = tmp.find(word); if(cit == tmp.end()) { tmp.insert(std::make_pair(word,1)); }else ++cit->second; } #if DEBUG //DEBUG for(auto mytmp:tmp) cout << ">>> In tmp Map " << mytmp.first << " " << mytmp.second << endl; #endif return tmp; }
///======================================= /// File: WordQueryServer.h /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-25 10:14:30 /// Dream: Don't forget your dreams! /// ====================================== #ifndef __WORDQUERYSERVER_H__ #define __WORDQUERYSERVER_H__ #include "TcpServer.h" #include "Condition.h" #include "WordQuery.h" #include "Threadpool.h" #include "TcpConnection.h" #include <iostream> #include <utility> using namespace std; namespace wd { class WordQueryServer { public: WordQueryServer(const string & filename); void start(); private: void onConnection(const TcpConnnectionPtr & conn); void onMessage(const TcpConnnectionPtr & conn); void onClose(const TcpConnectionPtr & conn); void doTaskThread(const TcpConnectionPtr & conn,const string & msg); private: Configuration _conf; WordQuery _wordQuery; TcpServer _tcpServer; Threadpool _pool; }; } #endif
///======================================= /// File: WordQueryServer.cc /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-25 10:14:30 /// Dream: Don't forget your dreams! /// ====================================== #include "WordQueryServer.h" #include <stdlib.h> #include <stdio.h> #include <iostream> #include <functional> #include <string> #define THREADNUM 4 #define QUESIZE 10 using std::cout; using std::endl; using std::string; using namespace wd; WordQueryServer::WordQueryServer(const string& filename) :_conf(filename) ,_wordQuery(_conf) ,_tcpServer(_conf.getIp(),_conf.getPort()) ,_pool(THREADNUM,QUESIZE) {} void WordQueryServer::onConnection(const TcpConnectionPtr& conn) { cout << conn->toString() << endl; conn->send("hello,welcome to Chat Server.\r\n"); } void WordQueryServer::onMessage(const TcpConnectionPtr& conn) { string s(conn->receive()); _pool.addTask(std::bind(&WordQuery::doQuery,&_wordQuery,s,conn)); cout << "> add task to threadpool" << endl; } void WordQueryServer::onClose(const TcpConnectionPtr& conn) { ::printf("%s close\n",conn->toString().c_str()); } void WordQueryServer::start() { _pool.start(); _tcpServer.setConnectionCallback(std::bind(&WordQueryServer::onConnection,this,std::placeholders::_1)); _tcpServer.setMessageCallback(std::bind(&WordQueryServer::onMessage,this,std::placeholders::_1)); _tcpServer.setCloseCallback(std::bind(&WordQueryServer::onClose,this,std::placeholders::_1)); _tcpServer.start(); }
///======================================= /// File: WordSegmentation.h /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-23 10:45:14 /// Dream: Don't forget your dreams! /// ====================================== #ifndef __WORDSEGMENTATIAON_H__ #define __WORDSEGMENTATIAON_H__ #include "/home/wtp/search/cppjieba/include/cppjieba/Jieba.hpp" #include <string> #include <vector> using namespace std; const char * const DICT_PATH="/home/wtp/search/cppjieba/dict/jieba.dict.utf8"; const char * const HMM_PATH="/home/wtp/search/cppjieba/dict/hmm_model.utf8"; const char * const USER_DICT_PATH="/home/wtp/search/cppjieba/dict/user.dict.utf8"; const char * const IDF_PATH="/home/wtp/search/cppjieba/dict/idf.utf8"; const char * const STOP_WORD_PATH="/home/wtp/search/cppjieba/dict/stop_words.utf8"; class WordSegmentation { public: WordSegmentation() :_jieba(DICT_PATH,HMM_PATH,USER_DICT_PATH,IDF_PATH,STOP_WORD_PATH) {} vector<string> Cut(string & sentence) { vector<string>tmp; _jieba.Cut(sentence,tmp); return tmp; } private: cppjieba::Jieba _jieba; }; #endif
///======================================= /// File: main.cc /// Author: wtptorres(1584292712@qq.com) /// Date: 2019-06-07 21:09:32 /// Dream: Don't forget your dreams! /// ====================================== #include "WordQueryServer.h" #include "ConFiguration.h" #include "CacheManger.h" #include "TimerThread.h" #include <iostream> #include <functional> using namespace std; using namespace wd; int main() { WordQueryServer wordQueryServer("./conf/configure.txt"); wordQueryServer.start(); return 0; }
(1)已經實現項目需求,中文和英文單詞都能查詢
(2)相比spellcorrect的衆多bug,這一次幾乎沒有測試出bug
(3)因爲時間問題,沒有導入php前端進行測試,後期考慮加入用戶界面,以及Redis內存緩存(mysql)
(4)陳碩的《Linux多線程服務端編程》使用Linux專有接口(timerfd)實現定時器,沒有使用POSIX定時器接口(timer_create);注意eventfd同樣是Linux專有接口,並不屬於POSIX