-對多維度特徵進行topk排序,使用c++ 優先隊列模擬最大堆.node
/* ---------------------------------- Version : ?? File Name : dm2.py Description : Author : xijun1 Email : Date : 2018/12/7 ----------------------------------- Change Activity : 2018/12/7 ----------------------------------- __author__ = 'xijun1' */ #include <iostream> #include <queue> #include <unordered_map> #include <future> using namespace std; class MinHeapUtil { private: struct HeapGreatcompare { // min heap bool operator()(const pair<unsigned int, int>& node_a, const pair<unsigned int, int>& node_b) { return node_a.second > node_b.second; } }; std::vector< std::priority_queue< std::pair<unsigned int, int>, std::vector< std::pair<unsigned int, int> > ,HeapGreatcompare> > pq_mat; unsigned int max_topk; public: bool setShape(const int dim , const unsigned int max_topk ); int getdim(){ return this->pq_mat.size(); } int getMaxTopK(){ return this->max_topk; } size_t getSize( int k ){ return this->pq_mat[ k ].size(); } int push_back(std::vector<std::pair<unsigned int, int> >& data , int k ); int push_back( std::pair<unsigned int, int> word_prob , int k ); int push_back( std::pair<unsigned int, int> &word_prob , int k ); std::pair<unsigned int , int >get_itop(int k){ auto word_prob = this->pq_mat[ k ].top(); this->pq_mat[ k ].pop(); return word_prob; } }; bool MinHeapUtil::setShape(const int dim, const unsigned int max_topk) { this->pq_mat.resize(dim ); this->max_topk = max_topk; return true; } int MinHeapUtil::push_back(std::vector< std::pair<unsigned int, int> > &data ,int k) { for (auto const& word_prob : data) { this->pq_mat[ k ].push(std::make_pair(word_prob.second, word_prob.first)); if ( this->pq_mat[ k ].size() > this->max_topk) //維護這個K大小 this->pq_mat[ k ].pop(); } return 0; } int MinHeapUtil::push_back(std::pair<unsigned int, int> word_prob, int k) { if( this->pq_mat[k].size() < this->max_topk || this->pq_mat[ k ].top().second < word_prob.second) { this->pq_mat[k].push(word_prob); if (this->pq_mat[k].size() > this->max_topk) //維護這個K大小 this->pq_mat[k].pop(); } return 0; } int MinHeapUtil::push_back(std::pair<unsigned int, int> &word_prob, int k) { this->pq_mat[k].push(word_prob); if (this->pq_mat[k].size() > this->max_topk) //維護這個K大小 this->pq_mat[k].pop(); return 0; } std::unordered_map<unsigned int , std::vector< int > > test_data; int main() { MinHeapUtil top_words ; top_words.setShape(5,4); int total_size = 5; // MinHeapUtil.push_back(std::pair<unsigned int, int>(8,2),0); // MinHeapUtil.push_back(std::pair<unsigned int, int>(7,3),0); // MinHeapUtil.push_back(std::pair<unsigned int, int>(6,4),0); // MinHeapUtil.push_back(std::pair<unsigned int, int>(5,5),0); // MinHeapUtil.push_back(std::pair<unsigned int, int>(4,6),0); // MinHeapUtil.push_back(std::pair<unsigned int, int>(3,7),0); // MinHeapUtil.push_back(std::pair<unsigned int, int>(2,4),0); // MinHeapUtil.push_back(std::pair<unsigned int, int>(1,8),0); for (int j = 0; j < 1000000; ++j) { std::vector< int > tmp(5); for (int i = 0; i < 5; ++i) { tmp[i] = j*5 + i; } test_data[j] = std::move(tmp); } struct save_func { void operator () (int tn ,int thread_nums,int total_size , MinHeapUtil *top_words ) { std::cout<<tn <<" : "<< thread_nums<<" : "<<total_size<<std::endl; for (int i = tn; i < total_size ; i+=thread_nums) { for ( auto word_key : test_data) { //std::cout<<word_key.first; top_words->push_back( std::pair<unsigned int, int>(word_key.first, word_key.second[ i ] ) , i ); } } std::cout << "func " << tn << std::endl; } } func; unsigned thread_nums = 8; std::vector<std::future< void >> muliti_topword_threads; for (unsigned tn = 0; tn < thread_nums; ++tn) muliti_topword_threads.emplace_back( std::async(std::launch::async, func, (int)tn,(int)thread_nums,total_size,&top_words)); for (unsigned tn = 0; tn < thread_nums; ++tn) muliti_topword_threads[tn].get(); for (int k = 0; k < total_size; ++k) { size_t range = top_words.getSize( k ); for (int i = 0; i < range; ++i) { auto temp = top_words.get_itop( k ); std::cout <<"k:"<<k<<" ( " << temp.first << " " << temp.second << ")\n"; } } return 0; }
--結果ios
01 : : 88 : : 55 2 : 8 : 5 3 : 8 : 5 4 : 8 : 5 5 : 8 : 5 func 5 6 : 8 : 5 func 6 7 : 8 : 5 func 7 func 1 func 4 func 0 func 2 func 3 k:0 ( 999996 4999980) k:0 ( 999997 4999985) k:0 ( 999998 4999990) k:0 ( 999999 4999995) k:1 ( 999996 4999981) k:1 ( 999997 4999986) k:1 ( 999998 4999991) k:1 ( 999999 4999996) k:2 ( 999996 4999982) k:2 ( 999997 4999987) k:2 ( 999998 4999992) k:2 ( 999999 4999997) k:3 ( 999996 4999983) k:3 ( 999997 4999988) k:3 ( 999998 4999993) k:3 ( 999999 4999998) k:4 ( 999996 4999984) k:4 ( 999997 4999989) k:4 ( 999998 4999994) k:4 ( 999999 4999999)