本文首發於我的博客 https://kezunlin.me/post/8d877e63/ ,歡迎閱讀!
cpp caffe net run in multiple threads
Caffe fails to use GPU in a new thread ???
see here
The `Caffe::mode_` variable that controls this is thread-local, so ensure you're calling `caffe.set_mode_gpu()` in each thread before running any Caffe functions. That should solve your issue.

Caffe set_mode GPU 在多線程下失效

在 main thread 中設置 GPU 模式,在 worker thread 中調用網絡進行檢測時,GPU 模式不生效,默認仍然使用 CPU 模式,因此速度很慢,和 GPU 相比慢了 10 倍左右。

解決方案:在子線程中 set_mode,然後調用網絡進行檢測。
(1) 在 main thread 中創建網絡。static 網絡存儲在全局靜態數據區,worker thread 可以直接使用。
(2) 在 worker thread 中檢測時,需要先在子線程中 set_mode,然後調用網絡進行檢測。

結論:
(1) caffe 的 set_mode 所在的線程必須和使用 nets 進行 forward 的線程相同,否則默認使用 CPU 模式,速度會很慢。
(2) caffe 的 nets 初始化可以在 main thread,也可以在 worker thread。
#include <iostream> #include <string> #include <thread> #include <gtest/gtest.h> #include <glog/logging.h> #include <boost/date_time/posix_time/posix_time.hpp> // opencv #include <opencv2/core.hpp> #include <opencv2/highgui.hpp> #include <opencv2/imgproc.hpp> using namespace std; #include "algorithm/algorithm.h" using namespace kezunlin::algorithm; #pragma region net-demo void topwire_demo(bool run_in_worker_thread) { if (run_in_worker_thread) { CaffeApi::set_mode(true, 0, 1234);// set in worker thread-1, use GPU-0 } // do net detect // ... } void railway_demo(bool run_in_worker_thread) { if (run_in_worker_thread) { CaffeApi::set_mode(true, 0, 1234);// set in worker thread-1, use GPU-0 } // do net detect // ... } void sidewall_demo(bool run_in_worker_thread) { if (run_in_worker_thread) { CaffeApi::set_mode(true, 0, 1234);// set in worker thread-1, use GPU-0 } // do net detect // ... } void lockcatch_demo(bool run_in_worker_thread) { if (run_in_worker_thread) { CaffeApi::set_mode(true, 0, 1234);// set in worker thread-1, use GPU-0 } // do net detect // ... 
} #pragma endregion #pragma region worker-thread-demo void worker_thread_topwire_demo(bool run_in_worker_thread) { std::thread thr(topwire_demo, run_in_worker_thread); thr.join(); } void worker_thread_railway_demo(bool run_in_worker_thread) { std::thread thr(railway_demo, run_in_worker_thread); thr.join(); } void worker_thread_sidewall_demo(bool run_in_worker_thread) { std::thread thr(sidewall_demo, run_in_worker_thread); thr.join(); } void worker_thread_lockcatch_demo(bool run_in_worker_thread) { std::thread thr(lockcatch_demo, run_in_worker_thread); thr.join(); } #pragma endregion enum DETECT_TYPE { SET_IN_MAIN_DETECT_IN_MAIN, // 主線程set_mode,主線程檢測,40ms左右,使用GPU SET_IN_WORKER_DETECT_IN_WORKER, // 子線程set_mode,子線程檢測,40ms左右,使用GPU SET_IN_MAIN_DETECT_IN_WORKER // 主線程set_mode,子線程檢測,400ms左右,慢了10倍左右,沒有使用GPU }; void thread_demo() { DETECT_TYPE detect_type = SET_IN_MAIN_DETECT_IN_MAIN; detect_type = SET_IN_WORKER_DETECT_IN_WORKER; detect_type = SET_IN_MAIN_DETECT_IN_WORKER; init_algorithm_api(); switch (detect_type) { case SET_IN_MAIN_DETECT_IN_MAIN: topwire_demo(false); railway_demo(false); sidewall_demo(false); lockcatch_demo(false); break; case SET_IN_WORKER_DETECT_IN_WORKER: worker_thread_topwire_demo(true); worker_thread_railway_demo(true); worker_thread_sidewall_demo(true); worker_thread_lockcatch_demo(true); break; case SET_IN_MAIN_DETECT_IN_WORKER: worker_thread_topwire_demo(false); worker_thread_railway_demo(false); worker_thread_sidewall_demo(false); worker_thread_lockcatch_demo(false); break; default: break; } free_algorithm_api(); } void test_algorithm_api() { thread_demo(); } TEST(algorithn_test, test_algorithm_api) { test_algorithm_api(); }