c++ - 使用 boost::asio 的线程组
问题描述
我编写了一个小程序，目的如下：
- 它读取一个目录，并把其中的每个路径分配给一个线程，由该线程计算该目录的大小，例如：
- /home/dir1
- /home/dir2
我确实参考了这个问题。
但我发现它比单线程版本的程序还要慢。是不是哪里写错了？
#include<boost/tokenizer.hpp>
#include<boost/asio.hpp>
#include <boost/bind/bind.hpp>
#include "scan_dir.h" //local file
using namespace std::chrono;
/*
 * Gather the directories found beneath a project path.
 * Input  : proj  - project path, forwarded to Util::traverse_dir together
 *                  with a second argument of 1
 * Output : dirs  - overwritten with the result of Util::traverse_dir
 *          count - set to the number of entries now held in dirs
 */
void process_dir(const std::string &proj, uint64_t &count, std::vector<std::string> &dirs) {
    // Progress banner; std::endl also flushes stdout.
    std::cout << "Creating Directory" << std::endl;

    // NOTE: an earlier variant used Util::get_top_dir_depth(proj, 0) here.
    dirs = Util::traverse_dir(proj, 1);

    // Report the size of the list that was just stored.
    count = dirs.size();
}
int main(int argc, char *argv[]) {
po::options_description desc("DiskAnalyzer Tool");
po::variables_map vm;
std::string user, proj;
uint64_t f_size, maxdepth=0, dir_size=0;
bool show_dir;
Dirs d;
desc.add_options()
("help,h", "DiskAnalyzer option")
("proj,p", po::value<string>(),"provide directory path which you would like to search data")
("user,u", po::value<string>(), "display file which is associated/Owner with user")
("dirsize,ds", po::value<uint64_t>()->default_value(1000000), "display dir which dir_size>=size by default 1000000 Byte:1MB")
("showdir,sh", po::value<bool>()->default_value(false), "show only dir which is associated with user")
("maxdepth", po::value<uint64_t>()->default_value(5), "show only dir which is associated with user")
("filesize,fs", po::value<uint64_t>()->default_value(10000), "display file which file_size>=size by default 10000 Byte:10KB");
try {
po::store(po::parse_command_line(argc, argv, desc), vm);
po::notify(vm);
}catch(const std::exception &err) {
std::cerr<<err.what()<<std::endl;
std::cout<<desc<<std::endl;
}
catch(...) {
std::cout<<"Unkown exception"<<std::endl;
}
if(vm.count("help")) {
std::cout<<"scan -p <proj_name> -u <user_name> -maxdepth <maxdepth> -fs <file_size> -d <debug>\n\n";
std::cout<<desc<<std::endl;
return 1;
}
if(vm.count("user")){
user = vm["user"].as<string>();
}
if(vm.count("proj")){
proj = vm["proj"].as<string>();
}
if(vm.count("filesize")){
f_size = vm["filesize"].as<uint64_t>();
}
if(vm.count("showdir")) {
show_dir = vm["showdir"].as<bool>();
}
if(vm.count("dirsize")) {
dir_size = vm["dirsize"].as<uint64_t>();
}
if(vm.count("maxdepth")){
maxdepth = vm["maxdepth"].as<uint64_t>();
}
if(show_dir) {
d.scan_dir_name(proj, user, dir_size, maxdepth);
return 0;
} else {
uint64_t count = 0;
std::vector<std::string> dir;
process_dir(proj, count, dir);
std::cout<<"createing database["<<proj<<" "<<count<<" ]"<<std::endl;
std::string db_name = Command::basename(proj);
DataBase db[count];
for (uint64_t i = 0; i<count; i++){
db[i].set_db_name("DiskAnalyzer_"+ std::to_string(i)+"_" +db_name);
if(!db[i].prepare_db()){
std::cerr<<"[Error] DataBase operation failed"<<std::endl;
return 1;
}
}
std::size_t max_thread = dir.size() > 1000 ? 1000 : dir.size();
//max_thread = 10;
std::cout<<dir.size()<<std::endl;
//contain directory information
while(dir.size()){
std::size_t dir_traverse = 0, db_count = 0;
boost::asio::io_service io_service;
boost::asio::io_service::work work(io_service);
boost::thread_group threads;
for (std::size_t i = 0; i < max_thread; ++i)
threads.create_thread(boost::bind(&boost::asio::io_service::run, &io_service));
for(auto it = dir.begin(); it != dir.end() && dir_traverse <max_thread; ++it){
if(db_count>=count)
db_count = 0;
try {
//this function determine determine size of directory. I had expectation
// each directory will go each thread
io_service.post(boost::bind(&Dirs::scan_dir, boost::ref(d), *it, db[db_count], user));
} catch(...) {
std::cerr<<"got error"<<std::endl;
continue;
}
dir_traverse++;
//dir_traverse = dir_traverse + max_thread;
db_count++;
//boost::this_thread::sleep(boost::posix_time::seconds(1));
}
io_service.stop();
threads.join_all();
dir.erase(dir.begin(), dir.begin()+dir_traverse);
std::cout<<" [Remaining Processing dir cout "<<dir.size()<<std::endl;
}
return 0;
}
std::cout<<desc<<std::endl;
return 0;
}
解决方案
每次执行顶层循环时，您都会停止（并重新创建）整个线程池。
这不仅会因为反复创建和销毁线程而产生大量开销，
还会因为调用 `stop` 而破坏程序的正确性。即使任务尚未完成，`stop` 也会强制执行上下文停止。事实上，在线程被 join 之前，可能根本没有任何任务开始执行。
自然的解决方法是把线程池移到循环之外，并在最后再 join 它（不要对服务调用 `stop`，这样所有任务都能执行完成）。但是，在您的情况下还需要做更多改动，因为循环中通过 `post` 提交的任务引用了局部变量。
推荐阅读
- java - 在 Spring Data/hibernate 中获取实体的异常
- ld - 为什么递归 ld rpath 的应用程序不起作用?
- spring-boot - 嵌入 Vert.x 的 Spring Boot
- c - 在循环中使用 scanf() 扫描 CSV 文件在第一行和第二行开始之后停止
- python - 使用 Wand python 将 PDF 转换为图像
- opengl - 如何在带有 GPU 的 Google Compute Engine 上运行 Gazebo?
- asp.net-mvc - 如何在两个 React 组件之间切换到一个文件中?
- reporting-services - 每页的标题 SSRS
- java - Hikari 抛出错误 目标类 org.postgresql.ds.PGPoolingDataSource 上不存在属性 url
- c# - ASP.NET Core 中的多种分布式缓存