c++ - 多线程 C++ 比单线程执行时间更长
问题描述
我正在尝试编写一段代码,它要遍历一个 8^12 次迭代的循环,并且在每次迭代中,当满足某些条件时,就把当前值 push_back 到一个向量中(每个线程都有自己的向量用于 push_back,循环结束后我再把这些向量合并)。但似乎活跃的线程越多,执行耗时反而越长。下面是传递给每个线程的函数(对象的一个方法):
void HamiltonianKH::mapping_kernel(ull_int start, ull_int stop, std::vector<ull_int>* map_threaded, int _id) {
int n = 1;
out << "A new thread joined tha party! from " << start << " to " << stop << endl;
for (ull_int j = start; j < stop; j++) {
int bSz = 0, fSz = 0, N_e = 0;
std::tie(bSz, fSz, N_e) = calculateSpinElements(this->L, j);
if ((bSz + fSz == this->Sz) && N_e == this->num_of_electrons)
map_threaded->push_back(j);
if (show_system_size_parameters == true && (j - start) % ull_int((stop - start) * n / 4) == 0 && j > 0) {
out << n << "-th quarter of " << _id << endl;
n++;
}
}
} ,这里是 caulculate_spineelements 函数:
/// Computes the spin totals and electron count for the state with index j.
///
/// The state is expanded with int_to_binary(j, L) and its first L entries are
/// examined one by one (presumably one entry per lattice site -- confirm against
/// int_to_binary).
///
/// @param L  number of entries to examine
/// @param j  index of the state
/// @return   tuple (bSz, fSz, N_e): bosonic total spin, fermionic total spin and
///           number of electrons in the given state
std::tuple<int, int, int> calculateSpinElements(int L, ull_int& j) {
    int bosonicSz = 0;      // bosonic total spin - spin of upper orbital locked to n=1 filling
    int fermionicSz = 0;    // fermionic total spin
    int electronCount = 0;  // number of electrons in given state

    const std::vector<int> digits = int_to_binary(j, L);

    for (int site = 0; site < L; ++site) {
        const int state = digits[site];

        // Entries below 4 raise the bosonic spin by one, all others lower it.
        bosonicSz += (state < 4) ? 1 : -1;

        // The remainder modulo 4 selects the fermionic contribution.
        switch (state % 4) {
        case 1:
            fermionicSz += 1;
            electronCount += 1;
            break;
        case 2:
            fermionicSz -= 1;
            electronCount += 1;
            break;
        case 3:
            electronCount += 2;
            break;
        default:
            break;  // no fermionic contribution
        }
    }

    return std::make_tuple(bosonicSz, fermionicSz, electronCount);
}
下面是把任务划分给各个线程的代码:
void HamiltonianKH::generate_mapping() {
ull_int start = 0, stop = std::pow(8, L);
//mapping_kernel(start, stop, mapping, L, Sz, num_of_electrons);
//Threaded
std::vector<std::vector<ull_int>*> map_threaded(num_of_threads);
std::vector<std::thread> threads;
threads.reserve(num_of_threads);
for (int t = 0; t < num_of_threads; t++) {
start = t * (ull_int)std::pow(8, L) / num_of_threads;
stop = ((t + 1) == num_of_threads ? (ull_int)std::pow(8, L) : (ull_int)std::pow(8, L) * (t + 1) / num_of_threads);
map_threaded[t] = new std::vector<ull_int>();
threads.emplace_back(&HamiltonianKH::mapping_kernel, this, start, stop, map_threaded[t], t);
}
for (auto& t : threads) t.join();
for (auto& t : threads) t.~thread();
ull_int size = 0;
for (auto& t : map_threaded) {
size += t->size();
}
out << "size = " << size << endl;
for (auto & t : map_threaded)
mapping->insert(mapping->end(), t->begin(), t->end());
//sort(mapping->begin(), mapping->end());
if (show_system_size_parameters == true) {
out << "Mapping generated with " << mapping->size() << " elements" << endl;
out << "Last element = " << mapping->at(mapping->size() - 1) << endl;
}
//out << mapping[0] << " " << mapping[mapping.size() - 1] << endl;
assert(mapping->size() > 0 && "Not possible number of electrons - no. of states < 1");
}
变量:mapping、L、num_of_electrons 和 Sz 是对象中的公共字段。整个代码有 2000 多行,但是 generate_mapping() 调用之后的执行与问题无关。
你们有谁知道,为什么这段代码在更多线程上执行时间更长?
提前非常感谢。
解决方案
推荐阅读
- java - 如何使用 selenium java 在对话框中自动滚动
- html - 相对位置如何使 dom 节点在固定位置上绘制?
- java - 使用 twaitforfile 或自定义 java 代码将文件从一个目录传输到另一个目录时如何诱导时间延迟?
- python - crontab 运行时在 MacOS 上出现 python 导入错误
- javascript - 将 Dart MediaStreamTrack 传递给 JavaScript
- regex - 如何在不同的可能星座中重写具有多个变量“目录”的 URL?
- r - 如何摆脱 geom_smooth 连续错误代码?
- javascript - 按钮样式在 JS 中不起作用
- java - Lombok 的注释中的编译错误 - Eclipse
- javascript - 如何在 JavaScript 中将数千行数字更改为数组