c++ - OpenMP 并行化 std::next_permutation
问题描述
我编写了函数scores_single
,它计算棋盘的所有排列并对其进行评分。
我尝试按照这个 SO 答案使用 OpenMP 并行化该函数,并提出了scores_parallel
. 问题是并行版本在多个循环中迭代相同的排列。
代码如下:
#include <iostream>
#include <omp.h>
#include <vector>
// Single threaded version
int scores_single(const int tokens, const int board_length) {
std::vector<int> scores;
// Generate boards
std::vector<char> board(board_length);
std::fill(board.begin(), board.end() - tokens, '-');
std::fill(board.end() - tokens, board.end(), 'T');
do {
// printf("Board: %s\n", std::string(board.data(), board.size()).c_str());
// Score board
auto value = 3;
scores.push_back(value);
} while (std::next_permutation(board.begin(), board.end()));
return scores.size();
}
// OpenMP parallel version
int scores_parallel(const int tokens, const int board_length) {
std::vector<std::vector<int>*> score_lists(board_length);
// Generate boards
std::vector<char> board(board_length);
std::fill(board.begin(), board.end() - tokens, '-');
std::fill(board.end() - tokens, board.end(), 'T');
printf("Starting\n");
#pragma omp parallel default(none) shared(board, board_length, score_lists)
{
#pragma omp single nowait
for (int i = 0; i < board_length; ++i) {
#pragma omp task untied
{
auto *scores = new std::vector<int>;
// Make a copy of the board for this thread
auto board_thread = board;
// Subset for this thread, see: https://stackoverflow.com/questions/30865231/parallel-code-for-next-permutation
std::rotate(board_thread.begin(), board_thread.begin() + i, board_thread.begin() + i + 1);
do {
printf("[%02d] board: %s\n", i, std::string(board_thread.data(), board_thread.size()).c_str());
// Score board
auto value = 3;
scores->push_back(value);
} while (std::next_permutation(board_thread.begin() + 1, board_thread.end()));
score_lists[i] = scores;
printf("[%02d] Finished on thread %d with %lu values\n", i, omp_get_thread_num(), scores->size());
}
}
}
std::vector<int> scores;
int k = 0;
for (auto & list : score_lists) {
for (int j : *list) {
scores.push_back(j);
}
}
printf("Finished, size: %lu\n", scores.size());
return scores.size();
}
int main() {
int p = scores_parallel(2, 4);
int s = scores_single(2, 4);
std::cout << p << " != " << s << std::endl;
return 0;
}
输出:
Starting
[01] board: --TT
[03] board: T--T
[03] board: T-T-
[02] board: T--T
[03] board: TT--
[01] board: -T-T
[02] board: T-T-
[00] board: --TT
[03] Finished on thread 10 with 3 values
[00] board: -T-T
[00] board: -TT-
[02] board: TT--
[00] Finished on thread 11 with 3 values
[01] board: -TT-
[02] Finished on thread 12 with 3 values
[01] Finished on thread 4 with 3 values
Finished, size: 12
12 != 6
我想我理解我正在复制的 SO 答案,但我不确定我做错了什么。
6 是预期的答案,因为 4C2 = 6。
解决方案
想通了,我多次计算相同的排列。我用下面的 if 语句修复了它。
#include <iostream>
#include <omp.h>
#include <vector>
// OpenMP parallel version
int scores_parallel(const int tokens, const int board_length) {
std::vector<std::vector<int>*> score_lists(board_length);
// Generate boards
std::vector<char> board(board_length);
std::fill(board.begin(), board.end() - tokens, '-');
std::fill(board.end() - tokens, board.end(), 'T');
printf("Starting\n");
#pragma omp parallel default(none) shared(board, board_length, score_lists)
{
#pragma omp single nowait
for (int i = 0; i < board_length; ++i) {
#pragma omp task untied
{
auto *scores = new std::vector<int>;
// No need to process this branch if it will be identical to a prior branch.
if (board[i] != board[i + 1]) {
// Make a copy of the board for this thread
auto board_thread = board;
printf("[%02d] evaluating: %s\n", i, std::string(board_thread.data(), board_thread.size()).c_str());
// Subset for this thread, see: https://stackoverflow.com/questions/30865231/parallel-code-for-next-permutation
std::rotate(board_thread.begin(), board_thread.begin() + i, board_thread.begin() + i + 1);
do {
printf("[%02d] board: %s\n", i, std::string(board_thread.data(), board_thread.size()).c_str());
// Score board
auto value = 3;
scores->push_back(value);
} while (std::next_permutation(board_thread.begin() + 1, board_thread.end()));
}
score_lists[i] = scores;
printf("[%02d] Finished on thread %d with %lu values\n", i, omp_get_thread_num(), scores->size());
}
}
}
std::vector<int> scores;
int k = 0;
for (auto & list : score_lists) {
for (int j : *list) {
scores.push_back(j);
}
}
printf("Finished, size: %lu\n", scores.size());
return scores.size();
}
int main() {
int p = scores_parallel(2, 4);
int s = scores_single(2, 4);
std::cout << p << " == " << s << std::endl;
return p != s;
}
推荐阅读
- python - 使用python提取不允许页面提取pdf
- c# - IIS 重写提供程序:如何记录请求的 URL?
- javascript - 为什么我从减速器功能收到“NaN”?
- python - 如何从 pod 内的交互式 shell 获取输出
- javascript - 选项标签 HTML 中的下载链接
- visual-studio-code - 使用正则表达式搜索 * 图标
- python - 如何从 pyqt5 中的 QListView 获取项目值?
- c# - 序列化时如何本地化类属性?
- c++ - 在类型特征中获取 std::reference_wrapper 中的原始类型
- javascript - 如何使用 Discord 机器人在多个渠道中嵌入消息?