首页 > 解决方案 > OpenMP 并行化 std::next_permutation

问题描述

我编写了函数scores_single,它计算棋盘的所有排列并对其进行评分。

我尝试按照这个 SO 答案使用 OpenMP 并行化该函数,并提出了scores_parallel. 问题是并行版本在多个循环中迭代相同的排列。

代码如下:

#include <iostream>
#include <omp.h>
#include <vector>

// Single threaded version
int scores_single(const int tokens, const int board_length) {
    std::vector<int> scores;

    // Generate boards
    std::vector<char> board(board_length);
    std::fill(board.begin(), board.end() - tokens, '-');
    std::fill(board.end() - tokens, board.end(), 'T');

    do {
//        printf("Board: %s\n", std::string(board.data(), board.size()).c_str());

        // Score board
        auto value = 3;

        scores.push_back(value);
    } while (std::next_permutation(board.begin(), board.end()));
    return scores.size();
}

// OpenMP parallel version
int scores_parallel(const int tokens, const int board_length) {
    std::vector<std::vector<int>*> score_lists(board_length);

    // Generate boards
    std::vector<char> board(board_length);
    std::fill(board.begin(), board.end() - tokens, '-');
    std::fill(board.end() - tokens, board.end(), 'T');

    printf("Starting\n");
#pragma omp parallel default(none) shared(board, board_length, score_lists)
    {
#pragma omp single nowait
        for (int i = 0; i < board_length; ++i) {
#pragma omp task untied
            {
                auto *scores = new std::vector<int>;

                // Make a copy of the board for this thread
                auto board_thread = board;
                // Subset for this thread, see: https://stackoverflow.com/questions/30865231/parallel-code-for-next-permutation
                std::rotate(board_thread.begin(), board_thread.begin() + i, board_thread.begin() + i + 1);
                do {
                    printf("[%02d] board: %s\n", i, std::string(board_thread.data(), board_thread.size()).c_str());

                    // Score board
                    auto value = 3;

                    scores->push_back(value);
                } while (std::next_permutation(board_thread.begin() + 1, board_thread.end()));
                score_lists[i] = scores;
                printf("[%02d] Finished on thread %d with %lu values\n", i, omp_get_thread_num(), scores->size());
            }
        }
    }

    std::vector<int> scores;

    int k = 0;
    for (auto & list : score_lists) {
        for (int j : *list) {
            scores.push_back(j);
        }
    }
    printf("Finished, size: %lu\n", scores.size());

    return scores.size();
}

int main() {
    int p = scores_parallel(2, 4);
    int s = scores_single(2, 4);
    std::cout << p << " != " << s << std::endl;
    return 0;
}

输出:

Starting
[01] board: --TT
[03] board: T--T
[03] board: T-T-
[02] board: T--T
[03] board: TT--
[01] board: -T-T
[02] board: T-T-
[00] board: --TT
[03] Finished on thread 10 with 3 values
[00] board: -T-T
[00] board: -TT-
[02] board: TT--
[00] Finished on thread 11 with 3 values
[01] board: -TT-
[02] Finished on thread 12 with 3 values
[01] Finished on thread 4 with 3 values
Finished, size: 12
12 != 6

我想我理解我正在复制的 SO 答案,但我不确定我做错了什么。

6 是预期的答案,因为 4C2 = 6。

标签: c++parallel-processingopenmppermutationc++20

解决方案


想通了,我多次计算相同的排列。我用下面的 if 语句修复了它。

#include <iostream>
#include <omp.h>
#include <vector>

// OpenMP parallel version
int scores_parallel(const int tokens, const int board_length) {
    std::vector<std::vector<int>*> score_lists(board_length);

    // Generate boards
    std::vector<char> board(board_length);
    std::fill(board.begin(), board.end() - tokens, '-');
    std::fill(board.end() - tokens, board.end(), 'T');

    printf("Starting\n");
#pragma omp parallel default(none) shared(board, board_length, score_lists)
    {
#pragma omp single nowait
        for (int i = 0; i < board_length; ++i) {
#pragma omp task untied
            {
                auto *scores = new std::vector<int>;

                // No need to process this branch if it will be identical to a prior branch.
                if (board[i] != board[i + 1]) {
                    // Make a copy of the board for this thread
                    auto board_thread = board;
                    printf("[%02d] evaluating: %s\n", i, std::string(board_thread.data(), board_thread.size()).c_str());
                    // Subset for this thread, see: https://stackoverflow.com/questions/30865231/parallel-code-for-next-permutation
                    std::rotate(board_thread.begin(), board_thread.begin() + i, board_thread.begin() + i + 1);
                    do {
                        printf("[%02d] board: %s\n", i, std::string(board_thread.data(), board_thread.size()).c_str());

                        // Score board
                        auto value = 3;

                        scores->push_back(value);
                    } while (std::next_permutation(board_thread.begin() + 1, board_thread.end()));
                }
                score_lists[i] = scores;
                printf("[%02d] Finished on thread %d with %lu values\n", i, omp_get_thread_num(), scores->size());
            }
        }
    }

    std::vector<int> scores;

    int k = 0;
    for (auto & list : score_lists) {
        for (int j : *list) {
            scores.push_back(j);
        }
    }
    printf("Finished, size: %lu\n", scores.size());

    return scores.size();
}

int main() {
    int p = scores_parallel(2, 4);
    int s = scores_single(2, 4);
    std::cout << p << " == " << s << std::endl;
    return p != s;
}

推荐阅读