c - 如何将 OpenMP 与 PARI/GP 一起使用?
问题描述
我想并行化一个包含来自 PARI/GP 库的一些操作以及其他一些操作的 for 循环。但是,将 OpenMP 与 PARI/GP 一起使用似乎并不是很简单。我发现的唯一类似的例子是this,它实际上似乎是sections
从 OpenMP 中使用的。parallel for
正如您在下面看到的,我尝试使用子句做一些事情,但是,它会在 PARI/GP 上引发分段错误。
#include <omp.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <time.h>
#include "pari/pari.h"
#define MAX_THREAD 8
#define MAX_COUNT 8
#define CLOCK_PRECISION 1E9
typedef struct vector {
void **items;
unsigned int capacity;
unsigned int size;
} vector_st;
typedef vector_st *vector_t;
vector_t vector_init(unsigned int capacity) {
if (capacity <= 0) {
fprintf(stderr, "Error: invalid vector capacity.\n");
exit(1);
}
vector_t v = (vector_t) calloc(1, sizeof(vector_st));
v->capacity = capacity;
v->size = 0;
v->items = malloc(sizeof(void *) * v->capacity);
if (v->items == NULL) {
fprintf(stderr, "Error: could not allocate memory.\n");
exit(1);
}
return v;
}
unsigned int vector_size(vector_t v) {
return v->size;
}
void *vector_get(vector_t v, unsigned int index) {
if (index >= v->size) {
fprintf(stderr, "Error: index larger than the vector size.\n");
exit(1);
}
return v->items[index];
}
void vector_resize(vector_t v, unsigned int capacity) {
#ifdef DEBUG
printf("Vector resize, from %u to %u.\n", v->capacity, capacity);
#endif
void **items = realloc(v->items, sizeof(void *) * capacity);
if (items) {
v->items = items;
v->capacity = capacity;
} else {
fprintf(stderr, "Error: could not re-allocate memory.\n");
exit(1);
}
}
void vector_add(vector_t v, void *item) {
if (v->capacity == v->size) {
vector_resize(v, v->capacity * 2);
}
v->items[v->size++] = item;
}
void vector_copy(vector_t c, vector_t a) {
unsigned int i;
for (i = 0; i < vector_size(a); i++) {
vector_add(c, vector_get(a, i));
}
}
void vector_free(vector_t v) {
if (v) {
if (v->items) {
free(v->items);
}
v->capacity = 0;
v->size = 0;
free(v);
v = NULL;
}
}
typedef struct {
GEN sk;
GEN pk;
} cl_key_pair_st;
typedef cl_key_pair_st *cl_key_pair_t;
#define cl_key_pair_null(key_pair) key_pair = NULL;
#define cl_key_pair_new(key_pair) \
do { \
key_pair = malloc(sizeof(cl_key_pair_st)); \
if (key_pair == NULL) { \
exit(1); \
} \
} while (0)
#define cl_key_pair_free(key_pair) \
do { \
free(key_pair); \
key_pair = NULL; \
} while (0)
uint64_t mtimer(void) {
struct timespec time;
clock_gettime(CLOCK_MONOTONIC, &time);
return (uint64_t) (time.tv_sec * CLOCK_PRECISION + time.tv_nsec);
}
void test(vector_t keys, const unsigned count) {
GEN bound = strtoi("25413151665722220203610173826311975594790577398151861612310606875883990655261658217495681782816066858410439979225400605895077952191850577877370585295070770312182177789916520342292660169492395314400288273917787194656036294620169343699612953311314935485124063580486497538161801803224580096");
GEN g_q_a = strtoi("4008431686288539256019978212352910132512184203702279780629385896624473051840259706993970111658701503889384191610389161437594619493081376284617693948914940268917628321033421857293703008209538182518138447355678944124861126384966287069011522892641935034510731734298233539616955610665280660839844718152071538201031396242932605390717004106131705164194877377");
GEN g_q_b = negi(strtoi("3117991088204303366418764671444893060060110057237597977724832444027781815030207752301780903747954421114626007829980376204206959818582486516608623149988315386149565855935873517607629155593328578131723080853521348613293428202727746191856239174267496577422490575311784334114151776741040697808029563449966072264511544769861326483835581088191752567148165409"));
GEN g_q_c = strtoi("7226982982667784284607340011220616424554394853592495056851825214613723615410492468400146084481943091452495677425649405002137153382700126963171182913281089395393193450415031434185562111748472716618186256410737780813669746598943110785615647848722934493732187571819575328802273312361412673162473673367423560300753412593868713829574117975260110889575205719");
GEN g_q = qfi(g_q_a, g_q_b, g_q_c);
size_t numthread = MAX_THREAD;
if (numthread > count) {
numthread = count;
omp_set_num_threads(numthread);
}
struct pari_thread pth[numthread];
for (size_t i = 0; i < numthread; i++) {
pari_thread_alloc(&pth[i], 100000000, NULL);
}
#pragma omp parallel
{
int thnum = omp_get_thread_num();
if (thnum) {
(void) pari_thread_start(&pth[thnum]);
}
vector_t keys_private = vector_init(count / numthread);
#pragma omp for schedule(static)
for (size_t i = 0; i < count; i++) {
cl_key_pair_t key_pair_i;
cl_key_pair_null(key_pair_i);
cl_key_pair_new(key_pair_i);
key_pair_i->sk = randomi(bound);
key_pair_i->pk = nupow(g_q, key_pair_i->sk, NULL);
vector_add(keys_private, key_pair_i);
}
#pragma omp for ordered
for (size_t i = 0; i < numthread; i++) {
#pragma omp ordered
{
vector_copy(keys, keys_private);
}
}
if (thnum) {
pari_thread_close();
}
}
for (size_t i = 0; i < numthread; i++) {
if (&pth[i] != NULL) pari_thread_free(&pth[i]);
}
}
int main(void) {
pari_init(1000000000, 2);
setrand(getwalltime());
vector_t keys = vector_init(MAX_COUNT);
uint64_t start_time = mtimer();
test(keys, MAX_COUNT);
uint64_t stop_time = mtimer();
double total_time = ((stop_time - start_time) / CLOCK_PRECISION);
printf("Elapsed time: %.5lf s\n", total_time);
printf("\nvector_size(keys): %u\n", vector_size(keys));
for (size_t i = 0; i < vector_size(keys); i++) {
cl_key_pair_t key_pair_i = (cl_key_pair_t) vector_get(keys, i);
printf("%zu->sk: %s\n", i, GENtostr(key_pair_i->sk));
printf("%zu->pk: %s\n", i, GENtostr(key_pair_i->pk));
}
vector_free(keys);
pari_close();
return 0;
}
任何想法如何使用GEN
insidestruct
和 with OpenMP
?
解决方案
该修复涉及删除以下行:
for (size_t i = 0; i < numthread; i++) {
if (&pth[i] != NULL) pari_thread_free(&pth[i]);
}
我在 Bill Allombert 的 PARI/GP 邮件列表中收到了这个修复,他还提供了以下解释:
每个线程都使用自己的 PARI 堆栈来存储结果。调用 pari_thread_free 会破坏 PARI 堆栈线程以及该线程计算的任何结果。因此,无论您想保留什么结果,都需要在调用 pari_thread_free 之前使用 gcopy 将其移动到主堆栈。
实际上这就是 API 有 pari_thread_alloc 的原因:允许线程堆栈在 pari_thread_close 中存活,以便主线程获得结果。
推荐阅读
- javascript - 使用 Route 组件中的 react Router 设置父级的状态
- android - DJI 相机位置相对于 Phatom DJI 4 位置
- r - Tidy 不起作用,出现此错误:No tidy method for objects of class LDA_Gibbs
- android - 获取操作 GET_CONTENT 的文件路径
- linux - 如何在 Linux 中将 R 版本从 3.5 降级到 3.4?
- react-native - 反应原生 fs ios 下载超时
- java - 调度程序 servlet 中未找到映射错误
- javascript - three.js 对象淡入/淡出或应用不透明度
- javascript - 如何使用 JavaScript 将两个值添加到数组中
- c++ - 遍历 unordered_maps cpp 的无序映射中的元素