首页 > 解决方案 > 如何将 OpenMP 与 PARI/GP 一起使用?

问题描述

我想并行化一个包含来自 PARI/GP 库的一些操作以及其他一些操作的 for 循环。但是,将 OpenMP 与 PARI/GP 一起使用似乎并不是很简单。我发现的唯一类似的例子是this,它实际上似乎是sections从 OpenMP 中使用的。parallel for正如您在下面看到的,我尝试使用子句做一些事情,但是,它会在 PARI/GP 上引发分段错误。

#include <omp.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <time.h>
#include "pari/pari.h"

#define MAX_THREAD      8
#define MAX_COUNT       8
#define CLOCK_PRECISION 1E9

typedef struct vector {
  void **items;
  unsigned int capacity;
  unsigned int size;
} vector_st;

typedef vector_st *vector_t;

vector_t vector_init(unsigned int capacity) {
  if (capacity <= 0) {
    fprintf(stderr, "Error: invalid vector capacity.\n");
    exit(1);
  }

  vector_t v = (vector_t) calloc(1, sizeof(vector_st));
  v->capacity = capacity;
  v->size = 0;
  v->items = malloc(sizeof(void *) * v->capacity);
  if (v->items == NULL) {
    fprintf(stderr, "Error: could not allocate memory.\n");
    exit(1);
  }

  return v;
}

unsigned int vector_size(vector_t v) {
  return v->size;
}


void *vector_get(vector_t v, unsigned int index) {
  if (index >= v->size) {
    fprintf(stderr, "Error: index larger than the vector size.\n");
    exit(1);
  }
  return v->items[index];
}

void vector_resize(vector_t v, unsigned int capacity) {
#ifdef DEBUG
  printf("Vector resize, from %u to %u.\n", v->capacity, capacity);
#endif

  void **items = realloc(v->items, sizeof(void *) * capacity);
  if (items) {
    v->items = items;
    v->capacity = capacity;
  } else {
    fprintf(stderr, "Error: could not re-allocate memory.\n");
    exit(1);
  }
}

void vector_add(vector_t v, void *item) {
  if (v->capacity == v->size) {
    vector_resize(v, v->capacity * 2);
  }
  v->items[v->size++] = item;
}

void vector_copy(vector_t c, vector_t a) {
  unsigned int i;
  for (i = 0; i < vector_size(a); i++) {
    vector_add(c, vector_get(a, i));
  }
}

void vector_free(vector_t v) {
  if (v) {
    if (v->items) {
      free(v->items);
    }

    v->capacity = 0;
    v->size = 0;

    free(v);
    v = NULL;
  }
}

typedef struct {
  GEN sk;
  GEN pk;
} cl_key_pair_st;

typedef cl_key_pair_st *cl_key_pair_t;

#define cl_key_pair_null(key_pair) key_pair = NULL;

#define cl_key_pair_new(key_pair)                     \
  do {                                                \
    key_pair = malloc(sizeof(cl_key_pair_st));        \
    if (key_pair == NULL) {                           \
      exit(1);                                        \
    }                                                 \
  } while (0)

#define cl_key_pair_free(key_pair)                    \
  do {                                                \
    free(key_pair);                                   \
    key_pair = NULL;                                  \
  } while (0)


uint64_t mtimer(void) {
    struct timespec time;
    clock_gettime(CLOCK_MONOTONIC, &time);
    return (uint64_t) (time.tv_sec * CLOCK_PRECISION + time.tv_nsec);
}

void test(vector_t keys, const unsigned count) {
  GEN bound = strtoi("25413151665722220203610173826311975594790577398151861612310606875883990655261658217495681782816066858410439979225400605895077952191850577877370585295070770312182177789916520342292660169492395314400288273917787194656036294620169343699612953311314935485124063580486497538161801803224580096");
  GEN g_q_a = strtoi("4008431686288539256019978212352910132512184203702279780629385896624473051840259706993970111658701503889384191610389161437594619493081376284617693948914940268917628321033421857293703008209538182518138447355678944124861126384966287069011522892641935034510731734298233539616955610665280660839844718152071538201031396242932605390717004106131705164194877377");
  GEN g_q_b = negi(strtoi("3117991088204303366418764671444893060060110057237597977724832444027781815030207752301780903747954421114626007829980376204206959818582486516608623149988315386149565855935873517607629155593328578131723080853521348613293428202727746191856239174267496577422490575311784334114151776741040697808029563449966072264511544769861326483835581088191752567148165409"));
  GEN g_q_c = strtoi("7226982982667784284607340011220616424554394853592495056851825214613723615410492468400146084481943091452495677425649405002137153382700126963171182913281089395393193450415031434185562111748472716618186256410737780813669746598943110785615647848722934493732187571819575328802273312361412673162473673367423560300753412593868713829574117975260110889575205719");
  GEN g_q = qfi(g_q_a, g_q_b, g_q_c);

  size_t numthread = MAX_THREAD;
  if (numthread > count) {
    numthread = count;
    omp_set_num_threads(numthread);
  }

  struct pari_thread pth[numthread];
  for (size_t i = 0; i < numthread; i++) {
    pari_thread_alloc(&pth[i], 100000000, NULL);
  }

  #pragma omp parallel
  {
    int thnum = omp_get_thread_num();
    if (thnum) {
      (void) pari_thread_start(&pth[thnum]);
    }

    vector_t keys_private = vector_init(count / numthread);

    #pragma omp for schedule(static)
    for (size_t i = 0; i < count; i++) {
      cl_key_pair_t key_pair_i;
      cl_key_pair_null(key_pair_i);
      cl_key_pair_new(key_pair_i);

      key_pair_i->sk = randomi(bound);
      key_pair_i->pk = nupow(g_q, key_pair_i->sk, NULL);
      vector_add(keys_private, key_pair_i);
    }

    #pragma omp for ordered
    for (size_t i = 0; i < numthread; i++) {
      #pragma omp ordered
      {
        vector_copy(keys, keys_private);
      }
    }

    if (thnum) {
      pari_thread_close();
    }
  }

  for (size_t i = 0; i < numthread; i++) {
    if (&pth[i] != NULL) pari_thread_free(&pth[i]);
  }
}

int main(void) {
  pari_init(1000000000, 2);
    setrand(getwalltime());

  vector_t keys = vector_init(MAX_COUNT);

  uint64_t start_time = mtimer();
  test(keys, MAX_COUNT);
  uint64_t stop_time = mtimer();
  double total_time = ((stop_time - start_time) / CLOCK_PRECISION);
  printf("Elapsed time: %.5lf s\n", total_time);

  printf("\nvector_size(keys): %u\n", vector_size(keys));
  for (size_t i = 0; i < vector_size(keys); i++) {
    cl_key_pair_t key_pair_i = (cl_key_pair_t) vector_get(keys, i);
    printf("%zu->sk: %s\n", i, GENtostr(key_pair_i->sk));
    printf("%zu->pk: %s\n", i, GENtostr(key_pair_i->pk));
  }

  vector_free(keys);
  pari_close();
  return 0;
}

任何想法如何使用GENinsidestruct和 with OpenMP

标签: cparallel-processingopenmppari

解决方案


该修复涉及删除以下行:

for (size_t i = 0; i < numthread; i++) {
  if (&pth[i] != NULL) pari_thread_free(&pth[i]);
}

我在 Bill Allombert 的 PARI/GP 邮件列表中收到了这个修复,他还提供了以下解释:

每个线程都使用自己的 PARI 堆栈来存储结果。调用 pari_thread_free 会破坏 PARI 堆栈线程以及该线程计算的任何结果。因此,无论您想保留什么结果,都需要在调用 pari_thread_free 之前使用 gcopy 将其移动到主堆栈。

实际上这就是 API 有 pari_thread_alloc 的原因:允许线程堆栈在 pari_thread_close 中存活,以便主线程获得结果。


推荐阅读