c - 使用 AVX 和 AVX2 指令的 C 程序超出 2GB 内存限制
问题描述
我正在编写一个 C 程序,用来定位以 " 符号(双引号)开头、并以 " 符号或连续两个换行符 \n 结尾的引用文本。为了让它尽可能快,我使用了 AVX 和 AVX2 指令。我通过 SSH 运行该程序的那台 PC 会终止任何使用超过 2GB RAM 的进程。
我不明白我的代码的哪一部分正在使用所有这些内存。谁能指导我正确的方向或给我一些见解?
这是被调用的函数。我使用的文本文件是一个 1.2GB 的 txt 文件。
detect_quotes.c
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <malloc.h>
#include <time.h>
// Defined (the value itself is not inspected) so that the `#ifndef
// DEFINE_INTERFACE` section of the #included submission.c is skipped and the
// declarations from this file are used instead.
#define DEFINE_INTERFACE 0
// Print the usage text for `exec` and make the *calling function* return 1.
// Expands to a braced block, so it is only usable inside a function that
// returns int.
#define bad_call(exec) { print_usage(exec); return 1; }
// Report an allocation failure on stderr and terminate the process.
#define bad_alloc() { fprintf(stderr, "out of memory\n"); exit(EXIT_FAILURE); }
// Terminate via bad_alloc() when `expr` is false (e.g. a NULL allocation result).
#define test_alloc(expr) { if (!(expr)) { bad_alloc(); } }
/**
 * Write the command-line help to standard output.
 *
 * @param exec  Name under which the program was invoked; substituted into
 *              every line of the help text.
 */
void
print_usage(const char *exec)
{
    FILE *out = stdout;

    /* Synopsis first, then one example per supported way of feeding input. */
    fprintf(out, "Usage: %s (FILE|-)\n", exec);
    fprintf(out, " EXAMPLE: %s text.txt\n", exec);
    fprintf(out, " EXAMPLE: gzip -dc text.txt.gz | %s -\n", exec);
    fprintf(out, " EXAMPLE: echo \"my interesting \\\"test\\\"\" | %s -\n", exec);
}
// -- Text data structure -----------------------------------------------------
/**
 * Growable in-memory copy of the input text.
 *
 * The buffer is obtained from aligned_alloc() with 32-byte alignment (see
 * alloc_text() / require_capacity()) and is zero-filled beyond `len`.
 */
typedef struct {
/** The text data to analyze. */
char *characters;
/** The number of characters in this text. */
size_t len;
/** The capacity of the `characters` buffer.*/
size_t cap;
} Text;
/**
 * Create an empty Text backed by a zero-filled, 32-byte aligned buffer.
 *
 * The process is terminated through test_alloc() if the allocation fails.
 *
 * @return A Text with len == 0 and a capacity of half a megabyte; release it
 *         with dealloc_text().
 */
Text
alloc_text()
{
    const size_t initial_cap = 1024 * 512; // half a megabyte

    char *buffer = aligned_alloc(32, sizeof(char) * initial_cap);
    test_alloc(buffer);
    memset(buffer, 0, initial_cap);

    Text fresh;
    fresh.characters = buffer;
    fresh.len = 0;
    fresh.cap = initial_cap;
    return fresh;
}
/**
 * Make sure `text` can hold `cap` characters plus at least 64 trailing zero
 * bytes, growing the buffer when necessary.
 *
 * The capacity is doubled repeatedly until the request is satisfied (the
 * previous version doubled only once, which is not enough for a request
 * larger than twice the current capacity). Existing characters are preserved
 * and every byte after them is zero. The process is terminated through
 * bad_alloc()/test_alloc() if the size computation would overflow or the
 * allocation fails.
 *
 * @param text  Buffer to grow; `characters` may be replaced by a new pointer.
 * @param cap   Number of characters the caller is about to need.
 */
void
require_capacity(Text *text, size_t cap)
{
    // maintain an additional 64 bytes of zeros after the last character
    if (text->cap > cap + 64)
        return;

    // A zero capacity (e.g. after dealloc_text) would never grow by doubling.
    size_t new_cap = text->cap != 0 ? text->cap : 64;
    while (new_cap <= cap + 64) {
        if (new_cap > (size_t)-1 / 2) {
            bad_alloc();
        }
        new_cap *= 2;
    }

    char *new_characters = aligned_alloc(32, sizeof(char) * new_cap);
    test_alloc(new_characters);

    // Copy the live prefix, then zero only the part that was not overwritten.
    if (text->len > 0)
        memcpy(new_characters, text->characters, sizeof(char) * text->len);
    memset(new_characters + text->len, 0, new_cap - text->len);

    free(text->characters);
    text->characters = new_characters;
    text->cap = new_cap;
}
/**
 * Release the buffer owned by `text` and reset it to an empty state
 * (NULL pointer, zero length, zero capacity).
 */
void
dealloc_text(Text *text)
{
    free(text->characters);
    text->characters = NULL;
    text->len = 0;
    text->cap = 0;
}
/**
 * Append the complete contents of `file` to `text`.
 *
 * @param file  Path of the file to read, or "-" to read standard input.
 * @param text  Destination buffer; grown through require_capacity().
 *
 * Terminates the process with an error message when the file cannot be
 * opened or a read error occurs. The error check is made after every read,
 * including a read that returned no data; previously such a failure was
 * mistaken for end-of-file.
 */
void
load_text(char *file, Text *text)
{
    enum { READ_CHUNK = 1024 };

    const int from_stdin = strcmp(file, "-") == 0;
    FILE *fh = from_stdin ? stdin : fopen(file, "r");
    if (fh == NULL)
    {
        fprintf(stderr, "cannot open file %s\n", file);
        exit(EXIT_FAILURE);
    }

    for (;;)
    {
        require_capacity(text, text->len + READ_CHUNK);
        char *buf = text->characters + text->len;
        size_t chars_read = fread(buf, sizeof(char), READ_CHUNK, fh);

        // fread() returns a short (possibly zero) count on both EOF and
        // error, so the stream state has to be inspected before stopping.
        if (ferror(fh))
        {
            fprintf(stderr, "error reading file %s\n", file);
            exit(EXIT_FAILURE);
        }
        if (chars_read == 0)
            break;

        text->len += chars_read;
    }

    // stdin is not ours to close.
    if (!from_stdin)
        fclose(fh);
}
/**
 * Write all `len` characters of `text` to standard output, unmodified.
 */
void
print_text(Text *text)
{
    const char *data = text->characters;
    const size_t count = text->len;

    fwrite(data, sizeof(char), count, stdout);
}
/**
 * Print one quotation, wrapped in "<<<" and ">>>" and followed by a newline.
 *
 * @param text         Text the positions refer to.
 * @param quote_begin  Offset of the first character to print.
 * @param quote_end    Offset of the last character to print (inclusive).
 *
 * Terminates the process with an error message unless
 * quote_begin < quote_end.
 */
void
print_text_quote(Text *text, int quote_begin, int quote_end)
{
    if (!(quote_begin < quote_end))
    {
        fprintf(stderr, "invalid quote begin and end positions: %d and %d\n",
                quote_begin, quote_end);
        exit(EXIT_FAILURE);
    }

    // Both end points are part of the quote, hence the + 1.
    int span = quote_end - quote_begin + 1;

    fputs("<<<", stdout);
    fwrite(text->characters + quote_begin, sizeof(char), span, stdout);
    fputs(">>>", stdout);
    fputc('\n', stdout);
}
// -- Output data structure ---------------------------------------------------
/** One detected quotation, stored as the two positions handed to push_pair(). */
typedef struct {
/* Position at which the quotation starts. */
size_t begin;
/* Position at which the quotation ends. */
size_t end;
} OutputPair;
/** Growable array of detected quotations. */
typedef struct {
/* Heap array of pairs; allocated by alloc_output(), grown by push_pair(). */
OutputPair *pairs;
/* Number of pairs currently stored. */
size_t len;
/* Number of pairs `pairs` has room for. */
size_t cap;
} Output;
Output
alloc_output()
{
Output output;
output.len = 0;
output.cap = 1024;
output.pairs = malloc(sizeof(OutputPair) * output.cap);
test_alloc(output.pairs);
return output;
}
/**
 * Append a quote starting at position `begin` and ending at position `end`
 * to the output.
 *
 * The pair array is doubled when it is full. The new size is computed from
 * the element type (OutputPair); the previous version multiplied by
 * sizeof(Output), which reserved more memory than needed on every growth.
 * The process is terminated through test_alloc() if the reallocation fails.
 *
 * @param output  Collection to append to.
 * @param begin   Start position of the quote.
 * @param end     End position of the quote.
 */
void
push_pair(Output *output, int begin, int end)
{
    if (output->len == output->cap)
    {
        // Doubling a zero capacity (e.g. after dealloc_output) would stay zero.
        size_t new_cap = output->cap != 0 ? output->cap * 2 : 1024;
        OutputPair *grown = realloc(output->pairs,
                                    sizeof *output->pairs * new_cap);
        test_alloc(grown);
        output->pairs = grown;
        output->cap = new_cap;
    }

    OutputPair *slot = &output->pairs[output->len];
    slot->begin = begin;
    slot->end = end;
    output->len++;
}
/**
 * Release the pair array owned by `output` and reset it to an empty state
 * (NULL pointer, zero length, zero capacity).
 */
void
dealloc_output(Output *output)
{
    free(output->pairs);
    output->pairs = NULL;
    output->cap = 0;
    output->len = 0;
}
// ---External symbols: DEFINED IN `submission.c` -------------
// Forward declaration of the detector. Its definition is pulled into this
// translation unit by the #include below, after Text, Output and push_pair
// have been declared above.
void
detect_quotes(Text *text, Output *output);
#include "submission.c"
// ----------------------------------------------------------------------------
/**
 * Program entry point: load the input named by the single command-line
 * argument, time detect_quotes() on it and print the number of quotes found
 * together with the throughput.
 *
 * @return 1 (after printing the usage text) when the argument count is not
 *         exactly one, 0 otherwise.
 */
int
main(int argc, char **args)
{
    if (argc != 2) bad_call(args[0]);
    char *file = args[1];

    Text text = alloc_text();
    Output output = alloc_output();
    load_text(file, &text);
    //print_text(&text);

    // Only the detection itself is timed, not loading the file.
    clock_t begin = clock();
    detect_quotes(&text, &output);
    clock_t end = clock();
    double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;

    // Debug print: print out all the quotes that were found
    // for (size_t i = 0; i < output.len; ++i)
    // {
    //     size_t b = output.pairs[i].begin;
    //     size_t e = output.pairs[i].end;
    //     printf("quote %zu, %zu: ", b, e);
    //     print_text_quote(&text, b, e);
    // }

    // Convert to double before dividing so fractional MiB are not truncated.
    double mib = (double)text.len / (1024.0 * 1024.0);

    // %zu is the conversion for size_t; %lu is wrong wherever size_t is not
    // unsigned long.
    printf("\n%zu quotes detected in %f seconds", output.len, time_spent);
    printf("\nThat is %.2f MiB/s\n", mib / time_spent);

    dealloc_output(&output);
    dealloc_text(&text);
    return 0;
}
This is the main function
/**
 * Program entry point: load the input named by the single command-line
 * argument, time detect_quotes() on it and print the number of quotes found
 * together with the throughput.
 *
 * @return 1 (after printing the usage text) when the argument count is not
 *         exactly one, 0 otherwise.
 */
int main(int argc, char **args) {
    if (argc != 2) bad_call(args[0]);
    char *file = args[1];

    Text text = alloc_text();
    Output output = alloc_output();
    load_text(file, &text);
    //print_text(&text);

    // Only the detection itself is timed, not loading the file.
    clock_t begin = clock();
    detect_quotes(&text, &output);
    clock_t end = clock();
    double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;

    // Debug print: print out all the quotes that were found
    // for (size_t i = 0; i < output.len; ++i)
    // {
    //     size_t b = output.pairs[i].begin;
    //     size_t e = output.pairs[i].end;
    //     printf("quote %zu, %zu: ", b, e);
    //     print_text_quote(&text, b, e);
    // }

    // Convert to double before dividing so fractional MiB are not truncated.
    double mib = (double)text.len / (1024.0 * 1024.0);

    // %zu is the conversion for size_t; %lu is wrong wherever size_t is not
    // unsigned long.
    printf("\n%zu quotes detected in %f seconds", output.len, time_spent);
    printf("\nThat is %.2f MiB/s\n", mib / time_spent);

    dealloc_output(&output);
    dealloc_text(&text);
    return 0;
}
和 submission.c
#include <immintrin.h>
#include <stdbool.h>
#include "stdint.h"
// Stand-alone declarations: compiled only when DEFINE_INTERFACE has not been
// defined by whoever includes this file, so that the detector can refer to
// Text, Output and push_pair on its own.
#ifndef DEFINE_INTERFACE
#include <stdio.h>
// Text buffer: pointer to the characters, used length, allocated capacity.
typedef struct {
char *characters;
size_t len;
size_t cap;
} Text;
// Output is only ever passed through to push_pair here, so an incomplete
// type is sufficient.
typedef struct Output Output;
// Records one quote spanning positions `begin` to `end` in `output`.
void
push_pair(Output *output, int begin, int end);
#endif
/**
 * Scan `text` for quotations and report each one through push_pair().
 *
 * A quotation opens at a '"' character and closes at the next '"' or at the
 * second of two consecutive '\n' characters, whichever comes first. Both
 * reported positions are byte offsets into text->characters.
 *
 * The leading len / 32 * 32 bytes are examined 32 at a time with AVX2
 * compares; the remaining tail (fewer than 32 bytes) goes through a scalar
 * loop that applies the same rules.
 *
 * Changes relative to the previous version:
 *  - the loaded vector gets its own variable (it used to be assigned to
 *    `nlCmp` before that name was declared, which does not compile);
 *  - the leftover `len32 = 0` override that disabled the vector loop is gone;
 *  - the top-bit extraction no longer evaluates `1 << 31` on a signed int;
 *  - both paths now report the second newline of a pair and close the open
 *    quotation there, so a newline-terminated quotation is reported once.
 *
 * Offsets are handed to push_pair() as int, so the input is expected to be
 * smaller than INT_MAX bytes.
 *
 * @param text    Text to scan; only read.
 * @param output  Collection that receives one (begin, end) pair per quotation.
 */
void __attribute__ ((noinline))
detect_quotes(Text *text, Output *output) {
    const __m256i q32 = _mm256_set1_epi8(0x22);  // '"' in every byte lane
    const __m256i nl32 = _mm256_set1_epi8(0x0A); // '\n' in every byte lane

    // Largest multiple of 32 that does not exceed the text length.
    const size_t len32 = text->len / 32 * 32;

    int start = 0;               // offset of the currently open quote
    bool isStartSet = false;     // true while a quote is open
    unsigned int nlPrevLast = 0; // 1 if the previous block ended in '\n'

    size_t i = 0;
    for (; i < len32; i += 32) {
        const __m256i chunk =
            _mm256_loadu_si256((const __m256i *) &text->characters[i]);

        // Bit j of each mask corresponds to the byte at offset i + j.
        unsigned int qMask =
            (unsigned int) _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, q32));
        const unsigned int nlMask =
            (unsigned int) _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, nl32));

        // Bit j is set when byte j is '\n' and the byte before it is too;
        // for j == 0 "the byte before" is the last byte of the previous block.
        unsigned int nlPairMask = nlMask & ((nlMask << 1) | nlPrevLast);
        nlPrevLast = nlMask >> 31;

        // Skip the per-byte walk when nothing in this block can change state.
        if (qMask != 0 || (isStartSet && nlPairMask != 0)) {
            for (int j = 0; j < 32; ++j) {
                const int pos = (int) (i + (size_t) j);
                if (qMask & 1u) {
                    if (isStartSet) {
                        push_pair(output, start, pos);
                        isStartSet = false;
                    } else {
                        start = pos;
                        isStartSet = true;
                    }
                } else if ((nlPairMask & 1u) && isStartSet) {
                    push_pair(output, start, pos);
                    isStartSet = false;
                }
                qMask >>= 1;
                nlPairMask >>= 1;
            }
        }
    }

    // Carry "previous byte was '\n'" from the vector part into the tail.
    bool isNlActive = nlPrevLast != 0;

    for (; i < text->len; i++) {
        const char c = text->characters[i];
        if (c == '"') {
            if (isStartSet) {
                push_pair(output, start, (int) i);
                isStartSet = false;
            } else {
                start = (int) i;
                isStartSet = true;
            }
            isNlActive = false;
        } else if (c == '\n') {
            if (isStartSet) {
                if (isNlActive) {
                    // Second newline in a row: the quote ends here.
                    push_pair(output, start, (int) i);
                    isStartSet = false;
                } else {
                    isNlActive = true;
                }
            }
        } else {
            isNlActive = false;
        }
    }
}
它们使用下面这个 Makefile 编译
# Build rules for the quote detector.
# submission.c is #included by detect_quotes.c, so only detect_quotes.c is
# passed to the compiler; submission.c is listed as a prerequisite so that
# editing it triggers a rebuild.
# Every recipe line must begin with a TAB character.
CC = gcc
CFLAGS = -Wpedantic -Wall -Wextra -std=c11 -mavx -mavx2 -O3

# Optimised build.
detect_quotes: detect_quotes.c submission.c
	$(CC) $(CFLAGS) -o detect_quotes detect_quotes.c

# Optimised build with debug symbols.
detect_quotes_g: detect_quotes.c submission.c
	$(CC) $(CFLAGS) -g -o detect_quotes_g detect_quotes.c

# Build instrumented for gprof profiling.
detect_quotes_gprof: detect_quotes.c submission.c
	$(CC) $(CFLAGS) -g -pg -o detect_quotes_gprof detect_quotes.c
最后,可以从我的 Google Drive 下载文本文件的示例,这是一个 41 MB 的 txt 文件。
要在编译后运行程序,使用 txt 文件作为第一个也是唯一的参数:detect_quotes.exe gutenbergsmall.txt
解决方案
推荐阅读
- android - 多行文本视图与其基线对齐(ConstraintLayout Android Studio)
- python - Python - 绑定未检测到 Tkinter event_generate
- c# - 在 JsonConverter.ReadJson 中读取原始输入
- java - 如何在 Spring Integration DSL 中为通道设置多个消息处理程序?
- arduino - 在使用 SD 卡读取文件时需要帮助
- xml - 如何使用python根据subelem文件删除xml中的元素?
- ios - 如何将数据从集合视图单元传递到新的视图控制器(以编程方式且没有情节提要)?
- css - 如何在保持线条居中的同时对齐线条上方和下方的文本?
- javascript - 如何将带有表单控件的表单组动态添加到表单组
- css - 使用 CSS 显示姓名首字母