首页 > 解决方案 > C - 将逗号分隔的字符串从文件中提取到数组中 - 分段错误

问题描述

我需要从文件中读取以逗号分隔的不同字符串并将它们存储到数组中。

我有以下代码,我开发了在线阅读不同的问题。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main (){
int N = 200; // Number of sequences 
int L = 1000; // length of sequences
char Nseq[N][L];

FILE *myfile;
char *token;
const char s[2] = ",";
char line[300];
char* filename = "pathofile.txt";
int n = 0;

myfile = fopen(filename, "r");
if (myfile == NULL) {printf("could not open file %s", filename); exit(0);}
while (fgets(line, sizeof(line), myfile) != NULL){
  token = strtok(line, s);
  while (token != NULL){
    strcpy(Nseq[n], token);
    printf("%s\t%u\n", token, n);
    token = strtok(NULL, s);
    n++;
  }
}
fclose(myfile);
for (int n=0; n<100; n++){
  printf ("%s\t%u\n", Nseq[n], n);}
}

我的文件如下(有 200 个序列):

AAAGCCGCCAAAGUAGGCGG,AAAGCCGCCAAAGUAGGCGG,AAAGCCGCCAAAGUAGGCGG,AAAGCCGCCAAAGUAGGCGG,AAAGCCGCCAAAGUAGGCGG,AAAGCCGCCAAAGUAGGCGG,AAAGCCCGCCAAAGAAGGCGG,AAAGCCCGCCAAAGAAGGCGG,AAAGCCCGCCAAAGAAGGCGG,AAAGCCCGGCCAAAGAAGGCGG,AAAGCCCGCCAAAGUAGGCGG,AAAGCCCGCCAAAGUAGGCGG,AAAGCCCGCCAGAAGUAGGCGG,AAAGCCCGCCAAAGUAGGCGG,AAAGCCCGCCAAAGUAGGCGG,AAAGCACCGCCAAUGGGCGG,AAAGCACCGCCAAUAGGCGG,AAAGCACCGCCAAUAGGCGG,AUAGCACCGCCAAUAGGCGG,AUAGCACCGCCAAUAGGCGG,AUAGCACCGCCAGUAGGCGG,AUAGCACCGCCAAUAGGCGG,AAAGCACCGCCAAAUAAGGCGGG,AAAGCACCGCCAAAUAAGGCGGG,AAAGCACCGCCAAAUAGGCGGG, AAAGCACCGCCAAAUAAGGCGG,AAAGCACCGCCAAAUAAGGCGG,AAAGCACCGCCAAAUAAGGCGG,AAAGCACCGCCAAAUAAGGCGG,AAAGCACCGCCAAAUAAGGCGG,AAAGCACCGCCAAAUAAGGCGG,AAAGCACCGCCAAAUAAGGCGG,AAAGCACCGCCAAAGCAGGCGG,AAAGCACCGCCAAAUAAGGCGG,AAAGCACCGCCAAAUAAGGCGG,AAAGCACCGCCAAAUAAGGCGG,AAAGCACCGCCAAAGAAGGCGG,AAAGCACCGCCAAAGCACCGCCAAAUAAGGCGG,AAAGCACCGCCAAAUAAGGCGG,AAAGCACCGCCAAAUAAGGCGG,AAAGCACGGCCAAAUAAGGCGG,AAAGCACCGCCAAAUAAGGCGG,AAAGCACCGCCAAUAAGGCGG,AAAGCACCGCCAAAAGUCGAGGCGG,AAAGCACCGCCAAAAUGUGAGGCGG,AAAGCACCGCCAAAUGUGAGGCGG,AAAGCACCGCCAAAAUGGUGAGGCGG,AAAGCACCGCCAAAAGUGAGGCGG,AAAGCACCGCCAAAAGUGAGGCGG,AAAGCACCGCCAAAAGUGAGGCGG,AAAGCACCGCCAAAAGUGAGGCGG,AAAGCACCGCCAAAAGUAAGGCGG,AAAGACCGCCAAAAGUAAGGCGG,AAAGCACCGCCAAAAGUAAGGCGG,AAAGCACCGCCAAAAGUAAGGCGG,AAAGCACCGCCAAAGUUAAGGCGG,AAAGCACCGCCAAAGUAAGGCGG,AAAGCACCGCCAAAGUAAGGCGG,AAAGCACCGCCAAAGUAAGGCGG,UAACGCCGGCCAACUAGGGCGG,AACAGCCCGGCCAAAUAGGGCGG,AAAGCCGCCAAACUGGCGG, AAAGCCGCCAAACUGGCGG,AAACCGCCCAAAUAGGGCGG,AAAGCCGCCCAAAUAGGGCGG,AAAGCCGCCCAAAUAGGCGG,AAAGCCGCCAAAUAGGCGG,AAAGCCGCCAAAUAGGCGG,AAAGCCGCCCAAAUAGGCGG,AAAUCCGCCCAAAUAGGCGG,UAAAGCCGCCCUAAAUAGGCGG,AAAGCCGCGCAAAUAGGCGG,AAAGCCGCCCCAAAUAGGCGG,AAAGCCCCAAAUAAGGCGG,AAAGCCGCCCAAAUAGGCGUG,AAAGCCGCCCAAAUAGGCGG,AAAGCCGCCCAAAUAGGCGG,AAAGCCGCCCAAAUAGGCGG,AAAGCCGCCCAAAUAGGCGG,AAAGCCGCCAAAUAGGCGG,AAAGCCGCCAAAUAGGCGG,AAAGCCGCCAAAUAGGCGG,AAAGCCGCCCAAAUAGGCGG,AAAGCCGCCAAAUGGCGGA,AAAGCCGCCAACCGGCGG,AAAGCCGCCAACCGGCGG,AAAGCCGCCAACCGGCGG,AAAGCCGUCAACCGGCGG,AAAGCCGCCAACCGGCGG,AAAGCCGCCAACCGGCGG,AAAGCGCCAACCGGCGG,AAAGCCGCCAACCGGCGG,AAAGCCGCCAACCGGCGG,AAAGCCGCCAACCGGCGG,CACUGCCGGCCAAGUCGGCGG,CAUUGCCGGCCAAGUCGGCGG,CACUGCCGGCCAAGUCGGCGG,CAUGCCGGCCAAGUCGGCGG,CACUCCGGCCAAGUCGGCGG, CACUGCCGGCCAAGUCGGCGG,CACUGCCGGACCAAGUCGGCGG,CACUGCCGGCCAAGUCGGCGG,UCAAUUGCCGGCCAAGUCGGCGG,UCAAUUGCCGGCCAAGUCGGCGG,UUUAAGGCCGCACAUGCGGCCGUG,UUAAGGCCGGAAACAUUCGGCCGUG,UUAAGGCCGCACAUUCGGCCGGG,UUAAGGCCGCACAUUCGGCCGGG,UUAAGGCCGCACAUUCGGCCGGG,UUAAAAGGCCGACAUUGCGGCCGGG,UUAAAGGCCGACAUUGCGGCCGGG,UUAAGUCCGCACAUUCGGCCGGG,UUAAGGCCGCACAUUCGGCCGGG,UUAAGGCCGCACAUUCGGCCGGG,UUAAGGCCGCACAUUCGGCCGGG,UUAAGGCCGCACAUUCGGCCGGG,UUAAGGCCGCACAUCGGCCGGG,UAAGGCCGCACAUUCGGCCGGG,UAAGGCCGCACAUUCGGCCGGG,UAAGGCCGGCACAUUCGGCCGGG,UAAGGCCGCACAUUCGGCCGGG,UAAGGCCGCACAUUCGGCCGGG,UAAGGCCGCACAUUCGGCCGGG,UAAGGCCGCACAUUCGGCCGGG,UAAGGCCGCACAUUCGGCCGGG,UAAGGCCGCACAUGUCGGCCGGGU,UAAGGCCGCACAUUCGGCCGGG,UAAGGCCGCACAUUCGGCCGGG,UAGGCCGCAAGUCGGCCGGG,UAGGCCGCAAGCGGCCGGG,UAGGCCGCAAGCGGCCGGG,UAGGCCGCAAGCGGCCGGG,UAGGCCGCAAGUCGGCCGGG,UAGGCCGCAAGUCGGCCGGG,UAGGCCGCAAGUCGGCCGGG,UAGGCCGCAAGUCGGCCGGG,GAUCGGCCGGCAGCCUCCCGGCGG,GAUCGGCCGGCAGCCUCCCGGCGG, GAUCGGCCGGCAGCCUCCCGGCGG,GAUCGGCCGGCAGCCUCCCGGCGG,GAUCGGCCCGGCAGCCUCCCGGCGG,GAUCGGCCCGGCAGCCUCCCGGCGG,GAUCGGCCGGCAGCCGUACCGGGCGG,AGAUCGGCCGGCAGCCGUACCGGGCGG,GAUCGGCCGGCAGCCGUACCGGGCGG,UAUCGGCCGGCACCGUACCGGGGG,UAUCGGCCGGCACCGUACCGGCGGG,UAUCGGCGGCACCGUACCGGCGGG,UCGCGGCACCGUACCGGCGGGUAUCGCCGGCACCGUACCGGCGGG,AUUAGGGCCGCCAUAACGGCGG,AUUAGGGCCGCCAAUAACGGCGG,AUUAGGGCCGCCUAUAACGGCGG,GUGUUGCGUGCCGCCUUAAGGCG,GUGUUGCGUGCGCCUUAAGGCG,GUGUUGCGUGCCGCCUUAAGGCG,GUGUUGCGUGCCGCCUUAAGGCG,GUGUUGCGUGCCGCCUUAAGGCG,GUGUUGCGUGCCGCCUUAAGGCG,GUGUUGCGUGCCGCCUUAAGGCG,GUGUUGCGUGCCGUCUGAAGGCG,GUGUUGCGUGCCGCCUUAAUGCG,GUGUUGCGUGCCGCCUUAAGGCG,GUGUUGCGUGCCGCCUUAAGGCG,GUGUUGCGUGCCGCCUUAAGGCG,GUGUUGCGUGCCGCCUUAAGGCG,GUGUUGCGUGCCGCCUUAAGGCG,GUGUUGCGUGCCGCCUUAAGGCG,GUGUUGCGUGCCGCCCUUAAGGCG,GUGUUGCGUGCCGCCUUAAGGCG,GUGUUGCGUGCCGCCUUAAGGCG,GUGUUGCGUGCGCCUUAAGGCG,CUGUUGCGUGCCGCCUUAAGGCG,CUGUUGCGUGCCGCCUUAAGGCG, CUGUUGCGUGCCGCCUUAAGGCG,GUGUUGCGUGCCGCCGUUACAGGCG,GUGUUGCGUGCCGCCGUUACAGGCG,GUGUUGCGUGCCGCCUUAAGGCG,GUGUUGCGUGCCGCCUUAAGGCG,GUGUUGCGUGCCGCCUUAAGGCG,UUGGUCCGCCUUACGGCGGG,UUGGUCCGCCUUACGGCGGG,UUGGUCCGCCCUUACGGCGGGG,UGGUCCGUUCGACGGCGGGG,GUUGUAGCCCGCCUUCGGCGGG,GUUGUUGCCGCCUUACGGCGG,GUUGUUGCCGCCUUACGGCGG,GUUGUUGCCGCCUUACGGCGG,GUUGUUGCCGCCUGACGGCGG,GUUGUUGCCGCCUGACGGCGG,GUUGUUGCCGCCUGACGGCGG,GUUGUUGCCGCCUGACGGCGG,GUUGUUUGCCGCCUGACGGCGG,GUUCCUUGCCAGCCUUACGGCGG,CUUGCGCCGCCUUACGGCGUG,CUUGCGCCGCCUUACGGCGUG,CUUGCGCCGCCUUACGGCGUG,CUUGCGCAGCCUUACGGCGUG,

当我运行代码时,我得到:

AAAGCCGCCAAAGUAGGCGG    0
AAAGCCGCCAAAGUAGGCGG    1
AAAGCCGCCAAAGUAGGCGG    2
AAAGCCGCCAAAGUAGGCGG    3
AAAGCCGCCAAAGUAGGCGG    4
AAAGCCGCCAAAGUAGGCGG    5
AAAGCCCGCCAAAGAAGGCGG   6
AAAGCCCGCCAAAGAAGGCGG   7
AAAGCCCGCCAAAGAAGGCGG   8
AAAGCCCGGCCAAAGAAGGCGG  9
AAAGCCCGCCAAAGUAGGCGG   10
AAAGCCCGCCAAAGUAGGCGG   11
AAAGCCCGCCAGAAGUAGGCGG  12
AAAGCCCGCCAAAGUAG       13
GCGG    14
AAAGCCCGCCAAAGUAGGCGG   15
AAAGCACCGCCAAUGGGCGG    16
AAAGCACCGCCAAUAGGCGG    17
AAAGCACCGCCAAUAGGCGG    18
AUAGCACCGCCAAUAGGCGG    19
AUAGCACCGCCAAUAGGCGG    20
AUAGCACCGCCAGUAGGCGG    21
AUAGCACCGCCAAUAGGCGG    22
AAAGCACCGCCAAAUAAGGCGGG 23
AAAGCACCGCCAAAUAAGGCGGG 24
AAAGCACCGCCAAAUAGGCGGG  25
AAAGCACCGCCAAAUAAGGCGG  26
AAAGCACCGCCAAAUAAGGCGG  27
AAAGCACC        28
GCCAAAUAAGGCGG  29
AAAGCACCGCCAAAUAAGGCGG  30
AAAGCACCGCCAAAUAAGGCGG  31
AAAGCACCGCCAAAUAAGGCGG  32
AAAGCACCGCCAAAUAAGGCGG  33
AAAGCACCGCCAAAUAAGGCGG  34
AAAGCACCGCCAAAUAAGGCGG  35
AAAGCACCGCCAAAUAAGGCGG  36
AAAGCACCGCCAAAUAAGGCGG  37
AAAGCACCGCCAAAUAAGGCGG  38
AAAGCACCGCCAAAUAAGGCGG  39
AAAGCACCGCCAAAUAAGGCGG  40
AAAGCACCGCCAAAUAAGGCGG  41
AAAGCACC        42
GCCAAAUAAGGCGG  43
AAAGCACGGCCAAAUAAGGCGG  44
AAAGCACCGCCAAAUAAGGCGG  45
AAAGCACCGCCAAUAAGGCGG   46
AAAGCACCGCCAAAAGUCGAGGCGG       47
AAAGCACCGCCAAAAUGUGAGGCGG       48
AAAGCACCGCCAAAUGUGAGGCGG        49
AAAGCACCGCCAAAAUGGUGAGGCGG      50
AAAGCACCGCCAAAAGUGAGGCGG        51
AAAGCACCGCCAAAAGUGAGGCGG        52
AAAGCACCGCCAAAAGUGAGGCGG        53
AAAGCACCGCCAAAAGUGAGGCGG        54
AAAGCACCGCCA    55
AAAGUAAGGCGG    56
AAAGACCGCCAAAAGUAAGGCGG 57
AAAGCACCGCCAAAAGUAAGGCGG        58
AAAGCACCGCCAAAAGUAAGGCGG        59
AAAGCACCGCCAAAGUUAAGGCGG        60
AAAGCACCGCCAAAGUAAGGCGG 61
AAAGCACCGCCAAAGUAAGGCGG 62
AAAGCACCGCCAAAGUAAGGCGG 63
UAACGCCGGCCAACUAGGGCGG  64
AACAGCCCGGCCAAAUAGGGCGG 65
AAAGCCGCCAAACUGGCGG     66
AAAGCCGCCAAACUGGCGG     67
AAACCGCCCAAAUAGGCGG     68
AAAGCCGC        69
CCAAAUAGGCGG    70
AAAGCCGCCCAAAUAGGCGG    71
AAAGCCGCCAAAUAGGCGG     72
AAAGCCGCCAAAUAGGCGG     73
AAAGCCGCCCAAAUAGGCGG    74
AAAUCCGCCCAAAUAGGCGG    75
UAAAGCCGCCCUAAAUAGGCGG  76
AAAGCCGCGCAAAUAGGCGG    77
AAAGCCGCCCCAAAUAGGCGG   78
AAAGCCGCCCCAAAUAGGCGG   79
AAAGCCGCCCAAAUAGGCGUG   80
AAAGCCGCCCAAAUAGGCGG    81
AAAGCCGCCCAAAUAGGCGG    82
AAAGCCGCCCAAAUAGGCGG    83
AAAGCCGCCC      84
AAAUAGGCGG      85
AAAGCCGCCAAAUAGGCGG     86
AAAGCCGCCAAAUAGGCGG     87
AAAGCCGCCAAAUAGGCGG     88
AAAGCCGCCCAAAUAGGCGG    89
AAAGCCGCCAAAUGGCGGA     90
AAAGCCGCCAACCGGCGG      91
AAAGCCGCCAACCGGCGG      92
AAAGCCGCCAACCGGCGG      93
AAAGCCGUCAACCGGCGG      94
AAAGCCGCCAACCGGCGG      95
AAAGCCGCCAACCGGCGG      96
AAAGCGCCAACCGGCGG       97
AAAGCCGCCAACCGGCGG      98
AAAGCCGCCAACCGGCGG      99
AAAGCCGCCAACCGGCG       100
G       101
CACUGCCGGCCAAGUCGGCGG   102
CAUUGCCGGCCAAGUCGGCGG   103
CACUGCCGGCCAAGUCGGCGG   104
CAUGCCGGCCAAGUCGGCGG    105
CACUCCGGCCAAGUCGGCGG    106
CACUGCCGGCCAAGUCGGCGG   107
CACUGCCGGACCAAGUCGGCGG  108
CACUGCCGGCCAAGUCGGCGG   109
UCAAUUGCCGGCCAAGUCGGCGG 110
UCAAUUGCCGGCCAAGUCGGCGG 111
UUUAAGGCCGCACAUGCGGCCGUG        112
UUAAGGCCGGAAACAUUCGGCCGUG       113
UUAAGGCCGCACAUUCGGCCGGG 114
UUAAGGCCGCACAUUCGGCCGGG 115
UUAAGGCCGCACAUUCGGCCGGG 116
UUAAAAGGCCGACAUUGCGGCCGGG       117
UUAAAGGCCGACAUUGCGGCCGGG        118
UUAAGUCCGCACAUUCGGCCGGG 119
UUAAGGCCGCACAUUCGGCCGGG 120
UUAAGGCCGCACAUUCGGCCGGG 121
UUAAGGCCGCACAUUCGGCCGGG 122
UUAAGGCCGCACAUUCGGCCGGG 123
UUAAGGCCGCACAUCGGCCGGG  124
UAAGGCCGCACAUUCGGCCGGG  125
UAAGGCCGCACAUUCGGCCGGG  126
UAAGGCCGGC      127
ACAUUCGGCCGGG   128
UAAGGCCGCACAUUCGGCCGGG  129
UAAGGCCGCACAUUCGGCCGGG  130
UAAGGCCGCACAUUCGGCCGGG  131
UAAGGCCGCACAUUCGGCCGGG  132
UAAGGCCGCACAUUCGGCCGGG  133
UAAGGCCGCACAUGUCGGCCGGGU        134
UAAGGCCGCACAUUCGGCCGGG  135
UAAGGCCGCACAUUCGGCCGGG  136
UAGGCCGCAAGUCGGCCGGG    137
UAGGCCGCAAGCGGCCGGG     138
UAGGCCGCAAGCGGCCGGG     139
UAGGCCGCAAGCGGCCGGG     140
UAGGCCGCAAGUCGGCCG      141
GG      142
UAGGCCGCAAGUCGGCCGGG    143
UAGGCCGCAAGUCGGCCGGG    144
UAGGCCGCAAGUCGGCCGGG    145
GAUCGGCCGGCAGCCUCCCGGCGG        146
GAUCGGCCGGCAGCCUCCCGGCGG        147
GAUCGGCCGGCAGCCUCCCGGCGG        148
GAUCGGCCGGCAGCCUCCCGGCGG        149
GAUCGGCCCGGCAGCCUCCCGGCGG       150
GAUCGGCCCGGCAGCCUCCCGGCGG       151
GAUCGGCCGGCAGCCGUACCGGCGG       152
AGAUCGGCCGGCAGCCGUACCGGCGG      153
GAUCGGCCGGCAGCCGUACCGGCGG       154
UA      155
UCGGCCGGCACCGUACCGGGGG  156
UAUCGGCCGGCACCGUACCGGCGGG       157
UAUCGGCGGCACCGUACCGGCGGG        158
UAUCGGCCGGCACCGUACCGGCGGG       159
UAUCGCCGGCACCGUACCGGCGGG        160
AUUAGGGCCGCCAUAACGGCGG  161
AUUAGGGCCGCCAAUAACGGCGG 162
AUUAGGGCCGCCUAUAACGGCGG 163
GUGUUGCGUGCCGCCUUAAGGCG 164
GUGUUGCGUGCGCCUUAAGGCG  165
GUGUUGCGUGCCGCCUUAAGGCG 166
GUGUUGCGUGCCGCCUUAAGGCG 167
GUGUUGCG        168
UGCCGCCUUAAGGCG 169
GUGUUGCGUGCCGCCUUAAGGCG 170
GUGUUGCGUGCCGCCUUAAGGCG 171
GUGUUGCGUGCCGUCUGAAGGCG 172
GUGUUGCGUGCCGCCUUAAUGCG 173
GUGUUGCGUGCCGCCUUAAGGCG 174
GUGUUGCGUGCCGCCUUAAGGCG 175
GUGUUGCGUGCCGCCUUAAGGCG 176
GUGUUGCGUGCCGCCUUAAGGCG 177
GUGUUGCGUGCCGCCUUAAGGCG 178
GUGUUGCGUGCCGCCUUAAGGCG 179
GUGUUGCGUGCCGCCCUUAAGGCG        180
GUGUUGCGUGCCGCCUUA      181
AGGCG   182
GUGUUGCGUGCCGCCUUAAGGCG 183
GUGUUGCGUGCGCCUUAAGGCG  184
CUGUUGCGUGCCGCCUUAAGGCG 185
CUGUUGCGUGCCGCCUUAAGGCG 186
CUGUUGCGUGCCGCCUUAAGGCG 187
GUGUUGCGUGCCGCCGUUACAGGCG       188
GUGUUGCGUGCCGCCGUUACAGGCG       189
GUGUUGCGUGCCGCCUUAAGGCG 190
GUGUUGCGUGCCGCCUUAAGGCG 191
GUGUUGCGUGCCGCCUUAAGGCG 192
UUGGUCCGCCUUACGGCGGG    193
UUGGUCCGCCUUACGGCGGG    194
UUGGUCCG        195
CCUUACGGCGGG    196
UUGGUCCGCCUUACGGCGGG    197
UUCGUCCGCCUUACGGCGGG    198
GUUGUAGCCCGCCUUCGGCGGG  199
GUUGUUGCCGCCUUACGGCGG   200
GUUGUUGCCGCCUUACGGCGG   201
GUUGUUGCCGCCUUACGGCGG   202
GUUGUUGCCGCCUGACGGCGG   203
GUUGUUGCCGCCUGACGGCGG   204
GUUGUUGCCGCCUGACGGCGG   205
GUUGUUGCCGCCUGACGGCGG   206
GUUGUUUGCCGCCUGACGGCGG  207
GUUCCUUGCCAGCCUUACGGCGG 208
Segmentation fault (core dumped)

我知道我必须解决新行问题,但我不知道为什么会出现分段错误。因为它似乎有效,但我没有到达文件的末尾。关于导致这种情况的任何想法?谢谢

标签: csegmentation-fault

解决方案


由于读取缓冲区的(任意)大小,您正在拆分一些序列,因此您的程序会看到超过200 个序列,因此您的数组容纳它们太小了。


推荐阅读