c - 从文件中删除尾随和前导空格
问题描述
我正在尝试从文本文件中读取长度未知的行。在每一行中,字符串的前后都可能有前导和尾随空格。所以我的第一步是逐行读取并为字符串分配内存,然后删除所有前导和尾随空格。之后,我想检查字符串内部是否包含空白字符(空白字符在这里属于无效字符)。例如,字符串不能是 "bad string",但可以是 "goodstring"。但是,当我调用该函数来删除前导和尾随空格时,它也会删除空格之前或之后的字符。
有人可以告诉我我做错了什么吗?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define NCHAR 64
char *readline (FILE *fp, char **buffer);
char *strstrip(char *s);
/* Print every line of the file named by argv[1] (or of stdin when no
 * argument is given), prefixed with its zero-based line index.
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int main (int argc, char **argv)
{
    FILE *input = stdin;        /* default source when no file is named */
    char *text = NULL;          /* line buffer allocated by readline() */
    size_t lineno = 0;

    if (argc > 1) {
        input = fopen (argv[1], "r");
        if (input == NULL) {
            fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
            return 1;
        }
    }

    /* readline() hands back a fresh allocation per line; release it
     * before asking for the next one. */
    for (; readline (input, &text) != NULL; lineno++) {
        printf (" line[%2zu] : %s\n", lineno, text);
        free (text);
        text = NULL;
    }

    if (input != stdin)
        fclose (input);
    return 0;
}
/* Read one line from 'fp' into a newly allocated *buffer (initially NCHAR
 * bytes, doubled with realloc as needed). The newline is not stored.
 * Once the whole line has been read and nul-terminated, leading and
 * trailing whitespace is removed via strstrip() and the result is moved
 * to the start of *buffer, so *buffer can always be passed to free().
 * If the stripped line still contains a ' ' character, the message
 * " invalid characters" is written to stdout.
 *
 * Returns *buffer on success (the caller owns it and must free it).
 * Returns NULL, with *buffer set to NULL, when the initial allocation
 * fails or when EOF is reached before any character was read.
 * If realloc fails, the partial (unstripped) line read so far is returned.
 */
char *readline (FILE *fp, char **buffer)
{
    int ch;
    size_t buflen = 0, nchar = NCHAR;

    *buffer = malloc (nchar);   /* allocate buffer nchar in length */
    if (!*buffer) {
        fprintf (stderr, "readline() error: virtual memory exhausted.\n");
        return NULL;
    }

    while ((ch = fgetc (fp)) != '\n' && ch != EOF) {
        (*buffer)[buflen++] = (char)ch;
        if (buflen + 1 >= nchar) {  /* keep room for the terminator */
            char *tmp = realloc (*buffer, nchar * 2);
            if (!tmp) {
                fprintf (stderr, "error: realloc failed, "
                                 "returning partial buffer.\n");
                (*buffer)[buflen] = 0;
                return *buffer;
            }
            *buffer = tmp;
            nchar *= 2;
        }
    }
    (*buffer)[buflen] = 0;      /* nul-terminate before any string call */

    if (buflen == 0 && ch == EOF) { /* return NULL if nothing read */
        free (*buffer);
        *buffer = NULL;
        return NULL;
    }

    /* Strip only now that the buffer is a valid C string. strstrip() may
     * return a pointer past the leading whitespace; shift the text down so
     * the allocation's start address still begins the string. */
    char *start = strstrip (*buffer);
    if (start && start != *buffer)
        memmove (*buffer, start, strlen (start) + 1);

    /* any blank left after stripping is an interior (invalid) one */
    if (strchr (*buffer, ' ') != NULL)
        puts(" invalid characters");

    return *buffer;
}
/* Trim leading and trailing whitespace from 's'.
 * Trailing whitespace is cut off in place by writing a nul terminator;
 * leading whitespace is skipped by returning a pointer to the first
 * non-whitespace character INSIDE 's'. The returned pointer may therefore
 * differ from 's': use the return value as the stripped string, and keep
 * the original pointer if the memory has to be freed.
 * Returns NULL when 's' is NULL. (ctype.h and string.h required)
 */
char *strstrip(char *s)
{
    size_t len;

    if (!s)
        return NULL;

    len = strlen(s);
    if (!len)                   /* empty string: nothing to write */
        return s;

    /* Walk back by index rather than by pointer so we never form a
     * pointer before the start of the array. isspace() needs a value
     * representable as unsigned char, hence the casts. */
    while (len > 0 && isspace((unsigned char)s[len - 1]))
        len--;
    s[len] = '\0';

    while (isspace((unsigned char)*s))  /* stops at the nul terminator */
        s++;

    return s;
}
解决方案
您无需担心传递给 strstrip() 的字符串的长度,只需遍历字符串中的所有字符即可删除空白字符。例如,以下版本会从 s 中删除所有空白字符:
/** Delete every whitespace character (leading, interior and trailing)
 *  from 's', compacting the remaining characters toward the front.
 *  The string is modified in place and its start address is returned;
 *  NULL is returned for a NULL argument. (ctype.h required)
 */
char *strstrip (char *s)
{
    if (s == NULL)
        return NULL;

    size_t out = 0;                         /* next write position */
    for (size_t in = 0; s[in] != '\0'; in++) {
        if (!isspace ((unsigned char)s[in]))
            s[out++] = s[in];               /* keep non-whitespace only */
    }
    s[out] = '\0';                          /* nul-terminate */

    return s;
}
(注意:如果传给 isspace() 的参数类型是 char,则需要将其强制转换为 unsigned char;请参阅手册页的 NOTES 部分,例如 man 3 isalpha)
仅删除多余的空白
以下版本删除了前导和尾随空格并将多个空格序列折叠为一个空格:
/** Remove excess whitespace from 's' in place: leading and trailing
 *  whitespace is dropped and every interior run of whitespace is reduced
 *  to a single character (the first whitespace character of the run).
 *  Whitespace directly before a '.' is removed as well.
 *  The start address is preserved and returned; the original contents
 *  are overwritten. Returns NULL when 's' is NULL. (ctype.h required)
 */
char *strstrip (char *s)
{
    if (!s) return NULL;                    /* validate string not NULL */
    if (!*s) return s;                      /* handle empty string */

    char *p = s, *wp = s;                   /* read pointer and write pointer */

    while (*p) {
        if (isspace ((unsigned char)*p)) {  /* start of a whitespace run */
            if (wp > s) {                   /* ignore leading ws, while */
                *wp++ = *p;                 /* preserving 1 between words */
            }
            /* skip the remainder of the run; the nul terminator is not
             * whitespace, so this stops at end-of-string */
            while (isspace ((unsigned char)*p)) {
                p++;
            }
            if (!*p) {                      /* bail on end-of-string */
                break;
            }
        }
        if (*p == '.') {                    /* drop ws between word and '.' */
            while (wp > s && isspace ((unsigned char)*(wp - 1))) {
                wp--;
            }
        }
        *wp++ = *p;                         /* use non-ws char */
        p++;
    }

    /* a run that reached end-of-string left one ws char behind: trim it */
    while (wp > s && isspace ((unsigned char)*(wp - 1))) {
        wp--;
    }
    *wp = 0;                                /* nul-terminate */

    return s;
}
(注意:s 必须是可修改的,因此不能是字符串字面量)
推荐阅读
- python - 如何将栅格格式的标签数据加载到 Keras/Tensorflow 中
- c++ - 如何在 C 中编译,但如果我使用任何 C++ 会抛出错误?
- c# - 在 C# 中检测 Python 缩进开始
- c++ - 使用 Qt API 获取连接到我的计算机的设备列表
- javascript - 在 React 组件中访问嵌套的 JSON 数据
- javascript - 按钮数组的onClick函数不起作用
- bazaar - 有没有人为 Windows 构建过 BZR 2.7.0?
- firebase - 如何在不登录的情况下通过 Firebase Auth 创建新用户?
- android - 防止安装特定制造商-API 组合
- r - 如何迭代地形成文本行以显示在 R 闪亮的应用程序中?