c - 如何处理 c 中的换行符和分隔符间距?
问题描述
实际的文本文件只是用于测试 lex 和解析的随机内容。上面的图片是结果,控制台在运行时给我。在绿色中,它应该是换行符或分隔符时调用标识符,因此不需要任何内容。在红色中它没有识别分隔符,在黄色中它根本没有读取 something.something。我假设它与前面的 c 有关系;没有正确分开。
所以我的问题是如何正确分离标记,并识别换行符,或者我做错了什么。下面是我用来进行分离和标记化的代码。
#define _CRT_SECURE_NO_WARNINGS
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define BUFFER_SIZE 1024
// Returns 'true' if the character is a DELIMITER.
bool isDelimiter(char ch)
{
if (isspace (ch))
return (true);
return (false);
}
// Returns 'true' if the character is a SEPERATOR.
bool isSeperator(char str)
{
if (str == ',' || str == ';' || str == '>' ||
str == '<' || str == '(' || str == ')' || str == '[' || str == ']' ||
str == '{' || str == '}' || str == '.' )
return (true);
return (false);
}
// Returns 'true' if the character is an OPERATOR.
bool isOperator(char ch)
{
if (ch == '+' || ch == '-' || ch == '*' ||
ch == '/' || ch == '>' || ch == '<' ||
ch == '=')
return (true);
return (false);
}
// Returns 'true' if the string is a VALID IDENTIFIER.
bool validIdentifier(char* str)
{
if (str[0] == '0' || str[0] == '1' || str[0] == '2' ||
str[0] == '3' || str[0] == '4' || str[0] == '5' ||
str[0] == '6' || str[0] == '7' || str[0] == '8' ||
str[0] == '9' )
return (false);
return (true);
}
// Returns 'true' if the string is a KEYWORD.
bool isKeyword(char* str)
{
if (!strcmp(str, "if") || !strcmp(str, "else") ||
!strcmp(str, "while") || !strcmp(str, "do") ||
!strcmp(str, "break") || !strcmp(str, "elem") ||
!strcmp(str, "lout") || !strcmp(str, "file") ||
!strcmp(str, "console") || !strcmp(str, "read") ||
!strcmp(str, "write") || !strcmp(str, "mark") ||
!strcmp(str, "emblemnize") || !strcmp(str, "lin") ||
!strcmp(str, "send") || !strcmp(str, "dint") ||
!strcmp(str, "continue") || !strcmp(str, "int")
|| !strcmp(str, "double") || !strcmp(str, "float")
|| !strcmp(str, "return") || !strcmp(str, "char")
|| !strcmp(str, "case") || !strcmp(str, "char")
|| !strcmp(str, "sizeof") || !strcmp(str, "long")
|| !strcmp(str, "short") || !strcmp(str, "typedef")
|| !strcmp(str, "switch") || !strcmp(str, "unsigned")
|| !strcmp(str, "void") || !strcmp(str, "static")
|| !strcmp(str, "struct") || !strcmp(str, "goto"))
return (true);
return (false);
}
// Returns 'true' if the string is an INTEGER.
bool isInteger(char* str)
{
int i, len = strlen(str);
if (len == 0)
return (false);
for (i = 0; i < len; i++) {
if (str[i] != '0' && str[i] != '1' && str[i] != '2'
&& str[i] != '3' && str[i] != '4' && str[i] != '5'
&& str[i] != '6' && str[i] != '7' && str[i] != '8'
&& str[i] != '9' || (str[i] == '-' && i > 0))
return (false);
}
return (true);
}
// Returns 'true' if the string is a REAL NUMBER.
bool isRealNumber(char* str)
{
int i, len = strlen(str);
bool hasDecimal = false;
if (len == 0)
return (false);
for (i = 0; i < len; i++) {
if (str[i] != '0' && str[i] != '1' && str[i] != '2'
&& str[i] != '3' && str[i] != '4' && str[i] != '5'
&& str[i] != '6' && str[i] != '7' && str[i] != '8'
&& str[i] != '9' && str[i] != '.' ||
(str[i] == '-' && i > 0))
return (false);
if (str[i] == '.')
hasDecimal = true;
}
return (hasDecimal);
}
// Extracts the SUBSTRING.
char* subString(char* str, int left, int right)
{
int i;
char* subStr = (char*)malloc(sizeof(char) * (right - left + 2));
for (i = left; i <= right; i++)
subStr[i - left] = str[i];
subStr[right - left + 1] = '\0';
return (subStr);
}
// Parsing the input STRING.
void parse(char* str)
{
int left = 0, right = 0;
int len = strlen(str);
while (right <= len && left <= right)
{
if (isDelimiter(str[right]) == false)
right++;
if (isDelimiter(str[right]) == true && left == right)
{
if (isOperator(str[right]) == true)
printf("'%c' IS A OPERATOR\n", str[right]);
right++;
left = right;
}
if (isDelimiter(str[right]) == true && left == right)
{
if (isDelimiter(str[right]) == true)
printf("'%c' IS A DELIMITER\n", str[right]);
right++;
left = right;
}
if (isSeperator(str[right]) == true && left == right)
{
//needed to recognize seperator to the right
if (isSeperator(str[right]) == true)
printf("'%c' IS A SEPERATOR\n", str[right]);
right++;
left = right;
//needed to recognize seperator to the left
if (isSeperator(str[right]) == true)
printf("'%c' IS A SEPERATOR\n", str[left]);
right++;
left = right;
}
else if (isDelimiter(str[right]) == true && left != right
|| (right == len && left != right)) {
char* subStr = subString(str, left, right - 1);
if (isKeyword(subStr) == true)
printf("'%s' IS A KEYWORD\n", subStr);
else if (isInteger(subStr) == true)
printf("'%s' IS AN INTEGER\n", subStr);
else if (isRealNumber(subStr) == true)
printf("'%s' IS A REAL NUMBER\n", subStr);
else if (validIdentifier(subStr) == true
&& isDelimiter(str[right - 1]) == false
&& isSeperator(str[right - 1]) == false)
printf("'%s' IS A VALID IDENTIFIER\n", subStr);
left = right;
}
}
return;
}
int main(int argc, char *argv)
{
/* declare a file pointer */
FILE *file;
char *buffer;
long numbytes;
/* open an existing file for reading */
file = fopen("Text.txt", "r");
/* quit if the file does not exist */
if (file == NULL)
return 1;
/* Get the number of bytes */
fseek(file, 0L, SEEK_END);
numbytes = ftell(file);
/* reset the file position indicator to
the beginning of the file */
fseek(file, 0L, SEEK_SET);
/* grab sufficient memory for the
buffer to hold the text */
buffer = (char*)calloc(numbytes, sizeof(char));
/* memory error */
if (buffer == NULL)
return 1;
/* copy all the text into the buffer */
fread(buffer, sizeof(char), numbytes, file);
/* confirm we have read the file by
outputing it to the console */
printf(" The file called Text.txt contains this text \n \n %s \n\n", buffer);
parse(buffer); // calling the parse function
fclose(file);
/* free the memory we used for the buffer */
free(buffer);
return 0;
}
解决方案
Looks like the problem is your isDelimiter
function isn't picking up all possible values. If you change it to use isspace()
it will match on all forms of whitespace.
bool isDelimiter(char ch)
{
if (isspace(ch))
return (true);
return (false);
}
As an example, here is a very simple state machine to give you an idea of what sort of thing I meant. It can be in either of one of two states - INSIDE_IDENTIFIER or OUTSIDE_IDENTIFIER - and it switches between those two states depending on what kind of character it is looking at.
#define OUTSIDE_IDENTIFIER (0)
#define INSIDE_IDENTIFIER (1)
void parse(char *str)
{
char *ch;
int state=OUTSIDE_IDENTIFIER;
char buffer[1000];
char *pos=buffer;
for(ch=str;*ch!='\0';ch++)
{
switch(state)
{
case INSIDE_IDENTIFIER:
if(isOperator(*ch))
{
*pos='\0';
printf("Identifier[%s]\n",buffer);
printf("Operator[%c]\n",*ch);
state=OUTSIDE_IDENTIFIER;
}
else if(isDelimiter(*ch))
{
*pos='\0';
printf("Identifier[%s]\n",buffer);
printf("Delimiter[%c]\n",*ch);
state=OUTSIDE_IDENTIFIER;
}
else if(isspace(*ch))
{
*pos='\0';
printf("Identifier[%s]\n",buffer);
printf("Space[%d]\n",*ch);
state=OUTSIDE_IDENTIFIER;
}
else
{
*pos=*ch;
pos++;
}
break;
case OUTSIDE_IDENTIFIER:
default:
if(isOperator(*ch))
{
printf("Operator[%c]\n",*ch);
}
else if(isDelimiter(*ch))
{
printf("Delimiter[%c]\n",*ch);
}
else if(isSeperator(*ch))
{
printf("Seperator[%c]\n",*ch);
}
else
{
state = INSIDE_IDENTIFIER;
pos=buffer;
*pos=*ch;
pos++;
}
break;
}
}
}
推荐阅读
- python - 如何在 Anaconda 中使用 boto3.dynamodb.condition 导入
- swift - Swift:当应用程序进入后台时启动另一个计时器
- php - 如何在当前启用的 PHP 版本中安装 imagick?
- c# - 如何使标准 ASCII 折叠分析器与建议一起使用?
- python - 在阻塞脚本的线程中循环
- reactjs - 使用循环设置跨度标签
- java - 在变量初始化程序中获取空指针异常
- django - 具有递归模型的 HyperlinkedModelSerializer 问题
- c# - C# 中是否有 gcloud auth application-default print-access-token 等效项?
- java - 如何解决这个找不到由于标识符而不断出现的符号?