java - 如何识别与模式不匹配的数据
问题描述
我正在使用 Matcher 类来识别我在枚举中定义的各种标记(token)。
/**
 * Token categories accepted by the lexer.
 *
 * <p>Each constant carries the regular-expression fragment that recognises it.
 * The declaration order is significant for any code that joins these fragments
 * into one alternation in {@code values()} order: earlier constants win when
 * several fragments could match at the same position (for example {@code IF}
 * is tried before {@code CHAR}).
 */
public enum TokenType {
    // Definitions of accepted tokens
    IF("if"),
    WHILE("while"),
    PRINT("print"),
    TYPE("int|string|boolean"),
    BOOLOP("==|!="),
    BOOLVAL("false|true"),
    INTOP("[+]"),
    CHAR("[a-z]"),
    DIGIT("[0-9]"),
    WHITESPACE("[ \t\f\r\n]+"),
    LPAREN("[(]"),
    RPAREN("[)]");

    /** Regular-expression fragment that matches this kind of token. */
    public final String pattern;

    // Enum constructors are implicitly private, and a nested enum is implicitly
    // static, so neither modifier is spelled out.
    TokenType(String pattern) {
        this.pattern = pattern;
    }
}
public static class Token {
public TokenType type;
public String data;
我的问题是,我还需要识别我的模式中未定义的任何内容,并在发生这种情况时打印错误。
这就是我的匹配器逻辑的样子
// Begin matching tokens.
// NOTE(review): tokenPatterns, tokens and input are declared outside this excerpt.
// Each iteration handles one match; the branches below look up the named group
// whose name equals a TokenType constant's name() to find out which token kind
// produced the match, and append a Token carrying the matched text.
// Matcher.find() scans forward to the next match, so any input that matches
// none of the alternatives is skipped without being reported by this loop.
Matcher matcher = tokenPatterns.matcher(input);
while (matcher.find()) {
if (matcher.group(TokenType.DIGIT.name()) != null) {
tokens.add(new Token(TokenType.DIGIT, matcher.group(TokenType.DIGIT.name())));
continue;
} else if (matcher.group(TokenType.IF.name()) != null) {
tokens.add(new Token(TokenType.IF, matcher.group(TokenType.IF.name())));
continue;
} else if (matcher.group(TokenType.WHILE.name()) != null) {
tokens.add(new Token(TokenType.WHILE, matcher.group(TokenType.WHILE.name())));
continue;
} else if (matcher.group(TokenType.TYPE.name()) != null) {
tokens.add(new Token(TokenType.TYPE, matcher.group(TokenType.TYPE.name())));
continue;
} else if (matcher.group(TokenType.PRINT.name()) != null) {
tokens.add(new Token(TokenType.PRINT, matcher.group(TokenType.PRINT.name())));
continue;
} else if (matcher.group(TokenType.BOOLOP.name()) != null) {
tokens.add(new Token(TokenType.BOOLOP, matcher.group(TokenType.BOOLOP.name())));
continue;
} else if (matcher.group(TokenType.BOOLVAL.name()) != null) {
tokens.add(new Token(TokenType.BOOLVAL, matcher.group(TokenType.BOOLVAL.name())));
continue;
} else if (matcher.group(TokenType.INTOP.name()) != null) {
tokens.add(new Token(TokenType.INTOP, matcher.group(TokenType.INTOP.name())));
continue;
} else if (matcher.group(TokenType.CHAR.name()) != null) {
tokens.add(new Token(TokenType.CHAR, matcher.group(TokenType.CHAR.name())));
continue;
} else if (matcher.group(TokenType.LPAREN.name()) != null) {
tokens.add(new Token(TokenType.LPAREN, matcher.group(TokenType.LPAREN.name())));
continue;
} else if (matcher.group(TokenType.RPAREN.name()) != null) {
tokens.add(new Token(TokenType.RPAREN, matcher.group(TokenType.RPAREN.name())));
continue;
} else if (matcher.group(TokenType.WHITESPACE.name()) != null) {
// A whitespace match produces no token.
continue;
}
}
return tokens;
}
一个可能的解决方案是在我的模式中再添加一个枚举项,用来匹配所有尚未定义的内容,看起来像这样:WHITESPACE("[ \t\f\r\n]+"), LPAREN("[(]"), ERROR("@|#,$,%,^,&.....")。但我不确定有什么切实可行的方法来实现它。
谢谢你的帮助。这是完整代码的链接,以防我遗漏任何内容 - https://pastebin.com/jLtnJwgj
解决方案
你试过类似下面这样的做法吗?
/**
 * Regex-driven lexer that splits an input string into {@link Token}s.
 *
 * <p>Every {@link TokenType} contributes one named capturing group to a single
 * combined pattern. Input that matches none of the regular token types is
 * captured by the catch-all {@link TokenType#OTHER} group, so unknown text is
 * reported as a token instead of being skipped silently.
 */
public class Lexer {

    /**
     * Token categories, in matching priority order.
     *
     * <p>The combined pattern is an alternation built in declaration order, so
     * an earlier constant wins when several could match at the same position.
     * {@code OTHER} must therefore stay last: it matches any single character.
     */
    public enum TokenType {
        // Definitions of accepted tokens
        IF("if"),
        WHILE("while"),
        PRINT("print"),
        TYPE("int|string|boolean"),
        BOOLOP("==|!="),
        BOOLVAL("false|true"),
        INTOP("[+]"),
        CHAR("[a-z]"),
        DIGIT("[0-9]"),
        WHITESPACE("[ \t\f\r\n]+"),
        LPAREN("[(]"),
        RPAREN("[)]"),
        OTHER(".");

        /** Regular-expression fragment that matches this kind of token. */
        public final String pattern;

        TokenType(final String pattern) {
            this.pattern = pattern;
        }
    }

    /** A lexed token: its category plus the input text it covers. */
    public static class Token {
        public TokenType type;
        public String data;

        public Token(final TokenType type, final String data) {
            this.type = type;
            this.data = data;
        }
    }

    /** Combined pattern for all token types, compiled once instead of per call. */
    private static final Pattern TOKEN_PATTERNS = buildTokenPattern();

    /** Joins every token type into {@code (?<NAME>pattern)|(?<NAME>pattern)|...}. */
    private static Pattern buildTokenPattern() {
        final StringBuilder alternation = new StringBuilder();
        for (final TokenType tokenType : TokenType.values()) {
            if (alternation.length() > 0) {
                alternation.append('|');
            }
            alternation.append("(?<").append(tokenType.name()).append('>')
                    .append(tokenType.pattern).append(')');
        }
        // DOTALL lets OTHER's "." also match the line terminators that WHITESPACE
        // does not cover (\u0085, \u2028, \u2029); without it those characters
        // match no alternative and would be dropped without any token.
        return Pattern.compile(alternation.toString(), Pattern.DOTALL);
    }

    /**
     * Splits {@code input} into tokens.
     *
     * <p>Whitespace produces no token. Consecutive characters that match no
     * regular token type are merged into one {@link TokenType#OTHER} token; the
     * run ends at the next recognised token, at whitespace, or at end of input,
     * so the data of an {@code OTHER} token is always a contiguous piece of the
     * input.
     *
     * @param input text to tokenize; must not be {@code null}
     * @return the tokens in input order (empty for empty input)
     * @throws NullPointerException if {@code input} is {@code null}
     */
    public static ArrayList<Token> lex(final String input) {
        // The tokens to return
        final ArrayList<Token> tokens = new ArrayList<>();
        // Unknown characters seen since the last recognised token or whitespace
        final StringBuilder unknown = new StringBuilder();

        final Matcher matcher = TOKEN_PATTERNS.matcher(input);
        while (matcher.find()) {
            final TokenType type = matchedType(matcher);
            if (type == TokenType.OTHER) {
                unknown.append(matcher.group());
                continue;
            }
            // Anything else, whitespace included, terminates the pending unknown run.
            flushUnknown(tokens, unknown);
            if (type != TokenType.WHITESPACE) {
                tokens.add(new Token(type, matcher.group()));
            }
        }
        flushUnknown(tokens, unknown);
        return tokens;
    }

    /** Returns the token type whose named group took part in the current match. */
    private static TokenType matchedType(final Matcher matcher) {
        for (final TokenType candidate : TokenType.values()) {
            if (matcher.group(candidate.name()) != null) {
                return candidate;
            }
        }
        // Every alternative of the combined pattern is a named group, so a
        // successful find() always sets exactly one of them.
        throw new IllegalStateException("match without a token group: " + matcher.group());
    }

    /** Emits the pending unknown run, if any, as one OTHER token and resets it. */
    private static void flushUnknown(final ArrayList<Token> tokens, final StringBuilder unknown) {
        if (unknown.length() > 0) {
            tokens.add(new Token(TokenType.OTHER, unknown.toString()));
            unknown.setLength(0);
        }
    }

    /** Demo entry point: lexes a sample string and prints every token. */
    public static void main(final String[] args) {
        final String input = "if\nprint\nta!?!?!taelse?§.?toto";
        // Create tokens and print them
        final ArrayList<Token> tokens = lex(input);
        for (final Token token : tokens) {
            // "linenumber" is a literal placeholder: Token stores no position.
            System.out.println("DEBUG Lexer - " + token.type + " [ "
                    + token.data + " ] " + "found at " + "linenumber");
        }
    }
}
推荐阅读
- sorting - Go 语言界面中的排序
- javascript - 当滚动()调用时,jQuery fadeTo 调用未按预期运行
- r - 如何仅为一种结果类型计算新数据框?
- loops - 找到第一行的第一个数值并将其插入到第二行的末尾,对同一目录中的多个文件执行此操作
- c++ - 一个“constexpr”标记的变量和一个静态存储持续时间的变量是否可以通过带有推导指南的类类型进行存储?
- arrays - SAS 错误:引用的未声明数组:attrib
- c# - char 二维数组压缩
- javascript - 从字符串计算天数
- c - 为什么我在添加赋值语句时会得到不同的结果?
- .net - 使用 SDL2 和 C++ 在单个 HTML 片段中创建 WASM 的模式