package play.parser; import java.util.*; /** * 能够表达EBNF的对象。 * 1.每个GrammarNode可以有多个子节点; * 2.子节点之间可以是And关系,或Or关系,由type属性来确定。 * 3.minTimes和maxTimes属性规定了该节点的重复次数。比如对于+号,minTimes=1,maxTimes=-1,-1代表很多个。 * 4.该节点可以有名称,也就是词法规则和语法规则中左边的部分。如果没有起名称,系统会根据它的父节点的名称生成自己的缺省名称, * 并且以下划线开头。比如_add_Or_1。 */ public class GrammarNode { //子节点 private List children = new LinkedList(); //节点类型 private GrammarNodeType type = null; //用于词法规则,指该节点能匹配的字符的集合。其中Charset可以是一个树状结构,由多个子结合构成。比如:[a-z][A-Z][0-9]等。 private CharSet charSet = null; //改节点可以重复的次数 private int minTimes = 1; private int maxTimes = 1; //节点名称,可以作为Token名称或非终结符名称 private String name = null; //语法规则中的Token,即终结符 private Token token = null; //是否被词法处理器忽略,比如空白字符 private boolean neglect = false; public static GrammarNode EPSILON = new GrammarNode(GrammarNodeType.Epsilon); protected GrammarNode(GrammarNodeType type){ this.type = type; } protected GrammarNode(String name, GrammarNodeType type){ this.name = name; this.type = type; } protected GrammarNode(String name, GrammarNodeType type, GrammarNode child, Token token){ this.name = name; this.type = type; this.children.add(child); this.token = token; } protected GrammarNode(String name, GrammarNodeType type, List children, Token token){ this.name = name; this.type = type; this.children.addAll(children); this.token = token; } protected GrammarNode(Token token){ this.type = GrammarNodeType.Token; this.token = token; } protected GrammarNode(CharSet charSet){ this.type = GrammarNodeType.Char; this.charSet = charSet; } protected GrammarNode(String name, CharSet charSet){ this.name = name; this.type = GrammarNodeType.Char; this.charSet = charSet; } protected GrammarNode createChild(CharSet charSet){ GrammarNode grammarNode = new GrammarNode(charSet); addChild(grammarNode); return grammarNode; } protected GrammarNode createChild(String name, CharSet charSet){ GrammarNode grammarNode = new GrammarNode(name, charSet); addChild(grammarNode); return grammarNode; } protected GrammarNode createChild(GrammarNodeType type){ GrammarNode grammarNode = new GrammarNode(type); addChild(grammarNode); return grammarNode; } protected GrammarNode createChild(String name, GrammarNodeType type){ GrammarNode grammarNode = new GrammarNode(name, type); addChild(grammarNode); return grammarNode; } protected GrammarNode createChild(Token token){ GrammarNode grammarNode = new GrammarNode(token); grammarNode.type = GrammarNodeType.Token; addChild(grammarNode); return grammarNode; } //添加子节点,并创建缺省名称 protected void addChild(GrammarNode child){ children.add(child); if (child.name == null){ if (child.getGrammarName() != null) { child.name = "_" + child.getGrammarName() + children.size(); } else{ child.name = "_" + child.type + children.size(); } if (this.name!=null){ child.name = this.name + child.name; } if (child.name.charAt(0) != '_'){ child.name = "_"+child.name; } } } /** * 是否是显式命名的子节点。词法规则中的Token、语法规则中的非终结符,都有名称。 * @return */ protected boolean isNamedNode(){ if (name!= null && name.length()> 1 && name.charAt(0) != '_'){ return true; } return false; } /** * 子节点列表。只读。 * @return */ public List children(){ return Collections.unmodifiableList(children); } public int getChildCount(){ return children.size(); } public GrammarNode getChild(int index){ return children.get(index); } //节点类型 public GrammarNodeType getType(){ return type; } public String getName(){ return name; } public String getGrammarName(){ if (token!= null){ return token.getType(); } else if (isNamedNode()){ return name; } return null; } @Override public boolean equals(Object obj) { if (this == obj) return true; GrammarNode node = (GrammarNode)obj; //类型相同 if (node.type != type){ return false; } //名称相同 if (node.name == null){ if (name != null){ return false; } } else { if (!node.name.equals(name)){ return false; } } //比较Token if(type == GrammarNodeType.Token){ return token.equals(node.token); } //Epsilon else if (type == GrammarNodeType.Epsilon){ return true; } //比较字符集合 else if (type == GrammarNodeType.Char){ return charSet.equals(node.charSet); } //子节点也相同 if(children.size() != node.children.size()){ return false; } for (int i = 0; i< children().size(); i++){ if (!children.get(i).equals(node.children.get(i))){ return false; } } return true; } /** * 以比较易读的方式显式。 * @return */ @Override public String toString(){ if (type == GrammarNodeType.Epsilon) return "ε"; String rtn = null; if (charSet!=null){ rtn = charSet.toString(); } else if (token != null){ if (token.getText() != null){ rtn = "'" + token.getText() + "'"; } else { rtn = token.getType(); } } else if (name != null){ rtn = name; } else if (type != null){ rtn = type.toString(); } else{ rtn = "GrammarNode"; } if (minTimes != 1 || maxTimes !=1){ if (minTimes == 0 && maxTimes == -1){ rtn = rtn + "*"; } else if (minTimes == 0 && maxTimes == 1){ rtn = rtn + "?"; } else if (minTimes == 1 && maxTimes == -1){ rtn = rtn + "+"; } else { rtn = rtn + "{" + minTimes + "," + maxTimes + "}"; } } return rtn; } protected void setRepeatTimes(int minTimes, int maxTimes){ this.minTimes = minTimes; this.maxTimes = maxTimes; } /** * 以文本方式显示Node。显式结果的格式与Antlr的文法格式相同。 * 对于命名的节点,要把它的子节点都显示出来。 * 比如:primary节点的:primary: ID | INT_LITERAL | add * @return */ protected String getText(){ String delim = null; if (type == GrammarNodeType.And){ delim = " "; } else if (type == GrammarNodeType.Or){ delim = " | "; } StringBuffer sb = new StringBuffer(); if (children.size()>0){ for (int i = 0; i< children.size(); i++){ if (i > 0){ sb.append(delim); } GrammarNode child = children.get(i); if (child.isNamedNode()) { sb.append(child.toString()); } else { sb.append(child.getText()); } } } else{ sb.append(this.toString()); } String rtn = sb.toString(); if (isNamedNode()) { rtn = wrapNamedNode(rtn); } else{ if (type == GrammarNodeType.Or){ rtn = "(" + rtn + ")"; } } return rtn; } private String wrapNamedNode(String str){ if (name != null){ String header = name; if (name.length() <= 3) header += "\t"; header+=("\t: "); return header + str + " ;"; } return str; } /** * 把当前节点及下级节点以树状结构打印出来。 */ protected void dump(){ if (isGraph(this, new HashSet())){ dumpGraph(this, new HashSet()); } else { dumpTree(this, ""); } } /** * 打印正则表达式的树状结构 * @param node * @param indent 缩进。每一级增加一个tab。 */ private static void dumpTree(GrammarNode node, String indent){ if (node.isNamedNode()){ System.out.println(indent + node.getText()); } else { System.out.println(indent + node); } for (GrammarNode child : node.children){ dumpTree(child, indent+"\t"); } } /** * 打印图。因为存在循环引用,所以不能以树状的方式打印。 * @param node * @param dumpedNodes */ private static void dumpGraph(GrammarNode node, Set dumpedNodes){ if (node.isNamedNode()){ System.out.println(node.getText()); } dumpedNodes.add(node); for (GrammarNode child : node.children){ if (!dumpedNodes.contains(child)){ dumpGraph(child, dumpedNodes); } } } /** * 以某节点作为起始节点,判断是树还是图。 * @param node * @param scannedNodes * @return */ private static boolean isGraph(GrammarNode node, Set scannedNodes){ scannedNodes.add(node); for (GrammarNode child: node.children){ if (scannedNodes.contains(child)){ return true; } } for (GrammarNode child: node.children){ boolean rtn = isGraph(child, scannedNodes); if (rtn) { return true; } } return false; } /** * 该节点是否是可空的,也就是能否返回Epsilon。算法: * 1.如果是Epsilon节点,或者是 *或 ?,可空。 * 2.And节点:所有子节点都可空; * 3.Or节点:任意子节点可空。 * 4.其他,不为空。 * @return */ public boolean isNullable(){ boolean rtn = false; if (this.minTimes == 0 || this.type == GrammarNodeType.Epsilon){ return true; } else if (this.type == GrammarNodeType.And) { boolean allNullable = true; for (GrammarNode child : children){ if (!child.isNullable()){ allNullable = false; break; } } rtn = allNullable; } else if (this.type == GrammarNodeType.Or){ boolean anyNullable = false; for (GrammarNode child : children){ if (child.isNullable()){ anyNullable = true; break; } } rtn = anyNullable; } return rtn; } /** * 获得以本节点为起始节点,能到达的所有语法节点。 * @return */ protected List getAllNodes(){ List allNodes = new LinkedList(); getAllNodes(this,allNodes); return allNodes; } private static void getAllNodes(GrammarNode node, List allNodes){ allNodes.add(node); for (GrammarNode child : node.children){ if(!allNodes.contains(child)){ getAllNodes(child, allNodes); } } } /** * 是否是叶子节点 * @return */ public boolean isLeaf(){ return children.size() == 0; } /** * 是否代表一个Token * @return */ public boolean isToken(){ return token != null; } public CharSet getCharSet() { return charSet; } public int getMinTimes() { return minTimes; } public int getMaxTimes() { return maxTimes; } public Token getToken() { return token; } public boolean isNeglect() { return neglect; } public void setNeglect(boolean neglect){ this.neglect = neglect; } }