标签:
一个简单语法分析器的设计与实现
实验目的:
1.掌握语法分析的基本任务;
2.掌握语法分析器的工作流程;
3.能设计一个简单的语法分析器,并完成对指定句子的分析。
实验内容:
给定一个LL(1)文法,编制语法分析程序,输出其预测分析表,用LL(1)预测分析程序实现其分析过程并输出分析过程和分析结果。
主要实验内容:
要求出预测分析表,需要先求解此文法的Select集;而Select集是由First集和Follow集求解出来的。所以一般是先求出First集,再求出Follow集,最后求出Select集。
由于题目给定的是一个LL(1)文法,这里暂且假设输入的就是一个LL(1)文法。(如果输入的不是LL(1)文法,例如含有左递归,则需要先消除左递归。
算法是:
对文法G:
S->Sa
S->b
可改写为:
S->bS'
S'->aS'|ε
)
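求First集时,对产生式右部的符号分以下2种情况:(1)该符号是终结符或ε,直接加入First集;(2)该符号是非终结符,则加入该非终结符的First集。
求Follow集时,分以下4种情况:(1)A是开始符,加入#;(2)A后面紧跟终结符b,加入b;(3)A后面紧跟非终结符B,加入First(B)-{ε};(4)存在产生式B->αA(或A之后的部分可以推出ε),加入Follow(B)。
求Select集时,对产生式A->α分以下3种情况:(1)α为ε,Select=Follow(A);(2)α以终结符开头,Select为该终结符;(3)α以非终结符开头,Select=First(α)。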
import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; import com.zhushoumao.bianyi.util.TextUtil; /** * @author PuHaiyang * @createTime 2016年6月10日 下午7:46:33 * @email 761396462@qq.com * @function LL(1)文法 * */ public class Gs implements Serializable { /** * */ private static final long serialVersionUID = 1L; public Gs() { super(); gsArray = new ArrayList<String>(); nvSet = new TreeSet<Character>(); ntSet = new TreeSet<Character>(); firstMap = new HashMap<Character, TreeSet<Character>>(); followMap = new HashMap<Character, TreeSet<Character>>(); selectMap = new TreeMap<Character, HashMap<String, TreeSet<Character>>>(); } private String[][] analyzeTable; /** * Select集合 */ private TreeMap<Character, HashMap<String, TreeSet<Character>>> selectMap; /** * LL(1)文法产生集合 */ private ArrayList<String> gsArray; /** * 表达式集合 */ private HashMap<Character, ArrayList<String>> expressionMap; /** * 开始符 */ private Character s; /** * Vn非终结符集合 */ private TreeSet<Character> nvSet; /** * Vt终结符集合 */ private TreeSet<Character> ntSet; /** * First集合 */ private HashMap<Character, TreeSet<Character>> firstMap; /** * Follow集合 */ private HashMap<Character, TreeSet<Character>> followMap; public String[][] getAnalyzeTable() { return analyzeTable; } public void setAnalyzeTable(String[][] analyzeTable) { this.analyzeTable = analyzeTable; } public TreeMap<Character, HashMap<String, TreeSet<Character>>> getSelectMap() { return selectMap; } public void setSelectMap(TreeMap<Character, HashMap<String, TreeSet<Character>>> selectMap) { this.selectMap = selectMap; } public HashMap<Character, TreeSet<Character>> getFirstMap() { return firstMap; } public void setFirstMap(HashMap<Character, TreeSet<Character>> firstMap) { this.firstMap = firstMap; } public HashMap<Character, TreeSet<Character>> getFollowMap() { return followMap; } public void setFollowMap(HashMap<Character, 
TreeSet<Character>> followMap) { this.followMap = followMap; } public HashMap<Character, ArrayList<String>> getExpressionMap() { return expressionMap; } public void setExpressionMap(HashMap<Character, ArrayList<String>> expressionMap) { this.expressionMap = expressionMap; } public ArrayList<String> getGsArray() { return gsArray; } public void setGsArray(ArrayList<String> gsArray) { this.gsArray = gsArray; } public Character getS() { return s; } public void setS(Character s) { this.s = s; } public TreeSet<Character> getNvSet() { return nvSet; } public void setNvSet(TreeSet<Character> nvSet) { this.nvSet = nvSet; } public TreeSet<Character> getNtSet() { return ntSet; } public void setNtSet(TreeSet<Character> ntSet) { this.ntSet = ntSet; } /** * 获取非终结符集与终结符集 * * @param gsArray * @param nvSet * @param ntSet */ public void getNvNt() { for (String gsItem : gsArray) { String[] nvNtItem = gsItem.split("->"); String charItemStr = nvNtItem[0]; char charItem = charItemStr.charAt(0); // nv在左边 nvSet.add(charItem); } for (String gsItem : gsArray) { String[] nvNtItem = gsItem.split("->"); // nt在右边 String nvItemStr = nvNtItem[1]; // 遍历每一个字 for (int i = 0; i < nvItemStr.length(); i++) { char charItem = nvItemStr.charAt(i); if (!nvSet.contains(charItem)) { ntSet.add(charItem); } } } } /** * 初始化表达式集合 */ public void initExpressionMaps() { expressionMap = new HashMap<Character, ArrayList<String>>(); for (String gsItem : gsArray) { String[] nvNtItem = gsItem.split("->"); String charItemStr = nvNtItem[0]; String charItemRightStr = nvNtItem[1]; char charItem = charItemStr.charAt(0); if (!expressionMap.containsKey(charItem)) { ArrayList<String> expArr = new ArrayList<String>(); expArr.add(charItemRightStr); expressionMap.put(charItem, expArr); } else { ArrayList<String> expArr = expressionMap.get(charItem); expArr.add(charItemRightStr); expressionMap.put(charItem, expArr); } } } /** * 获取First集 */ public void getFirst() { // 遍历所有Nv,求出它们的First集合 Iterator<Character> iterator = 
nvSet.iterator(); while (iterator.hasNext()) { Character charItem = iterator.next(); ArrayList<String> arrayList = expressionMap.get(charItem); for (String itemStr : arrayList) { boolean shouldBreak = false; // Y1Y2Y3...Yk for (int i = 0; i < itemStr.length(); i++) { char itemitemChar = itemStr.charAt(i); TreeSet<Character> itemSet = firstMap.get(charItem); if (null == itemSet) { itemSet = new TreeSet<Character>(); } shouldBreak = calcFirst(itemSet, charItem, itemitemChar); if (shouldBreak) { break; } } } } } /** * 计算First函数 * * @param itemSet * @param charItem * @param itemitemChar * @return */ private boolean calcFirst(TreeSet<Character> itemSet, Character charItem, char itemitemChar) { // get ago // TreeSet<Character> itemSet = new TreeSet<Character>(); // 将它的每一位和Nt判断下 // 是终结符或空串,就停止,并将它加到FirstMap中 if (itemitemChar == 'ε' || ntSet.contains(itemitemChar)) { itemSet.add(itemitemChar); firstMap.put(charItem, itemSet); // break; return true; } else if (nvSet.contains(itemitemChar)) {// 这一位是一个非终结符 ArrayList<String> arrayList = expressionMap.get(itemitemChar); for (int i = 0; i < arrayList.size(); i++) { String string = arrayList.get(i); char tempChar = string.charAt(0); calcFirst(itemSet, charItem, tempChar); } } return true; } /** * 获取Follow集合 */ public void getFollow() { for (Character tempKey : nvSet) { TreeSet<Character> tempSet = new TreeSet<Character>(); followMap.put(tempKey, tempSet); } // 遍历所有Nv,求出它们的First集合 Iterator<Character> iterator = nvSet.descendingIterator(); // nvSet.descendingIterator(); while (iterator.hasNext()) { Character charItem = iterator.next(); System.out.println("charItem:" + charItem); Set<Character> keySet = expressionMap.keySet(); for (Character keyCharItem : keySet) { ArrayList<String> charItemArray = expressionMap.get(keyCharItem); for (String itemCharStr : charItemArray) { System.out.println(keyCharItem + "->" + itemCharStr); TreeSet<Character> itemSet = followMap.get(charItem); calcFollow(charItem, charItem, keyCharItem, 
itemCharStr, itemSet); } } } } /** * 计算Follow集 * * @param putCharItem * 正在查询item * @param charItem * 待找item * @param keyCharItem * 节点名 * @param itemCharStr * 符号集 * @param itemSet * 结果集合 */ private void calcFollow(Character putCharItem, Character charItem, Character keyCharItem, String itemCharStr, TreeSet<Character> itemSet) { /////// // (1)A是S(开始符),加入# if (charItem.equals(s)) { itemSet.add('#'); System.out.println("---------------find S:" + charItem + " ={#}+Follow(E)"); followMap.put(putCharItem, itemSet); // return; } // (2)Ab,=First(b)-ε,直接添加终结符 if (TextUtil.containsAb(ntSet, itemCharStr, charItem)) { Character alastChar = TextUtil.getAlastChar(itemCharStr, charItem); System.out.println("---------------find Ab:" + itemCharStr + " " + charItem + " =" + alastChar); itemSet.add(alastChar); followMap.put(putCharItem, itemSet); // return; } // (2).2AB,=First(B)-ε,=First(B)-ε,添加first集合 if (TextUtil.containsAB(nvSet, itemCharStr, charItem)) { Character alastChar = TextUtil.getAlastChar(itemCharStr, charItem); System.out.println( "---------------find AB:" + itemCharStr + " " + charItem + " =First(" + alastChar + ")"); TreeSet<Character> treeSet = firstMap.get(alastChar); itemSet.addAll(treeSet); if (treeSet.contains('ε')) { itemSet.add('#'); } itemSet.remove('ε'); followMap.put(putCharItem, itemSet); /////////////////////// if (TextUtil.containsbAbIsNull(nvSet, itemCharStr, charItem, expressionMap)) { char tempChar = TextUtil.getAlastChar(itemCharStr, charItem); System.out.println("tempChar:" + tempChar + " key" + keyCharItem); if (!keyCharItem.equals(charItem)) { System.out.println("---------------find tempChar bA: " + "tempChar:" + tempChar + keyCharItem + " " + itemCharStr + " " + charItem + " =Follow(" + keyCharItem + ")"); Set<Character> keySet = expressionMap.keySet(); for (Character keyCharItems : keySet) { ArrayList<String> charItemArray = expressionMap.get(keyCharItems); for (String itemCharStrs : charItemArray) { calcFollow(putCharItem, keyCharItem, 
keyCharItems, itemCharStrs, itemSet); } } } } } // (3)B->aA,=Follow(B),添加followB if (TextUtil.containsbA(nvSet, itemCharStr, charItem, expressionMap)) { if (!keyCharItem.equals(charItem)) { System.out.println("---------------find bA: " + keyCharItem + " " + itemCharStr + " " + charItem + " =Follow(" + keyCharItem + ")"); Set<Character> keySet = expressionMap.keySet(); for (Character keyCharItems : keySet) { ArrayList<String> charItemArray = expressionMap.get(keyCharItems); for (String itemCharStrs : charItemArray) { calcFollow(putCharItem, keyCharItem, keyCharItems, itemCharStrs, itemSet); } } } } } /** * 获取Select集合 */ public void getSelect() { // 遍历每一个表达式 // HashMap<Character, HashMap<String, TreeSet<Character>>> Set<Character> keySet = expressionMap.keySet(); for (Character selectKey : keySet) { ArrayList<String> arrayList = expressionMap.get(selectKey); // 每一个表达式 HashMap<String, TreeSet<Character>> selectItemMap = new HashMap<String, TreeSet<Character>>(); for (String selectExp : arrayList) { /** * 存放select结果的集合 */ TreeSet<Character> selectSet = new TreeSet<Character>(); // set里存放的数据分3种情况,由selectExp决定 // 1.A->ε,=follow(A) if (TextUtil.isEmptyStart(selectExp)) { selectSet = followMap.get(selectKey); selectSet.remove('ε'); selectItemMap.put(selectExp, selectSet); } // 2.Nt开始,=Nt // <br>终结符开始 if (TextUtil.isNtStart(ntSet, selectExp)) { selectSet.add(selectExp.charAt(0)); selectSet.remove('ε'); selectItemMap.put(selectExp, selectSet); } // 3.Nv开始,=first(Nv) if (TextUtil.isNvStart(nvSet, selectExp)) { selectSet = firstMap.get(selectKey); selectSet.remove('ε'); selectItemMap.put(selectExp, selectSet); } selectMap.put(selectKey, selectItemMap); } } } /** * 生成预测分析表 */ public void genAnalyzeTable() throws Exception { Object[] ntArray = ntSet.toArray(); Object[] nvArray = nvSet.toArray(); // 预测分析表初始化 analyzeTable = new String[nvArray.length + 1][ntArray.length + 1]; // 输出一个占位符 System.out.print("Nv/Nt" + "\t\t"); analyzeTable[0][0] = "Nv/Nt"; // 初始化首行 for (int i = 0; i < 
ntArray.length; i++) { if (ntArray[i].equals('ε')) { ntArray[i] = '#'; } System.out.print(ntArray[i] + "\t\t"); analyzeTable[0][i + 1] = ntArray[i] + ""; } System.out.println(""); for (int i = 0; i < nvArray.length; i++) { // 首列初始化 System.out.print(nvArray[i] + "\t\t"); analyzeTable[i + 1][0] = nvArray[i] + ""; for (int j = 0; j < ntArray.length; j++) { String findUseExp = TextUtil.findUseExp(selectMap, Character.valueOf((Character) nvArray[i]), Character.valueOf((Character) ntArray[j])); if (null == findUseExp) { System.out.print("\t\t"); analyzeTable[i + 1][j + 1] = ""; } else { System.out.print(nvArray[i] + "->" + findUseExp + "\t\t"); analyzeTable[i + 1][j + 1] = nvArray[i] + "->" + findUseExp; } } System.out.println(); } } }
import java.io.Serializable;

/**
 * Serializable bean holding one row of a sentence-analysis trace: a step
 * number, a text snapshot of the analysis stack, the input text for that
 * step, and a description of the production or match that was applied.
 *
 * @author PuHaiyang
 * @createTime 2016-06-12 16:15:37
 * @email 761396462@qq.com
 */
public class AnalyzeProduce implements Serializable {

    private static final long serialVersionUID = 10L;

    /** Step number of this row. */
    private Integer index;

    /** Text form of the analysis stack at this step. */
    private String analyzeStackStr;

    /** Input string associated with this step. */
    private String str;

    /** Description of the production used, or of the match performed. */
    private String useExpStr;

    /** @return the step number, or {@code null} if it was never set */
    public Integer getIndex() {
        return this.index;
    }

    /** @param index the step number */
    public void setIndex(Integer index) {
        this.index = index;
    }

    /** @return the stack snapshot text */
    public String getAnalyzeStackStr() {
        return this.analyzeStackStr;
    }

    /** @param analyzeStackStr the stack snapshot text */
    public void setAnalyzeStackStr(String analyzeStackStr) {
        this.analyzeStackStr = analyzeStackStr;
    }

    /** @return the input string for this step */
    public String getStr() {
        return this.str;
    }

    /** @param str the input string for this step */
    public void setStr(String str) {
        this.str = str;
    }

    /** @return the production/match description */
    public String getUseExpStr() {
        return this.useExpStr;
    }

    /** @param useExpStr the production/match description */
    public void setUseExpStr(String useExpStr) {
        this.useExpStr = useExpStr;
    }
}
import java.util.ArrayList; import java.util.Stack; import com.zhushoumao.bianyi.pojo.AnalyzeProduce; import com.zhushoumao.bianyi.pojo.Gs; import com.zhushoumao.bianyi.util.TextUtil; /** * @author PuHaiyang * @createTime 2016年6月11日 下午11:27:00 * @email 761396462@qq.com * @function 句子分析器 * */ public class Analyzer { public Analyzer() { super(); analyzeStatck = new Stack<Character>(); // 结束符进栈 analyzeStatck.push('#'); } private ArrayList<AnalyzeProduce> analyzeProduces; /** * LL(1)文法 */ private Gs ll1Gs; public Gs getLl1Gs() { return ll1Gs; } public void setLl1Gs(Gs ll1Gs) { this.ll1Gs = ll1Gs; } /** * 开始符 */ private Character startChar; /** * 分析栈 */ private Stack<Character> analyzeStatck; /** * 剩余输入串 */ private String str; /** * 推导所用产生或匹配 */ private String useExp; public ArrayList<AnalyzeProduce> getAnalyzeProduces() { return analyzeProduces; } public void setAnalyzeProduces(ArrayList<AnalyzeProduce> analyzeProduces) { this.analyzeProduces = analyzeProduces; } public Character getStartChar() { return startChar; } public void setStartChar(Character startChar) { this.startChar = startChar; } public Stack<Character> getAnalyzeStatck() { return analyzeStatck; } public void setAnalyzeStatck(Stack<Character> analyzeStatck) { this.analyzeStatck = analyzeStatck; } public String getStr() { return str; } public void setStr(String str) { this.str = str; } public String getUseExp() { return useExp; } public void setUseExp(String useExp) { this.useExp = useExp; } /** * 分析 */ public void analyze() { analyzeProduces = new ArrayList<AnalyzeProduce>(); // 开始符进栈 analyzeStatck.push(startChar); System.out.println("开始符:" + startChar); int index = 0; // 开始分析 // while (analyzeStatck.peek() != '#' && str.charAt(0) != '#') { while (!analyzeStatck.empty()) { index++; if (analyzeStatck.peek() != str.charAt(0)) { // 到分析表中找到这个产生式 String nowUseExpStr = TextUtil.findUseExp(ll1Gs.getSelectMap(), analyzeStatck.peek(), str.charAt(0)); System.out.println(index + "\t\t\t" + analyzeStatck.toString() + 
"\t\t\t" + str + "\t\t\t" + analyzeStatck.peek() + "->" + nowUseExpStr); AnalyzeProduce produce = new AnalyzeProduce(); produce.setIndex(index); produce.setAnalyzeStackStr(analyzeStatck.toString()); produce.setStr(str); if (null == nowUseExpStr) { produce.setUseExpStr("无法匹配!"); } else { produce.setUseExpStr(analyzeStatck.peek() + "->" + nowUseExpStr); } analyzeProduces.add(produce); // 将之前的分析栈中的栈顶出栈 analyzeStatck.pop(); // 将要用到的表达式入栈,反序入栈 if (null != nowUseExpStr && nowUseExpStr.charAt(0) != 'ε') { for (int j = nowUseExpStr.length() - 1; j >= 0; j--) { char currentChar = nowUseExpStr.charAt(j); analyzeStatck.push(currentChar); } } continue; } // 如果可以匹配,分析栈出栈,串去掉一位 if (analyzeStatck.peek() == str.charAt(0)) { System.out.println(index + "\t\t\t" + analyzeStatck.toString() + "\t\t\t" + str + "\t\t\t" + "“" + str.charAt(0) + "”匹配"); AnalyzeProduce produce = new AnalyzeProduce(); produce.setIndex(index); produce.setAnalyzeStackStr(analyzeStatck.toString()); produce.setStr(str); produce.setUseExpStr("“" + str.charAt(0) + "”匹配"); analyzeProduces.add(produce); analyzeStatck.pop(); str = str.substring(1); continue; } } } }
public class Main { public static void main(String[] args) throws Exception { // // LL(1)文法产生集合 ArrayList<String> gsArray = new ArrayList<String>(); // // Vn非终结符集合 // TreeSet<Character> nvSet = new TreeSet<Character>(); // // Vt终结符集合 // TreeSet<Character> ntSet = new TreeSet<Character>(); Gs gs = new Gs(); initGs(gsArray); gs.setGsArray(gsArray); // getNvNt(gsArray, gs.getNvSet(), gs.getNtSet()); gs.getNvNt(); gs.initExpressionMaps(); gs.getFirst(); // 设置开始符 gs.setS('E'); gs.getFollow(); gs.getSelect(); // 创建一个分析器 Analyzer analyzer = new Analyzer(); analyzer.setStartChar('E'); analyzer.setLl1Gs(gs); analyzer.setStr("i+i*i#"); analyzer.analyze(); gs.genAnalyzeTable(); System.out.println(""); } /** * 获取非终结符集与终结符集 * * @param gsArray * @param nvSet * @param ntSet */ private static void getNvNt(ArrayList<String> gsArray, TreeSet<Character> nvSet, TreeSet<Character> ntSet) { for (String gsItem : gsArray) { String[] nvNtItem = gsItem.split("->"); String charItemStr = nvNtItem[0]; char charItem = charItemStr.charAt(0); // nv在左边 nvSet.add(charItem); } for (String gsItem : gsArray) { String[] nvNtItem = gsItem.split("->"); // nt在右边 String nvItemStr = nvNtItem[1]; // 遍历每一个字 for (int i = 0; i < nvItemStr.length(); i++) { char charItem = nvItemStr.charAt(i); if (!nvSet.contains(charItem)) { ntSet.add(charItem); } } } } /** * 初始化LL(1)文法 * * @param gsArray */ private static void initGs(ArrayList<String> gsArray) { gsArray.add("D->*FD"); gsArray.add("D->ε"); gsArray.add("T->FD"); gsArray.add("E->TC"); gsArray.add("F->(E)"); gsArray.add("F->i"); gsArray.add("C->+TC"); gsArray.add("C->ε"); } }测试截图如下:
Web版如下:
部署链接:点击打开此在线分析程序链接
标签:
原文地址:http://blog.csdn.net/puhaiyang/article/details/51793550