码迷,mamicode.com
首页 > 编程语言 > 详细

用JAVA实现LL(1)文法语法分析程序

时间:2016-07-03 20:03:20      阅读:673      评论:0      收藏:0      [点我收藏+]

标签:

一个简单语法分析器的设计与实现

实验目的:

1.掌握语法分析的基本任务;

2.掌握语法分析器的工作流程;

3.能设计一个简单的语法分析器,并完成对指定句子的分析。

实验内容:     

   给定一个LL(1)文法,编制语法分析程序,输出其预测分析表,用LL(1)预测分析程序实现其分析过程并输出分析过程和分析结果。


主要实验内容:

要求出预测分析表,需要先求解此文法的Select集;而Select集是由First集和Follow集求解出来的。所以一般是先求出First集,再求出Follow集,最后求出Select集。

由于题目给定的是一个LL(1)文法,这里暂时假设输入的就是LL(1)文法。(如果输入的不是LL(1)文法,例如含有左递归,则需要先消除左递归。

算法是:

对文法G:

S->Sa

S->b

可改写为:


S->bS'

S'->aS'|ε

)

1.first集求解主要算法

分以下2种情况:

技术分享


2.Follow求解算法

分以下4种情况:

技术分享


3.Select集求解算法

分以下3种情况:

技术分享

4.编程实现

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import com.zhushoumao.bianyi.util.TextUtil;

/**
 * An LL(1) grammar together with the sets derived from it (FIRST, FOLLOW,
 * SELECT) and the resulting predictive parsing table.
 *
 * <p>
 * Every grammar symbol is a single character. Productions are given as strings
 * of the form {@code "A->alpha"}; the empty string is written with the Greek
 * letter epsilon (U+03B5) and the end-of-input marker is {@code '#'}.
 *
 * <p>
 * Expected call order: {@link #setGsArray}, {@link #getNvNt},
 * {@link #initExpressionMaps}, {@link #getFirst}, {@link #setS},
 * {@link #getFollow}, {@link #getSelect}, {@link #genAnalyzeTable}.
 *
 * @author PuHaiyang
 * @createTime 2016-06-10 19:46:33
 * @email 761396462@qq.com
 * @function LL(1) grammar
 */
public class Gs implements Serializable {

	private static final long serialVersionUID = 1L;

	/**
	 * The empty-string symbol (Greek small letter epsilon). Written as a Unicode
	 * escape so the class does not depend on the source-file encoding.
	 */
	private static final char EPSILON = '\u03b5';

	/** End-of-input marker. */
	private static final char END_MARK = '#';

	/** Separator between the two sides of a production. */
	private static final String ARROW = "->";

	public Gs() {
		super();
		gsArray = new ArrayList<String>();
		nvSet = new TreeSet<Character>();
		ntSet = new TreeSet<Character>();
		firstMap = new HashMap<Character, TreeSet<Character>>();
		followMap = new HashMap<Character, TreeSet<Character>>();
		selectMap = new TreeMap<Character, HashMap<String, TreeSet<Character>>>();
	}

	/**
	 * Predictive parsing table. Row 0 holds the terminals, column 0 the
	 * nonterminals; null until {@link #genAnalyzeTable()} has run.
	 */
	private String[][] analyzeTable;

	/**
	 * SELECT sets: nonterminal -> (production body -> SELECT set).
	 */
	private TreeMap<Character, HashMap<String, TreeSet<Character>>> selectMap;
	/**
	 * The productions of the grammar, each as "A->alpha".
	 */
	private ArrayList<String> gsArray;
	/**
	 * Production bodies grouped by left-hand nonterminal; null until
	 * {@link #initExpressionMaps()} has run.
	 */
	private HashMap<Character, ArrayList<String>> expressionMap;
	/**
	 * Start symbol.
	 */
	private Character s;
	/**
	 * Vn: the set of nonterminals.
	 */
	private TreeSet<Character> nvSet;
	/**
	 * Vt: the set of terminals (also contains epsilon when the grammar uses it).
	 */
	private TreeSet<Character> ntSet;
	/**
	 * FIRST sets, keyed by nonterminal.
	 */
	private HashMap<Character, TreeSet<Character>> firstMap;
	/**
	 * FOLLOW sets, keyed by nonterminal.
	 */
	private HashMap<Character, TreeSet<Character>> followMap;

	public String[][] getAnalyzeTable() {
		return analyzeTable;
	}

	public void setAnalyzeTable(String[][] analyzeTable) {
		this.analyzeTable = analyzeTable;
	}

	public TreeMap<Character, HashMap<String, TreeSet<Character>>> getSelectMap() {
		return selectMap;
	}

	public void setSelectMap(TreeMap<Character, HashMap<String, TreeSet<Character>>> selectMap) {
		this.selectMap = selectMap;
	}

	public HashMap<Character, TreeSet<Character>> getFirstMap() {
		return firstMap;
	}

	public void setFirstMap(HashMap<Character, TreeSet<Character>> firstMap) {
		this.firstMap = firstMap;
	}

	public HashMap<Character, TreeSet<Character>> getFollowMap() {
		return followMap;
	}

	public void setFollowMap(HashMap<Character, TreeSet<Character>> followMap) {
		this.followMap = followMap;
	}

	public HashMap<Character, ArrayList<String>> getExpressionMap() {
		return expressionMap;
	}

	public void setExpressionMap(HashMap<Character, ArrayList<String>> expressionMap) {
		this.expressionMap = expressionMap;
	}

	public ArrayList<String> getGsArray() {
		return gsArray;
	}

	public void setGsArray(ArrayList<String> gsArray) {
		this.gsArray = gsArray;
	}

	public Character getS() {
		return s;
	}

	public void setS(Character s) {
		this.s = s;
	}

	public TreeSet<Character> getNvSet() {
		return nvSet;
	}

	public void setNvSet(TreeSet<Character> nvSet) {
		this.nvSet = nvSet;
	}

	public TreeSet<Character> getNtSet() {
		return ntSet;
	}

	public void setNtSet(TreeSet<Character> ntSet) {
		this.ntSet = ntSet;
	}

	/**
	 * Fills the nonterminal and terminal sets from the productions in
	 * {@code gsArray}.
	 *
	 * @throws IllegalArgumentException
	 *             if a production is not of the form "A->alpha"
	 */
	public void getNvNt() {
		// Pass 1: the first character of every left-hand side is a nonterminal.
		for (String production : gsArray) {
			nvSet.add(splitProduction(production)[0].charAt(0));
		}
		// Pass 2: every right-hand-side character that is not a nonterminal is a
		// terminal. This needs the complete nonterminal set, hence two passes.
		for (String production : gsArray) {
			String rhs = splitProduction(production)[1];
			for (int i = 0; i < rhs.length(); i++) {
				char symbol = rhs.charAt(i);
				if (!nvSet.contains(symbol)) {
					ntSet.add(symbol);
				}
			}
		}
	}

	/**
	 * Rebuilds {@code expressionMap}: for each nonterminal, the list of its
	 * production bodies in the order they appear in {@code gsArray}.
	 *
	 * @throws IllegalArgumentException
	 *             if a production is not of the form "A->alpha"
	 */
	public void initExpressionMaps() {
		expressionMap = new HashMap<Character, ArrayList<String>>();
		for (String production : gsArray) {
			String[] sides = splitProduction(production);
			Character lhs = sides[0].charAt(0);
			ArrayList<String> alternatives = expressionMap.get(lhs);
			if (null == alternatives) {
				alternatives = new ArrayList<String>();
				expressionMap.put(lhs, alternatives);
			}
			alternatives.add(sides[1]);
		}
	}

	/**
	 * Computes the FIRST set of every nonterminal into {@code firstMap}.
	 *
	 * <p>
	 * Uses fixed-point iteration: the sets only ever grow, and the loop stops
	 * once a full pass adds nothing. Unlike a recursive descent through the
	 * productions this terminates on left-recursive grammars, and it looks past
	 * a leading nullable nonterminal (for A->BC with B nullable, FIRST(C) is
	 * included).
	 */
	public void getFirst() {
		for (Character nv : nvSet) {
			if (null == firstMap.get(nv)) {
				firstMap.put(nv, new TreeSet<Character>());
			}
		}
		boolean changed = true;
		while (changed) {
			changed = false;
			for (Character nv : nvSet) {
				TreeSet<Character> target = firstMap.get(nv);
				for (String rhs : productionsOf(nv)) {
					if (target.addAll(firstOfSequence(rhs, 0))) {
						changed = true;
					}
				}
			}
		}
	}

	/**
	 * Computes the FOLLOW set of every nonterminal into {@code followMap}.
	 * Requires {@link #getFirst()} to have run and the start symbol to be set.
	 *
	 * <p>
	 * Rules, applied until nothing changes:
	 * <ol>
	 * <li>the end marker '#' is in FOLLOW(start symbol);</li>
	 * <li>for A->xBy, FIRST(y) without epsilon is in FOLLOW(B);</li>
	 * <li>for A->xB, or A->xBy with y nullable, FOLLOW(A) is in FOLLOW(B).</li>
	 * </ol>
	 */
	public void getFollow() {
		for (Character nv : nvSet) {
			followMap.put(nv, new TreeSet<Character>());
		}
		if (null != s && nvSet.contains(s)) {
			followMap.get(s).add(END_MARK);
		}
		boolean changed = true;
		while (changed) {
			changed = false;
			for (Character lhs : nvSet) {
				for (String rhs : productionsOf(lhs)) {
					for (int i = 0; i < rhs.length(); i++) {
						char symbol = rhs.charAt(i);
						if (!nvSet.contains(symbol)) {
							continue;
						}
						TreeSet<Character> target = followMap.get(symbol);
						TreeSet<Character> firstOfRest = firstOfSequence(rhs, i + 1);
						// remove() reports whether epsilon was present, i.e. whether
						// everything after this occurrence can vanish.
						boolean restNullable = firstOfRest.remove(EPSILON);
						if (target.addAll(firstOfRest)) {
							changed = true;
						}
						if (restNullable && !lhs.equals(symbol) && target.addAll(followMap.get(lhs))) {
							changed = true;
						}
					}
				}
			}
		}
	}

	/**
	 * Computes the SELECT set of every production into {@code selectMap}.
	 * Requires {@link #getFirst()} and {@link #getFollow()} to have run.
	 *
	 * <p>
	 * SELECT(A->alpha) is FIRST(alpha) without epsilon, plus FOLLOW(A) when
	 * alpha can derive the empty string. Each SELECT set is a fresh set, so the
	 * stored FIRST and FOLLOW sets are never modified or shared.
	 */
	public void getSelect() {
		if (null == expressionMap) {
			return;
		}
		for (Character lhs : expressionMap.keySet()) {
			HashMap<String, TreeSet<Character>> selectByBody = new HashMap<String, TreeSet<Character>>();
			for (String rhs : expressionMap.get(lhs)) {
				TreeSet<Character> selectSet = firstOfSequence(rhs, 0);
				if (selectSet.remove(EPSILON)) {
					TreeSet<Character> follow = followMap.get(lhs);
					if (null != follow) {
						selectSet.addAll(follow);
					}
				}
				selectByBody.put(rhs, selectSet);
			}
			selectMap.put(lhs, selectByBody);
		}
	}

	/**
	 * Builds the predictive parsing table into {@code analyzeTable} and prints
	 * it to standard output, tab-separated.
	 *
	 * <p>
	 * Row 0 holds the terminals (with epsilon shown as the end marker '#'),
	 * column 0 the nonterminals. A cell holds "A->alpha" for the production to
	 * apply, or the empty string when there is none.
	 */
	public void genAnalyzeTable() throws Exception {
		Character[] terminals = ntSet.toArray(new Character[0]);
		Character[] nonterminals = nvSet.toArray(new Character[0]);
		analyzeTable = new String[nonterminals.length + 1][terminals.length + 1];

		// Corner cell.
		System.out.print("Nv/Nt" + "\t\t");
		analyzeTable[0][0] = "Nv/Nt";
		// Header row.
		for (int col = 0; col < terminals.length; col++) {
			if (terminals[col].charValue() == EPSILON) {
				// Epsilon is never a lookahead; its column is used for '#'.
				terminals[col] = END_MARK;
			}
			System.out.print(terminals[col] + "\t\t");
			analyzeTable[0][col + 1] = terminals[col] + "";
		}

		System.out.println("");
		for (int row = 0; row < nonterminals.length; row++) {
			// Header column.
			System.out.print(nonterminals[row] + "\t\t");
			analyzeTable[row + 1][0] = nonterminals[row] + "";
			for (int col = 0; col < terminals.length; col++) {
				String body = findProduction(nonterminals[row], terminals[col]);
				if (null == body) {
					System.out.print("\t\t");
					analyzeTable[row + 1][col + 1] = "";
				} else {
					System.out.print(nonterminals[row] + ARROW + body + "\t\t");
					analyzeTable[row + 1][col + 1] = nonterminals[row] + ARROW + body;
				}
			}
			System.out.println();
		}
	}

	/**
	 * Splits "A->alpha" into its two sides.
	 *
	 * @return a two-or-more element array; index 0 is the left side, index 1 the
	 *         body
	 * @throws IllegalArgumentException
	 *             if the arrow or either side is missing
	 */
	private static String[] splitProduction(String production) {
		if (null == production) {
			throw new IllegalArgumentException("production must not be null");
		}
		String[] sides = production.split(ARROW);
		if (sides.length < 2 || sides[0].isEmpty() || sides[1].isEmpty()) {
			throw new IllegalArgumentException("malformed production, expected \"A->alpha\": " + production);
		}
		return sides;
	}

	/**
	 * Returns the production bodies of a nonterminal, or an empty list when it
	 * has none or {@code expressionMap} has not been built yet.
	 */
	private ArrayList<String> productionsOf(Character nv) {
		ArrayList<String> alternatives = (null == expressionMap) ? null : expressionMap.get(nv);
		return (null == alternatives) ? new ArrayList<String>() : alternatives;
	}

	/**
	 * Computes FIRST of the symbol string {@code symbols.substring(from)} from
	 * the current contents of {@code firstMap}.
	 *
	 * @return a fresh set; it contains epsilon exactly when every symbol of the
	 *         suffix is nullable (in particular when the suffix is empty)
	 */
	private TreeSet<Character> firstOfSequence(String symbols, int from) {
		TreeSet<Character> result = new TreeSet<Character>();
		for (int i = from; i < symbols.length(); i++) {
			char symbol = symbols.charAt(i);
			if (symbol == EPSILON) {
				// A literal epsilon contributes nothing and never blocks.
				continue;
			}
			if (!nvSet.contains(symbol)) {
				// A terminal starts the string; nothing behind it is visible.
				result.add(symbol);
				return result;
			}
			boolean nullable = false;
			TreeSet<Character> firstOfSymbol = firstMap.get(symbol);
			if (null != firstOfSymbol) {
				for (Character c : firstOfSymbol) {
					if (c.charValue() == EPSILON) {
						nullable = true;
					} else {
						result.add(c);
					}
				}
			}
			if (!nullable) {
				return result;
			}
		}
		result.add(EPSILON);
		return result;
	}

	/**
	 * Looks up the production to apply for a nonterminal and a lookahead.
	 *
	 * @return the body of the production of {@code nv} whose SELECT set contains
	 *         {@code lookahead}, or null when there is none
	 */
	private String findProduction(Character nv, Character lookahead) {
		HashMap<String, TreeSet<Character>> selectByBody = selectMap.get(nv);
		if (null == selectByBody) {
			return null;
		}
		for (String body : selectByBody.keySet()) {
			TreeSet<Character> selectSet = selectByBody.get(body);
			if (null != selectSet && selectSet.contains(lookahead)) {
				return body;
			}
		}
		return null;
	}
}

分析过程bean

import java.io.Serializable;

/**
 * One recorded step of a parse: the step number, a snapshot of the analysis
 * stack, the input that was still unread, and the action that was taken.
 *
 * @author PuHaiyang
 * @createTime 2016-06-12 16:15:37
 * @email 761396462@qq.com
 * @function bean describing one step of the analysis
 */
public class AnalyzeProduce implements Serializable {

	private static final long serialVersionUID = 10L;

	/** Step number. */
	private Integer index;

	/** Textual snapshot of the analysis stack at this step. */
	private String analyzeStackStr;

	/** Input string remaining at this step. */
	private String str;

	/** Production applied, or a description of the match made, at this step. */
	private String useExpStr;

	/** @return the step number, or null if it has not been set */
	public Integer getIndex() {
		return this.index;
	}

	/** @param index the step number */
	public void setIndex(final Integer index) {
		this.index = index;
	}

	/** @return the stack snapshot, or null if it has not been set */
	public String getAnalyzeStackStr() {
		return this.analyzeStackStr;
	}

	/** @param analyzeStackStr the stack snapshot */
	public void setAnalyzeStackStr(final String analyzeStackStr) {
		this.analyzeStackStr = analyzeStackStr;
	}

	/** @return the remaining input, or null if it has not been set */
	public String getStr() {
		return this.str;
	}

	/** @param str the remaining input */
	public void setStr(final String str) {
		this.str = str;
	}

	/** @return the action text, or null if it has not been set */
	public String getUseExpStr() {
		return this.useExpStr;
	}

	/** @param useExpStr the action text */
	public void setUseExpStr(final String useExpStr) {
		this.useExpStr = useExpStr;
	}

}

句子分析器代码:

import java.util.ArrayList;
import java.util.Stack;

import com.zhushoumao.bianyi.pojo.AnalyzeProduce;
import com.zhushoumao.bianyi.pojo.Gs;
import com.zhushoumao.bianyi.util.TextUtil;

/**
 * Table-driven LL(1) sentence analyzer: runs the predictive parsing loop over
 * an input string and records every step.
 *
 * @author PuHaiyang
 * @createTime 2016-06-11 23:27:00
 * @email 761396462@qq.com
 * @function sentence analyzer
 */
public class Analyzer {

	/** Action text recorded for a step at which no progress is possible. */
	private static final String NO_MATCH = "无法匹配!";

	/** Column separator of the trace printed to standard output. */
	private static final String GAP = "\t\t\t";

	public Analyzer() {
		super();
		analyzeStatck = new Stack<Character>();
		// The end marker is the bottom of the stack.
		analyzeStatck.push('#');
	}

	/** Steps recorded by the most recent {@link #analyze()} call. */
	private ArrayList<AnalyzeProduce> analyzeProduces;

	/**
	 * The LL(1) grammar whose SELECT sets drive the analysis.
	 */
	private Gs ll1Gs;

	public Gs getLl1Gs() {
		return ll1Gs;
	}

	public void setLl1Gs(Gs ll1Gs) {
		this.ll1Gs = ll1Gs;
	}

	/**
	 * Start symbol.
	 */
	private Character startChar;

	/**
	 * Analysis stack.
	 */
	private Stack<Character> analyzeStatck;
	/**
	 * Remaining input string.
	 */
	private String str;
	/**
	 * Production used, or match made, by a derivation step.
	 */
	private String useExp;

	public ArrayList<AnalyzeProduce> getAnalyzeProduces() {
		return analyzeProduces;
	}

	public void setAnalyzeProduces(ArrayList<AnalyzeProduce> analyzeProduces) {
		this.analyzeProduces = analyzeProduces;
	}

	public Character getStartChar() {
		return startChar;
	}

	public void setStartChar(Character startChar) {
		this.startChar = startChar;
	}

	public Stack<Character> getAnalyzeStatck() {
		return analyzeStatck;
	}

	public void setAnalyzeStatck(Stack<Character> analyzeStatck) {
		this.analyzeStatck = analyzeStatck;
	}

	public String getStr() {
		return str;
	}

	public void setStr(String str) {
		this.str = str;
	}

	public String getUseExp() {
		return useExp;
	}

	public void setUseExp(String useExp) {
		this.useExp = useExp;
	}

	/**
	 * Analyzes the input string {@code str}, consuming it as symbols are
	 * matched. Every step is printed to standard output and appended to
	 * {@code analyzeProduces}.
	 *
	 * <p>
	 * At each step the stack top is compared with the first remaining input
	 * character: when they are equal both are consumed; otherwise the stack top
	 * is replaced by the body of the production looked up for that pair (pushed
	 * in reverse so its first symbol ends up on top). When no production is
	 * found the step is recorded as a failure, the stack top is dropped and the
	 * analysis continues.
	 *
	 * <p>
	 * If the input runs out while symbols remain on the stack (for example the
	 * sentence lacks the trailing '#'), a failure step is recorded and the
	 * analysis stops instead of throwing.
	 */
	public void analyze() {
		analyzeProduces = new ArrayList<AnalyzeProduce>();

		// A previous run pops the end marker pushed by the constructor; put it
		// back so that a repeated analysis again has '#' at the stack bottom.
		if (analyzeStatck.empty()) {
			analyzeStatck.push('#');
		}
		// Push the start symbol.
		analyzeStatck.push(startChar);
		System.out.println("开始符:" + startChar);
		int index = 0;
		while (!analyzeStatck.empty()) {
			index++;
			if (null == str || str.isEmpty()) {
				// Nothing left to read but the stack still expects symbols.
				System.out.println(index + GAP + analyzeStatck.toString() + GAP + str + GAP + NO_MATCH);
				recordStep(index, NO_MATCH);
				break;
			}
			char lookahead = str.charAt(0);
			Character top = analyzeStatck.peek();

			if (top.charValue() == lookahead) {
				// Stack top equals the lookahead: consume one symbol on both sides.
				String action = "“" + lookahead + "”匹配";
				System.out.println(index + GAP + analyzeStatck.toString() + GAP + str + GAP + action);
				recordStep(index, action);
				analyzeStatck.pop();
				str = str.substring(1);
				continue;
			}

			// Otherwise look up the production for (stack top, lookahead).
			String nowUseExpStr = TextUtil.findUseExp(ll1Gs.getSelectMap(), top, lookahead);
			System.out.println(index + GAP + analyzeStatck.toString() + GAP + str + GAP + top + "->" + nowUseExpStr);
			recordStep(index, null == nowUseExpStr ? NO_MATCH : top + "->" + nowUseExpStr);
			// Replace the stack top by the production body, pushed right to left.
			// An epsilon body pushes nothing.
			analyzeStatck.pop();
			if (null != nowUseExpStr && !nowUseExpStr.isEmpty() && nowUseExpStr.charAt(0) != 'ε') {
				for (int j = nowUseExpStr.length() - 1; j >= 0; j--) {
					analyzeStatck.push(nowUseExpStr.charAt(j));
				}
			}
		}

	}

	/**
	 * Appends one step to {@code analyzeProduces}, capturing the current stack
	 * and remaining input. Must be called before the stack or input is changed
	 * for this step.
	 */
	private void recordStep(int index, String action) {
		AnalyzeProduce produce = new AnalyzeProduce();
		produce.setIndex(index);
		produce.setAnalyzeStackStr(analyzeStatck.toString());
		produce.setStr(str);
		produce.setUseExpStr(action);
		analyzeProduces.add(produce);
	}

}

测试类:

/**
 * Demo driver: builds the sample expression grammar, derives its FIRST, FOLLOW
 * and SELECT sets, analyzes the sentence "i+i*i#" and prints the predictive
 * parsing table.
 */
public class Main {

	/** Start symbol of the sample grammar; shared by the grammar and the analyzer. */
	private static final char START_SYMBOL = 'E';

	public static void main(String[] args) throws Exception {
		// Productions of the LL(1) grammar.
		ArrayList<String> gsArray = new ArrayList<String>();
		initGs(gsArray);

		Gs gs = new Gs();
		gs.setGsArray(gsArray);
		gs.getNvNt();
		gs.initExpressionMaps();
		gs.getFirst();
		// The start symbol must be set before the FOLLOW sets are computed.
		gs.setS(START_SYMBOL);
		gs.getFollow();
		gs.getSelect();

		// Analyze one sentence with the grammar.
		Analyzer analyzer = new Analyzer();
		analyzer.setStartChar(START_SYMBOL);
		analyzer.setLl1Gs(gs);
		analyzer.setStr("i+i*i#");
		analyzer.analyze();

		gs.genAnalyzeTable();
		System.out.println("");
	}

	/**
	 * Fills the list with the productions of the sample LL(1) grammar.
	 *
	 * @param gsArray
	 *            list that receives the productions, each as "A->alpha"
	 */
	private static void initGs(ArrayList<String> gsArray) {
		gsArray.add("D->*FD");
		gsArray.add("D->ε");
		gsArray.add("T->FD");
		gsArray.add("E->TC");
		gsArray.add("F->(E)");
		gsArray.add("F->i");
		gsArray.add("C->+TC");
		gsArray.add("C->ε");
	}

}
测试截图如下:

技术分享

Web版如下:

技术分享

技术分享

部署链接:点击打开此在线分析程序链接


用JAVA实现LL(1)文法语法分析程序

标签:

原文地址:http://blog.csdn.net/puhaiyang/article/details/51793550

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!