实现一个简单的 JSON 解析器。
解析器由两部分组成:词法分析和语法分析。
词法分析
package com.mahuan.json;

import java.util.ArrayList;
import java.util.List;

/**
 * Lexical analysis for JSON text.
 *
 * <p>The whole input is tokenized eagerly by the constructor. The resulting tokens are then
 * walked with {@link #get()} and {@link #next()}.
 */
public class Tokenizer {
    /** The text being analyzed. */
    private final String json;
    /** Index of the next character to read from {@link #json}. */
    private int index = 0;
    /** Tokens produced by the analysis; an ArrayList keeps indexed access in {@link #get()} cheap. */
    private final List<Token> tokens = new ArrayList<Token>();
    /** Index of the current token; equals {@code tokens.size()} once every token was consumed. */
    private int tokenIndex = 0;

    /**
     * Creates a tokenizer and immediately runs the lexical analysis.
     *
     * @param json the JSON text to tokenize
     * @throws Exception if {@code json} is null or is not lexically valid JSON
     */
    public Tokenizer(String json) throws Exception {
        if (json == null) {
            throw new Exception("Invalid JSON input: input is null.");
        }
        this.json = json;
        this.init();
    }

    /**
     * Tokenizes the whole input into {@link #tokens}.
     *
     * @throws Exception if the input contains an invalid token
     */
    private void init() throws Exception {
        Token token;
        while ((token = token()) != null) {
            tokens.add(token);
        }
    }

    /**
     * Reads the next token starting at {@link #index}.
     *
     * @return the next token, or {@code null} when only whitespace (or nothing) is left
     * @throws Exception if the upcoming characters do not form a valid token
     */
    private Token token() throws Exception {
        // Skip whitespace with a bounds check, so trailing whitespace simply ends the input.
        while (index < json.length() && isSpace(json.charAt(index))) {
            index++;
        }
        if (index >= json.length()) {
            return null;
        }
        int start = index;
        char c = json.charAt(index++);
        switch (c) {
            case '{':
                return new Token(TokenType.ObjectStart, "{");
            case '}':
                return new Token(TokenType.ObjectEnd, "}");
            case '[':
                return new Token(TokenType.ArrayStart, "[");
            case ']':
                return new Token(TokenType.ArrayEnd, "]");
            case ',':
                return new Token(TokenType.Comma, ",");
            case ':':
                return new Token(TokenType.Colon, ":");
            case '"':
                return new Token(TokenType.String, readString());
            case 't':
                expectLiteral("true", start);
                return new Token(TokenType.Boolean, "true");
            case 'f':
                expectLiteral("false", start);
                return new Token(TokenType.Boolean, "false");
            case 'n':
                expectLiteral("null", start);
                return new Token(TokenType.Null, null);
            default:
                if (isNum(c)) {
                    // Rewind so that readNum() sees the sign or first digit.
                    index = start;
                    return new Token(TokenType.Number, readNum());
                }
                throw new Exception(
                        "Invalid JSON input: unexpected character '" + c + "' at index " + start + ".");
        }
    }

    /**
     * Checks that {@code literal} occurs at {@code start} and moves {@link #index} just past it.
     *
     * @param literal the expected keyword ({@code true}, {@code false} or {@code null})
     * @param start   index of the keyword's first character
     * @throws Exception if the input does not contain the keyword at that position
     */
    private void expectLiteral(String literal, int start) throws Exception {
        if (!json.startsWith(literal, start)) {
            throw new Exception("Invalid JSON input.");
        }
        index = start + literal.length();
    }

    /**
     * Reads a string body; {@link #index} must point just after the opening quote.
     *
     * <p>Escape sequences are decoded, so the returned value is the actual string content
     * (for example {@code \"} becomes {@code "} and {@code \n} becomes a line feed).
     *
     * @return the decoded string content, without the surrounding quotes
     * @throws Exception if the string is not terminated or contains an invalid escape sequence
     */
    private String readString() throws Exception {
        int start = index - 1;
        StringBuilder sb = new StringBuilder();
        while (index < json.length()) {
            char c = json.charAt(index++);
            if (c == '"') {
                return sb.toString();
            }
            if (!isEscape(c)) {
                sb.append(c);
                continue;
            }
            if (index >= json.length()) {
                break; // a lone backslash at the end of input: reported as unterminated below
            }
            char escaped = json.charAt(index++);
            switch (escaped) {
                case '"':
                case '\\':
                case '/':
                    sb.append(escaped);
                    break;
                case 'b':
                    sb.append('\b');
                    break;
                case 'f':
                    sb.append('\f');
                    break;
                case 'n':
                    sb.append('\n');
                    break;
                case 'r':
                    sb.append('\r');
                    break;
                case 't':
                    sb.append('\t');
                    break;
                case 'u':
                    sb.append(readUnicodeEscape());
                    break;
                default:
                    throw new Exception("Invalid JSON input: invalid escape sequence '\\" + escaped
                            + "' at index " + (index - 2) + ".");
            }
        }
        throw new Exception(
                "Invalid JSON input: unterminated string starting at index " + start + ".");
    }

    /**
     * Reads the four hexadecimal digits that follow a backslash-u escape.
     *
     * @return the UTF-16 code unit described by the digits
     * @throws Exception if fewer than four hexadecimal digits follow
     */
    private char readUnicodeEscape() throws Exception {
        if (index + 4 > json.length()) {
            throw new Exception(
                    "Invalid JSON input: incomplete unicode escape at index " + (index - 2) + ".");
        }
        int code = 0;
        for (int i = 0; i < 4; i++) {
            int digit = hexValue(json.charAt(index + i));
            if (digit < 0) {
                throw new Exception(
                        "Invalid JSON input: invalid unicode escape at index " + (index - 2) + ".");
            }
            code = code * 16 + digit;
        }
        index += 4;
        return (char) code;
    }

    /**
     * Reads a number: optional minus sign, integer digits, optional fraction, optional exponent.
     * Leading zeros are tolerated.
     *
     * @return the number exactly as written in the input
     * @throws Exception if a required digit is missing
     */
    private String readNum() throws Exception {
        int start = index;
        if (index < json.length() && json.charAt(index) == '-') {
            index++;
        }
        readDigits(start);
        if (index < json.length() && json.charAt(index) == '.') {
            index++;
            readDigits(start);
        }
        if (index < json.length() && (json.charAt(index) == 'e' || json.charAt(index) == 'E')) {
            index++;
            if (index < json.length() && (json.charAt(index) == '+' || json.charAt(index) == '-')) {
                index++;
            }
            readDigits(start);
        }
        return json.substring(start, index);
    }

    /**
     * Consumes one or more decimal digits.
     *
     * @param numberStart index where the enclosing number starts, used in the error message
     * @throws Exception if there is no digit at the current position
     */
    private void readDigits(int numberStart) throws Exception {
        int first = index;
        while (index < json.length() && isDigit(json.charAt(index))) {
            index++;
        }
        if (index == first) {
            throw new Exception(
                    "Invalid JSON input: malformed number at index " + numberStart + ".");
        }
    }

    /**
     * Tells whether a character is an ASCII decimal digit.
     *
     * @param c the character to test
     * @return {@code true} for {@code '0'} to {@code '9'}
     */
    private static boolean isDigit(char c) {
        return '0' <= c && c <= '9';
    }

    /**
     * Converts a hexadecimal digit to its numeric value.
     *
     * @param c the character to convert
     * @return the value 0-15, or -1 if {@code c} is not a hexadecimal digit
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /**
     * Tells whether a character can start a number.
     *
     * @param c the character to test
     * @return {@code true} for a minus sign or a decimal digit
     */
    private boolean isNum(char c) {
        return c == '-' || isDigit(c);
    }

    /**
     * Tells whether a character introduces an escape sequence inside a string.
     *
     * @param c the character to test
     * @return {@code true} for a backslash
     */
    private boolean isEscape(char c) {
        return c == '\\';
    }

    /**
     * Tells whether a character is skipped between tokens.
     *
     * @param c the character to test
     * @return {@code true} for tab, line feed, carriage return, NUL and space
     */
    private boolean isSpace(char c) {
        // NUL is not JSON whitespace, but it has always been skipped here, so it stays accepted.
        return c == '\t' || c == '\n' || c == '\r' || c == '\0' || c == ' ';
    }

    /**
     * Advances to the next token.
     *
     * <p>Moving past the last token leaves the cursor at the end of the token list, so that
     * {@link #get()} returns {@code null} afterwards instead of repeating the last token.
     *
     * @return the new current token, or {@code null} if there are no more tokens
     */
    public Token next() {
        if (tokenIndex < tokens.size()) {
            tokenIndex++;
        }
        return get();
    }

    /**
     * Returns the token at the current position without advancing.
     *
     * @return the current token, or {@code null} if all tokens have been consumed
     */
    public Token get() {
        if (tokenIndex < tokens.size()) {
            return tokens.get(tokenIndex);
        }
        return null;
    }
}

/**
 * Kinds of tokens produced by the lexical analysis.
 */
enum TokenType {
    // start of an object: {
    ObjectStart,
    // end of an object: }
    ObjectEnd,
    // start of an array: [
    ArrayStart,
    // end of an array: ]
    ArrayEnd,
    // string
    String,
    // number
    Number,
    // boolean
    Boolean,
    // null
    Null,
    // ,
    Comma,
    // :
    Colon
}

/**
 * A single unit produced by the lexical analysis: a type plus its textual value.
 *
 * @author mahuan
 * @version 2017-12-13
 */
class Token {
    /** Kind of the token. */
    public TokenType type;
    /** Text of the token; {@code null} for the JSON null literal. */
    public String value;

    /**
     * Creates a token.
     *
     * @param type  kind of the token
     * @param value text of the token
     */
    public Token(TokenType type, String value) {
        this.type = type;
        this.value = value;
    }
}
语法分析
package com.mahuan.json; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; /** * 语法分析 */ public class Parser { /** * 分析Object,使用map数据结构标识 * @param tokenizer * @return * @throws Exception */ public static Map<String, Object> parserObject(Tokenizer tokenizer) throws Exception { Map<String, Object> map = new HashMap<>(); Token token = null; while (true) { token = tokenizer.get(); if (token.type == TokenType.ObjectEnd) break; if (token.type == TokenType.ObjectStart) { tokenizer.next(); continue; } if (token.type == TokenType.Comma) { tokenizer.next(); continue;// 跳过, } String key = token.value; token = tokenizer.next(); if (token.type != TokenType.Colon) throw new Exception(); tokenizer.next(); map.put(key, parserValue(tokenizer)); } return map; } /** * 分析Array,使用list数据结构标识 * @param tokenizer * @return * @throws Exception */ public static List<Object> parserArray(Tokenizer tokenizer) throws Exception { List<Object> list = new LinkedList<>(); Token token = null; while (true) { token = tokenizer.get(); if (token.type == TokenType.ArrayEnd) break; if (token.type == TokenType.ArrayStart) { tokenizer.next(); continue; } if (token.type == TokenType.Comma) { tokenizer.next(); continue; } list.add(parserValue(tokenizer)); } return list; } /** * 分析值,根据token再判断值的具体类型 * @param tokenizer * @return * @throws Exception */ public static Object parserValue(Tokenizer tokenizer) throws Exception { Token token = tokenizer.get(); try { if (token.type == TokenType.ObjectStart) return parserObject(tokenizer); else if (token.type == TokenType.ArrayStart) return parserArray(tokenizer); else if (token.type == TokenType.Boolean) return Boolean.valueOf(token.value); else if (token.type == TokenType.String) return token.value; else if (token.type == TokenType.Number) return token.value; else if (token.type == TokenType.Null) return null; throw new Exception(""); } finally { // object和array分析完后,要跳过其end的token // 其他类型分析完后,要跳过自身 tokenizer.next(); } } }
测试代码
package com.mahuan.json;

/**
 * Small demo: tokenizes a sample JSON document, parses it and prints the result.
 */
public class Test {
    /**
     * Runs the demo.
     *
     * @param args unused
     * @throws Exception if the sample document cannot be tokenized or parsed
     */
    public static void main(String[] args) throws Exception {
        final String sample = "{ \"success\": true, \"message\": \"123\", \"result\": [ -2146464718]}";
        // Tokenize first, then hand the token stream to the parser.
        final Tokenizer lexer = new Tokenizer(sample);
        final Object parsed = Parser.parserValue(lexer);
        System.out.println(parsed);
    }
}