很早之前就用过Google的Protobuf做数据编码,但一直没有深入理解其中的原理。最近做了一次通讯抓包,发现其中有很多Protobuf编码的数据包,于是决定分析一下这些数据包及其编码方式。
首先来简单介绍一下Protobuf的使用,这里以Windows下的Java开发为例,分为几个步骤:编写*.proto -> 使用Google提供的protoc.exe生成*.java -> 在项目中导入protobuf的.jar包进行开发即可。先看这里的*.proto文件:
// Proto package. The Java sources in this article import the generated
// classes as com.Msg.CMsg, com.Msg.CMsgHead and com.Msg.CMsgReg.
package com;

// Envelope message exchanged between the sample client and server.
// The sample Java code fills both fields with another message that has been
// serialized and then converted to a string via toStringUtf8().
// NOTE(review): the payloads are binary, so `bytes` looks like the more
// natural field type; a string round trip can presumably corrupt byte
// sequences that are not valid UTF-8 -- confirm before reusing this schema.
message CMsg
{
    // Serialized CMsgHead, stored as a string by the sample code.
    required string msghead = 1;
    // Serialized CMsgReg, stored as a string by the sample code.
    required string msgbody = 2;
}

// Header message; every field is required.
message CMsgHead
{
    required int32 msglen = 1;
    required int32 msgtype = 2;
    required int32 msgseq = 3;
    required int32 termversion = 4;
    required int32 msgres = 5;
    required string termid = 6;
}

// Body message; every field is optional, so readers should test the
// generated has*() accessors before trusting a value.
message CMsgReg
{
    optional int32 area = 1;
    optional int32 region = 2;
    optional int32 shop = 3;
    optional int32 ret = 4;
    optional string termid = 5;
}
使用protoc.exe生成Java文件,命令如下(这里假设上面的文件保存为msg.proto,例如):protoc.exe --java_out=. msg.proto
将生成的Msg.java及protobuf-java-2.3.0.jar导入项目中进行开发,这里写一个服务器端ProtoServer及客户端ProtoClient:
package com;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.ServerSocket;
import java.net.Socket;
import java.nio.charset.Charset;
import java.util.Arrays;

import com.Msg.CMsg;
import com.Msg.CMsgHead;
import com.Msg.CMsgReg;

/**
 * Minimal blocking TCP server that reads one protobuf-encoded {@code CMsg}
 * per connection, prints its header and body, and answers with a fixed
 * {@code CMsg} of its own.
 *
 * <p>Connections are handled one at a time on the thread that runs
 * {@link #run()}. Messages are sent without any length prefix, so a request is
 * assumed to arrive in a single {@code read()} call and to fit into
 * {@value #BUFFER_SIZE} bytes; this keeps the bytes on the wire identical to a
 * bare serialized {@code CMsg}.
 */
public class ProtoServer implements Runnable {

    /** TCP port the server listens on. */
    private static final int PORT = 12345;

    /** Maximum request size read from a client in one go. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Charset used to turn the string fields of {@code CMsg} back into bytes.
     * It must match the {@code toStringUtf8()} call used when building the
     * message; relying on the platform default breaks the round trip on
     * systems whose default charset is not UTF-8.
     */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /**
     * Accepts clients forever and serves each of them in turn. A failure while
     * serving one client is logged and does not stop the accept loop; a failure
     * of the listening socket itself ends the method.
     */
    @Override
    public void run() {
        ServerSocket serverSocket = null;
        try {
            System.out.println("beign:");
            serverSocket = new ServerSocket(PORT);
            while (true) {
                System.out.println("等待接收用户连接:");
                // Blocks until a client connects.
                Socket client = serverSocket.accept();
                try {
                    handleClient(client);
                } catch (Exception ex) {
                    System.out.println(ex.getMessage());
                    ex.printStackTrace();
                } finally {
                    // Closing must not throw out of the loop, otherwise one
                    // misbehaving client would take the whole server down.
                    closeQuietly(client);
                    System.out.println("close");
                }
            }
        } catch (IOException e) {
            System.out.println(e.getMessage());
        } finally {
            if (serverSocket != null) {
                try {
                    serverSocket.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing the listener fails.
                }
            }
        }
    }

    /**
     * Reads one request from {@code client}, prints its content and sends the
     * canned reply.
     *
     * @param client the connected client socket; closed by the caller
     * @throws IOException if the client disconnects before sending data, if
     *     reading fails, or if the received bytes are not a valid message
     */
    private void handleClient(Socket client) throws IOException {
        InputStream inputstream = client.getInputStream();
        DataOutputStream dataOutputStream = new DataOutputStream(client.getOutputStream());

        byte[] buffer = new byte[BUFFER_SIZE];
        int count = inputstream.read(buffer);
        if (count < 0) {
            // read() returns -1 at end of stream; allocating an array of that
            // size would fail with NegativeArraySizeException.
            throw new IOException("client closed the connection before sending a request");
        }
        byte[] request = Arrays.copyOf(buffer, count);

        CMsg msg = CMsg.parseFrom(request);

        CMsgHead head = CMsgHead.parseFrom(msg.getMsghead().getBytes(UTF_8));
        System.out.println("==len===" + head.getMsglen());
        System.out.println("==res===" + head.getMsgres());
        System.out.println("==seq===" + head.getMsgseq());
        System.out.println("==type===" + head.getMsgtype());
        System.out.println("==Termid===" + head.getTermid());
        System.out.println("==Termversion===" + head.getTermversion());

        CMsgReg body = CMsgReg.parseFrom(msg.getMsgbody().getBytes(UTF_8));
        System.out.println("==area==" + body.getArea());
        System.out.println("==Region==" + body.getRegion());
        System.out.println("==shop==" + body.getShop());

        sendProtoBufBack(dataOutputStream);
    }

    /**
     * Builds the fixed reply message.
     *
     * <p>The header and body are serialized and stored in the string fields of
     * {@code CMsg} via {@code toStringUtf8()}. That is only lossless while the
     * serialized bytes form valid UTF-8, which holds for the small values used
     * here.
     *
     * @return the serialized {@code CMsg} reply
     */
    private byte[] getProtoBufBack() {
        // head
        CMsgHead head = CMsgHead.newBuilder()
                .setMsglen(10)
                .setMsgtype(21)
                .setMsgseq(32)
                .setTermversion(43)
                .setMsgres(54)
                .setTermid("Server:head")
                .build();

        // body
        CMsgReg body = CMsgReg.newBuilder()
                .setArea(11)
                .setRegion(22)
                .setShop(33)
                .setRet(44)
                .setTermid("Server:body")
                .build();

        // Msg
        CMsg msg = CMsg.newBuilder()
                .setMsghead(head.toByteString().toStringUtf8())
                .setMsgbody(body.toByteString().toStringUtf8())
                .build();

        return msg.toByteArray();
    }

    /**
     * Writes the canned reply to the client and flushes it. The raw serialized
     * message is sent as-is, without a length prefix. I/O errors are logged and
     * otherwise ignored.
     *
     * @param dataOutputStream stream connected to the client
     */
    private void sendProtoBufBack(DataOutputStream dataOutputStream) {
        byte[] backBytes = getProtoBufBack();
        try {
            dataOutputStream.write(backBytes, 0, backBytes.length);
            dataOutputStream.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes {@code socket}, ignoring any error raised while doing so.
     *
     * @param socket the socket to close
     */
    private static void closeQuietly(Socket socket) {
        try {
            socket.close();
        } catch (IOException ignored) {
            // The connection is being discarded anyway.
        }
    }

    /**
     * Starts the server on its own thread.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Thread desktopServerThread = new Thread(new ProtoServer());
        desktopServerThread.start();
    }
}
package com;

import java.io.InputStream;
import java.net.Socket;
import java.nio.charset.Charset;
import java.util.Arrays;

import com.Msg.CMsg;
import com.Msg.CMsgHead;
import com.Msg.CMsgReg;

/**
 * Minimal client that connects to the sample server, sends one
 * protobuf-encoded {@code CMsg} and prints the {@code CMsg} it gets back.
 *
 * <p>Messages are exchanged without a length prefix; a reply is assumed to
 * arrive in a single {@code read()} call and to fit into
 * {@value #BUFFER_SIZE} bytes.
 */
public class ProtoClient {

    /** Address of the server; use "localhost" when it runs on the same machine. */
    private static final String SERVER_HOST = "192.168.85.152";

    /** TCP port of the server. */
    private static final int SERVER_PORT = 12345;

    /** Maximum reply size read from the server in one go. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Charset used to turn the string fields of {@code CMsg} back into bytes.
     * It must match the {@code toStringUtf8()} call used when building the
     * message; relying on the platform default breaks the round trip on
     * systems whose default charset is not UTF-8.
     */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /**
     * Runs a single request/response exchange.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ProtoClient pc = new ProtoClient();
        System.out.println("beign:");
        pc.runget();
    }

    /**
     * Connects to the server, sends one message, then reads and prints the
     * reply. Any failure is reported on standard output; the socket is closed
     * in every case.
     */
    public void runget() {
        Socket socket = null;
        try {
            socket = new Socket(SERVER_HOST, SERVER_PORT);

            // head
            CMsgHead head = CMsgHead.newBuilder()
                    .setMsglen(5)
                    .setMsgtype(1)
                    .setMsgseq(3)
                    .setTermversion(41)
                    .setMsgres(5)
                    .setTermid("Client:head")
                    .build();

            // body
            CMsgReg body = CMsgReg.newBuilder()
                    .setArea(11)
                    .setRegion(22)
                    .setShop(33)
                    .setRet(44)
                    .setTermid("Clent:body")
                    .build();

            // Msg: the serialized head/body are carried in string fields, which
            // is only lossless while their bytes form valid UTF-8 (true for the
            // small values used here).
            CMsg msg = CMsg.newBuilder()
                    .setMsghead(head.toByteString().toStringUtf8())
                    .setMsgbody(body.toByteString().toStringUtf8())
                    .build();

            // Send the request to the server.
            System.out.println("sendMsg...");
            msg.writeTo(socket.getOutputStream());

            // Receive the server's reply.
            InputStream input = socket.getInputStream();
            System.out.println("recvMsg:");
            byte[] by = recvMsg(input);
            if (by == null) {
                // recvMsg already reported the cause; parsing null would only
                // add a confusing NullPointerException.
                System.out.println("no response received from server");
                return;
            }
            printMsg(CMsg.parseFrom(by));
        } catch (Exception e) {
            System.out.println(e.toString());
        } finally {
            // Close on every path so a failed exchange does not leak the socket.
            if (socket != null) {
                try {
                    socket.close();
                } catch (Exception ignored) {
                    // The connection is being discarded anyway.
                }
            }
        }
    }

    /**
     * Prints every field that is set in the header and body carried by
     * {@code g}. Parsing errors are logged with a stack trace.
     *
     * @param g the envelope message to print
     */
    public void printMsg(CMsg g) {
        try {
            CMsgHead h = CMsgHead.parseFrom(g.getMsghead().getBytes(UTF_8));
            StringBuilder sb = new StringBuilder();
            if (h.hasMsglen()) {
                sb.append("==msglen===" + h.getMsglen() + "\n");
            }
            if (h.hasMsgres()) {
                sb.append("==msgres===" + h.getMsgres() + "\n");
            }
            if (h.hasMsgseq()) {
                sb.append("==msgseq===" + h.getMsgseq() + "\n");
            }
            if (h.hasMsgtype()) {
                sb.append("==msgtype===" + h.getMsgtype() + "\n");
            }
            if (h.hasTermid()) {
                sb.append("==termid===" + h.getTermid() + "\n");
            }
            if (h.hasTermversion()) {
                sb.append("==termversion===" + h.getTermversion() + "\n");
            }

            CMsgReg bo = CMsgReg.parseFrom(g.getMsgbody().getBytes(UTF_8));
            if (bo.hasArea()) {
                sb.append("==area==" + bo.getArea() + "\n");
            }
            if (bo.hasRegion()) {
                sb.append("==region==" + bo.getRegion() + "\n");
            }
            if (bo.hasShop()) {
                sb.append("==shop==" + bo.getShop() + "\n");
            }
            if (bo.hasRet()) {
                sb.append("==ret==" + bo.getRet() + "\n");
            }
            if (bo.hasTermid()) {
                sb.append("==termid==" + bo.getTermid() + "\n");
            }

            System.out.println(sb.toString());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads up to {@value #BUFFER_SIZE} bytes from the stream with a single
     * {@code read()} call.
     *
     * @param inpustream the stream to read from
     * @return the bytes that were read, or {@code null} if the stream was
     *     already at its end or reading failed (the cause is printed)
     */
    public byte[] recvMsg(InputStream inpustream) {
        try {
            byte[] buffer = new byte[BUFFER_SIZE];
            int count = inpustream.read(buffer);
            if (count < 0) {
                // read() returns -1 at end of stream; allocating an array of
                // that size would fail with NegativeArraySizeException.
                System.out.println("connection closed before any data was received");
                return null;
            }
            return Arrays.copyOf(buffer, count);
        } catch (Exception e) {
            System.out.println(e.toString());
            return null;
        }
    }
}
运行结果:
在上面socket通信过程中我使用了wireshark对其进行抓包,结果分析如下图
由上图我们可以很清楚地看到,Protobuf编码其实类似TLV(tag, length, value)编码,其内部就是(tag, length, value)的组合。其中tag由(field_number<<3)|wire_type计算得出,field_number由我们在proto文件中定义,wire_type由Protobuf根据proto中定义的字段类型决定;length只在wire_type为2(length-delimited,如string、bytes和嵌套消息)时出现,对于int32这类Varint类型的字段,tag之后直接就是value。length和整数值都采用一种叫做Varint的数字表示方法,它是一种紧凑的表示数字的方法,用一个或多个字节来表示一个数字,值越小的数字使用越少的字节数,具体细节可以谷歌Varint。总之,Protobuf序列化后所生成的二进制消息非常紧凑,这得益于Protobuf采用了上面的Encoding方法。
参考文献:http://www.ibm.com/developerworks/cn/linux/l-cn-gpb/
原文地址:http://blog.csdn.net/wangqiuyun/article/details/42119835