CODE:
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
Created on 2014-7-8
@author: guaguastd
@name: extractWeiboEntities.py

Fetch public Sina Weibo statuses, extract their entities and print a short
JSON preview of each extracted list.
'''


def main():
    '''Fetch public statuses, extract entities and print a preview of each.

    Prints three JSON arrays to stdout, in this order: the first items of
    the status texts, of the screen names, and of the words returned by
    ``extractWeiboEntities``.
    '''
    # Imports are kept inside the entry point (they were under the
    # ``__main__`` guard originally) so that merely importing this file
    # does not pull in the project modules or log in to the API.
    import json

    # Get the API handle used to access the Sina Weibo API.
    from sinaWeiboLogin import sinaWeiboLogin
    # Entity extraction helper.
    from sinaWeibo import extractWeiboEntities
    # Public timeline reader.
    from sinaWeiboStatuses import publicTimeline

    status_count = 200   # how many statuses to request
    preview_count = 5    # how many items of each list to print

    sinaWeiboApi = sinaWeiboLogin()

    # Get the newest 200 weibo statuses.
    statuses = publicTimeline(sinaWeiboApi, status_count)
    status_texts, screen_names, words = extractWeiboEntities(statuses)

    # Explore the first 5 items for each list.  ``print(x)`` with a single
    # argument behaves the same on Python 2 and Python 3, unlike the
    # Python 2-only ``print x`` statement.  json.dumps keeps its default
    # ASCII escaping, so non-ASCII text is emitted as \uXXXX sequences.
    for items in (status_texts, screen_names, words):
        print(json.dumps(items[0:preview_count], indent=1))


if __name__ == '__main__':
    main()
[ "[\u795e\u9a6c]2014\u590f\u5b63\u65b0\u6b3e\u5973\u88c5\u97e9\u56fd\u4e1c\u5927\u95e8\u4ee3\u8d2d \u65e0\u8896t\u6064\u5973\u4fee\u8eab\u5706\u9886\u663e\u7626\u96ea\u7eba\u4e0a\u8863 http://t.cn/RvCUVwB", "\u52ff\u5fd8\u56fd\u803b\uff0c\u632f\u5174\u4e2d\u534e\uff01\u81f3\u4eca\u65e0\u6cd5\u5fd8\u8bb0\u65e5\u5bc7\u523a\u5200\u4e0a\u7684\u5a74\u513f\uff01\uff01\uff01\uff01\uff01\uff01\uff01\u75db\u5fc3\u75be\u9996 \u6211\u5206\u4eab\u4e86http://t.cn/Rvdm1cn", "\u7626\u8138\u7684\u4ea7\u54c1\u7528\u8fc7\u597d\u591a\u597d\u591a\uff0c\u603b\u662f\u4ee5\u89c1\u4e0d\u5230\u6548\u679c\u7ed3\u5c40\uff01\u4f46\u662f\u4e00\u76f4\u8ffd\u6c42V\u8138\u7684\u5fc3\u6ca1\u6539\u53d8\u8fc7\uff01\u76f4\u5230\u6211\u627e\u5230\u4e86\u8fd9\u4e2a\u4f70\u8349\u4e16\u5bb6V\u8138\u795e\u5668\uff01[\u5fc3]\u6d82\u4e0a\u7acb\u523b\u5c31\u6709\u7d27\u81f4\u611f\uff0c\u7761\u524d\u6d82\u62b9\uff0c\u9192\u6765\u770b\u5230\u7684\u6548\u679c\u4f60\u771f\u7684\u4f1a\u5c16\u53eb\u7684\uff01[\u7231\u4f60]\u54ea\u91cc\u4e0d\u7626\u6d82\u54ea\u91cc\uff0c\u518d\u4e5f\u4e0d\u7528\u62c5\u5fc3\u6211\u7684\u5305\u5b50\u8138\u5566\uff01\u7f8e\u4e3d\u4fcf\u4f73\u4eba\u63a8\u8350\uff1a[\u4e2d\u7bad]http://t.cn/RvntLNh", "\u5a01\u6b66MAERZ2014\u6625\u88c5\u65b0\u6b3e\u7537\u88c5\u957f\u8896\u886c\u886b \u97e9\u7248\u4fee\u8eab\u7537\u58eb\u7ecf\u5178\u7eaf\u68c9\u683c\u5b50\u886c\u8863\u6f6e http://t.cn/RvCyu61", "[\u563b\u563b]2014\u590f\u88c5\u65b0\u6b3e\u5973\u58eb\u788e\u82b1\u886c\u886b\u4fee\u8eab\u5927\u7801\u957f\u8896\u7eaf\u68c9\u5370\u82b1\u886c\u8863\u97e9\u7248\u4e0a\u8863\u6f6e http://t.cn/RvCUIw5" ] [ "\u53e4\u6708\u79cb\u666f", "Lcineferit", "\u7efd\u653e\u9ec4\u8272\u7261\u4e39aa", "\u4e8c\u9505\u9505\u4e8c\u59d0\u59d0", "lang\u6d6a\u6f2b\u66f2" ] [ "[\u795e\u9a6c]2014\u590f\u5b63\u65b0\u6b3e\u5973\u88c5\u97e9\u56fd\u4e1c\u5927\u95e8\u4ee3\u8d2d", "\u65e0\u8896t\u6064\u5973\u4fee\u8eab\u5706\u9886\u663e\u7626\u96ea\u7eba\u4e0a\u8863", "http://t.cn/RvCUVwB", 
"\u52ff\u5fd8\u56fd\u803b\uff0c\u632f\u5174\u4e2d\u534e\uff01\u81f3\u4eca\u65e0\u6cd5\u5fd8\u8bb0\u65e5\u5bc7\u523a\u5200\u4e0a\u7684\u5a74\u513f\uff01\uff01\uff01\uff01\uff01\uff01\uff01\u75db\u5fc3\u75be\u9996", "\u6211\u5206\u4eab\u4e86http://t.cn/Rvdm1cn" ]
Python 提取新浪微博的博文中的元素(包含Text, Screen_name),布布扣,bubuko.com
Python 提取新浪微博的博文中的元素(包含Text, Screen_name)
原文地址:http://blog.csdn.net/guaguastd/article/details/37556999