# Tags:
# -*- coding:utf-8 -*-
"""Import a category tree from indented text files into MySQL.

The nesting level of every category is taken from the leading whitespace of
its line: one tab, or four spaces, per level.  Tabs and spaces must not be
mixed on the same line, and the input file must be UTF-8 encoded.

Example input (illustrative)::

    Shoes
        Sneakers
        Sandals
        Slippers
        Boots
    Sports
        Badminton
            Singles
            Doubles
        Swimming

Usage::

    python category.py 153 cate1.txt cate2.txt ...

The first argument is the largest category id that already exists in the
table; it is stored in ``DB_AUTO_INSERT`` and added to every generated id.
"""
import json

import MySQLdb
import redis

# Offset added to every id produced by Category.getincr().  The command-line
# entry point overwrites it with its first argument; the default only keeps
# getincr() callable when this module is imported instead of being run.
DB_AUTO_INSERT = 0


class Category(object):
    """Read category files and insert one ``gm_category`` row per line.

    Redis holds the transient state of an import run:

    * ``category_cateid``    -- counter used to generate ids,
    * ``category_level_<n>`` -- list of ``[id, name]`` JSON entries seen at
      nesting level ``n``, newest first,
    * ``category_keys``      -- set with the names of those per-level lists.
    """

    # NOTE(review): both clients are created when the class body executes,
    # with hard-coded hosts and credentials; consider moving them to config.
    cache = redis.StrictRedis(host='localhost', port=6379, db=0)
    conn = MySQLdb.Connect(host='192.168.1.222', user='root',
                           passwd='123456', db='gmian', charset='utf8')

    def run(self, category_file):
        """Import every category listed in ``category_file``.

        The file is read line by line and each non-blank line is passed to
        ``writeCate``.  The file is closed even if an import step raises.
        """
        with open(category_file, 'r') as f:
            for line in f:
                # A blank line carries no category name; skip it rather than
                # inserting an empty row.
                if line.strip():
                    self.writeCate(line)

    def __del__(self):
        """Delete the Redis keys that were used while importing."""
        cache = self.__class__.cache
        for key in cache.smembers('category_keys'):
            cache.delete(key)
        cache.delete('category_cateid')
        cache.delete('category_keys')

    @classmethod
    def getincr(cls):
        """Return the next category id: Redis counter + ``DB_AUTO_INSERT``."""
        return int(cls.cache.incr('category_cateid')) + DB_AUTO_INSERT

    @classmethod
    def add(cls, num, catename):
        """Insert ``catename`` at level ``num`` and record it in Redis.

        The row is written to MySQL first; afterwards the ``[id, name]`` pair
        is pushed to the front of the list for its level so that later lines
        one level deeper can find it as their parent.
        """
        cat_id = cls.getincr()
        cls.insert_db(cat_id, num, catename)
        level_key = 'category_level_' + str(num)
        cls.cache.sadd('category_keys', level_key)
        cls.cache.lpush(level_key, json.dumps([cat_id, catename]))

    @classmethod
    def getparentid(cls, num):
        """Return the id of the newest category recorded at level ``num - 1``.

        Raises:
            NameError: if nothing has been recorded at the parent level.
        """
        parent = cls.cache.lindex('category_level_' + str(num - 1), 0)
        if not parent:
            raise NameError('STOP !! \n没找到父类')
        return json.loads(parent)[0]

    @classmethod
    def writeCate(cls, line):
        """Parse one line of the input file and import the category on it."""
        num, catename = cls.getLevelNum(line)
        cls.add(num, catename)

    @classmethod
    def insert_db(cls, id, num, catename):
        """Insert one row into ``gm_category`` and commit.

        ``num`` is the nesting level: level 0 gets ``parent_id`` 0, any other
        level uses the id returned by ``getparentid``.
        """
        pid = cls.getparentid(num) if num else 0
        cursor = cls.conn.cursor()
        try:
            cursor.execute(
                "insert into gm_category (cat_id,cat_name,parent_id,style,"
                "is_top_style,is_top_show,cat_ico)value(%s,%s,%s,%s,%s,%s,%s)",
                (id, catename, pid, '', 0, 0, ''))
            cls.conn.commit()
        finally:
            # Release the cursor whether or not the insert succeeded.
            cursor.close()

    @staticmethod
    def getLevelNum(str):
        """Split a line into ``(level, name)``.

        The indentation is either all tabs (one level each) or all spaces
        (four per level, rounded down).  The trailing line break is removed
        from the name.  Empty and whitespace-only input is handled without
        raising.
        """
        text = str.rstrip('\r\n')

        # Tabs take precedence: if the line starts with tabs, their count is
        # the level and whatever follows is the name.
        name = text.lstrip('\t')
        tabs = len(text) - len(name)
        if tabs:
            return tabs, name

        name = text.lstrip(' ')
        spaces = len(text) - len(name)
        if not spaces:
            return 0, text
        return spaces // 4, name


if __name__ == '__main__':
    # >$ python category.py 153(DB_AUTO_INSERT) cate1.txt cate2.txt ...
    import sys

    try:
        # Largest id already present in the table; without it the generated
        # ids would collide with existing rows and the inserts would fail.
        DB_AUTO_INSERT = int(sys.argv[1])
        files = sys.argv[2:]
        if not files:
            raise IndexError('input category file')
    except IndexError:
        raise ValueError('---------argv errors-------')

    cate = Category()
    for path in files:
        try:
            cate.run(path)
        except Exception as e:
            # Report the failure and carry on with the remaining files.
            print(e)
            print(u"【" + path + u"】导入失败")
# Tags:
# Original post: http://www.cnblogs.com/cgjcgs/p/5689081.html