标签:number file_path address sep pen entity warnings dex res
import csv
import time
import warnings
from multiprocessing import Process

import pandas as pd

warnings.filterwarnings("ignore")


def _read_columns(csv_path, wanted=(0, 2)):
    """Read the selected columns of a comma-separated file into lists.

    The file is parsed with the standard ``csv`` module so that quoted
    fields containing commas stay in one column (a plain ``split(",")``
    would shift every following column).

    Args:
        csv_path: Path of the UTF-8 encoded CSV file.
        wanted: Zero-based indices of the columns to extract.

    Returns:
        A list with one list of strings per requested column, in the same
        order as ``wanted``. Every row of the file is included; no header
        row is skipped.

    Raises:
        ValueError: If a non-empty row has fewer columns than required.
    """
    columns = [[] for _ in wanted]
    needed = max(wanted) + 1
    # newline="" is what the csv module expects so it can handle line
    # endings (and quoted embedded newlines) itself.
    with open(csv_path, encoding="utf-8", newline="") as f:
        reader = csv.reader(f)
        for row in reader:
            if not row:
                # Blank line: nothing to extract.
                continue
            if len(row) < needed:
                raise ValueError(
                    f"{csv_path}: line {reader.line_num} has {len(row)} "
                    f"column(s), expected at least {needed}"
                )
            for bucket, idx in zip(columns, wanted):
                bucket.append(row[idx])
    return columns


def node():
    """Load the node table.

    Reads ``./entity_csv/yinni_kg_intopieces.csv`` and keeps column 0 as
    ``global_id`` and column 2 as ``order_number``.

    Returns:
        A DataFrame with the string columns ``global_id`` and
        ``order_number``.
    """
    global_id, order_number = _read_columns(
        "./entity_csv/yinni_kg_intopieces.csv"
    )
    df_node = pd.DataFrame({
        "global_id": global_id,
        "order_number": order_number,
    })
    print(df_node.shape[0], df_node.head())
    return df_node


def edge(df_node, column):
    """Build the edge list for one relation type and write it to disk.

    Reads ``./relations_csv/yinni_kg_intopieces_link_by_<column>.csv``,
    takes column 0 as the start id and column 2 as the end id, maps both
    ids to their ``order_number`` via ``df_node`` and writes the resulting
    ``start_order_number`` / ``end_order_number`` pairs to ``<column>.txt``
    using ``|`` as the separator.

    Args:
        df_node: DataFrame with ``global_id`` and ``order_number`` columns,
            as returned by ``node()``.
        column: Relation name used to build both the input and the output
            file name.
    """
    file_path = "./relations_csv/yinni_kg_intopieces_link_by_" + column + ".csv"
    start_id, end_id = _read_columns(file_path)
    df_edge_0 = pd.DataFrame({
        "start_id": start_id,
        "end_id": end_id,
    })

    # Left joins keep every edge; ids without a matching node get NaN.
    df_edge_1 = df_edge_0.merge(
        df_node, how="left", left_on="start_id", right_on="global_id"
    ).rename(columns={"order_number": "start_order_number"})
    df_edge_2 = df_edge_1.merge(
        df_node, how="left", left_on="end_id", right_on="global_id"
    ).rename(columns={"order_number": "end_order_number"})

    # Drop repeated edges of the same type. De-duplication is done on the
    # full merged row (ids and order numbers) before projecting.
    df_edge_2 = df_edge_2.drop_duplicates()
    df_edge = df_edge_2[["start_order_number", "end_order_number"]]
    print(df_edge.shape[0], df_edge.head())

    file_path = column + ".txt"
    df_edge.to_csv(file_path, encoding="utf-8", index=False, sep="|")


def main():
    """Load the nodes once, then export each relation type in its own process."""
    df_node = node()
    label_list = ["bank", "link_phone", "person_address"]

    # Create one child process per relation type.
    ps = [Process(target=edge, args=(df_node, label)) for label in label_list]

    # Start all processes.
    for p in ps:
        p.start()

    # Block until every process has finished.
    for p in ps:
        p.join()


if __name__ == "__main__":
    start = time.time()
    main()
    end = time.time()
    # Original author's note: "2 58.53214192390442 3 56.898120164871216"
    # (presumably elapsed seconds with 2 vs. 3 processes - unconfirmed).
    print(end - start)
标签:number file_path address sep pen entity warnings dex res
原文地址:https://www.cnblogs.com/hapyygril/p/12966767.html