码迷,mamicode.com
首页 > 系统相关 > 详细

进程示例

时间:2020-05-26 18:12:38      阅读:86      评论:0      收藏:0      [点我收藏+]

标签:number   file_path   address   sep   pen   entity   warnings   dex   res   

import pandas as pd
import warnings
warnings.filterwarnings("ignore")
import time
from multiprocessing import Process
def node(file_path="./entity_csv/yinni_kg_intopieces.csv"):
    """Load the node table from the "intopieces" entity CSV.

    Every non-blank line is split on commas, double-quote characters are
    stripped from each field, and the first and third fields are kept as
    ``global_id`` and ``order_number`` respectively.

    Args:
        file_path: CSV file to read. Defaults to the path this script has
            always used, so existing ``node()`` callers are unaffected.

    Returns:
        A pandas DataFrame with the string columns ``global_id`` and
        ``order_number``, one row per non-blank input line.

    Raises:
        IndexError: if a non-blank line has fewer than three
            comma-separated fields.
    """
    global_id = []
    order_number = []

    # Iterate the file lazily instead of materialising it with readlines().
    with open(file_path, encoding="utf-8") as f:
        for line in f:
            # Skip blank lines (e.g. a trailing newline at end of file);
            # they would otherwise fail on the field index below.
            if not line.strip():
                continue
            # NOTE(review): the published source read `x.replace(","")`,
            # which is a syntax error; stripping double quotes is the
            # presumed intent (an escaped quote lost in extraction).
            fields = [x.replace('"', "") for x in line.rstrip("\n").split(",")]
            global_id.append(fields[0])
            order_number.append(fields[2])

    df_node = pd.DataFrame({
        "global_id": global_id,
        "order_number": order_number,
    })

    print(df_node.shape[0], df_node.head())
    return df_node
def edge(df_node, column, relations_dir="./relations_csv", output_dir="."):
    """Build the order-number edge list for one relation type and save it.

    Reads ``<relations_dir>/yinni_kg_intopieces_link_by_<column>.csv``,
    takes the first and third comma-separated fields of every non-blank
    line as the start/end ids, maps both ids to order numbers through
    ``df_node`` (left joins on ``global_id``), drops duplicate rows, and
    writes the two order-number columns to ``<output_dir>/<column>.txt``
    as a ``|``-separated file with a header row.

    Args:
        df_node: DataFrame with ``global_id`` and ``order_number`` columns,
            as produced by ``node()``.
        column: relation name used in both the input and output file names.
        relations_dir: directory holding the relation CSV files. The default
            keeps the original hard-coded location.
        output_dir: directory for the output file. The default (current
            directory) matches the original behaviour.

    Returns:
        None. The result is printed and written to disk.

    Raises:
        IndexError: if a non-blank line has fewer than three
            comma-separated fields.
    """
    start_id = []
    end_id = []
    file_path = relations_dir + "/yinni_kg_intopieces_link_by_" + column + ".csv"
    with open(file_path, encoding="utf-8") as f:
        for line in f:
            # Skip blank lines; they would fail on the field index below.
            if not line.strip():
                continue
            # NOTE(review): the published source read `x.replace(","")`,
            # which is a syntax error; stripping double quotes is the
            # presumed intent (an escaped quote lost in extraction).
            fields = [x.replace('"', "") for x in line.rstrip("\n").split(",")]
            start_id.append(fields[0])
            end_id.append(fields[2])

    # The file is no longer needed past this point, so the joins run
    # outside the `with` block.
    df_edge_0 = pd.DataFrame({
        "start_id": start_id,
        "end_id": end_id,
    })

    # Resolve the start id to its order number.
    df_edge_1 = df_edge_0.merge(
        df_node, how="left", left_on="start_id", right_on="global_id"
    ).rename(columns={"order_number": "start_order_number"})

    # Resolve the end id to its order number. The left frame's
    # `order_number` was renamed above, so the right frame's column
    # arrives without a suffix.
    df_edge_2 = df_edge_1.merge(
        df_node, how="left", left_on="end_id", right_on="global_id"
    ).rename(columns={"order_number": "end_order_number"})

    # Remove repeated edges of the same type (de-duplicated on the full
    # row, before the id columns are dropped, as in the original).
    df_edge_2 = df_edge_2.drop_duplicates()

    # Column labels must be strings; the original used bare names here,
    # which raised NameError.
    df_edge = df_edge_2[["start_order_number", "end_order_number"]]
    print(df_edge.shape[0], df_edge.head())

    out_path = output_dir + "/" + column + ".txt"
    df_edge.to_csv(out_path, encoding="utf-8", index=False, sep="|")
def main():
    """Load the node table, then build each relation's edge file in its own process.

    One child process per relation label runs ``edge(df_node, label)``.
    All children are started before any is joined, and the function
    returns only after every child has finished.
    """
    df_node = node()

    # Create one child process per relation type.
    labelList = ["bank", "link_phone", "person_address"]
    workers = [Process(target=edge, args=(df_node, label)) for label in labelList]

    # Start every child before waiting on any of them.
    for worker in workers:
        worker.start()

    # Block until all children have exited.
    for worker in workers:
        worker.join()
 
if __name__ == "__main__":
    # Time the whole run and print the elapsed wall-clock seconds.
    started_at = time.time()
    main()
    # Timings recorded by the original author: "2 58.53214192390442" and
    # "3 56.898120164871216" (presumably seconds with 2 and 3 processes).
    print(time.time() - started_at)

 

进程示例

标签:number   file_path   address   sep   pen   entity   warnings   dex   res   

原文地址:https://www.cnblogs.com/hapyygril/p/12966767.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!