标签:bsp tuples __name__ 数据 完全 not rtu open ext
# -*- coding: utf-8 -*-
"""Build a labelled slide list from a folder of .svs files and sort the slides.

The steps, in the order they are meant to be run:

1. ``create_label_csv`` lists the ``.svs`` files of a folder into a CSV with
   the columns ``case_id``, ``slide_id`` and ``label``.
2. ``excel_to_csv`` converts the metadata workbook to CSV.
3. ``gen_final_csv`` fills ``case_id`` and ``label`` from the metadata and
   drops slides that have no metadata row.
4. ``copy_file`` copies every labelled slide into a per-label folder.

NOTE: the source-encoding declaration is UTF-8 because this file contains
non-ASCII literals (column names, paths); a ``gbk`` declaration only works if
the file is physically saved as GBK.
"""
import csv
import os
import shutil

import pandas as pd

# File extension that identifies a slide file.
SVS_EXT = '.svs'

# Header shared by every slide-list CSV this script writes.
CSV_HEADER = ["case_id", "slide_id", "label"]

# Metadata column matched against ``slide_id`` (pre-operative pathology number).
BIOPSY_ID_COL = '术前病理号'

# Metadata column copied into ``case_id``.
CASE_ID_COL = '病理版编号'

# Metadata column holding the tumour regression grade. Per the column title:
# 0 complete regression, 1 single cells / small foci of residual cancer,
# 2 partial residual cancer, 3 extensive residual cancer, 4 not evaluable.
REGRESSION_COL = '肿瘤退缩程度(0:完全退缩,1:单个或小灶癌残余,2:部分癌残留,3:大量癌残留;4 不适合评价)'


def _svs_stems(path_svs):
    """Return the sorted base names (extension removed) of the .svs files in *path_svs*."""
    stems = []
    for entry in os.listdir(path_svs):
        stem, ext = os.path.splitext(entry)
        if ext == SVS_EXT:
            stems.append(stem)
    # os.listdir() order is arbitrary; sort so the output is reproducible.
    return sorted(stems)


def get_svsfile(path_svs):
    """Print the base name of every ``.svs`` file found directly in *path_svs*.

    Args:
        path_svs: Directory to list (not searched recursively).
    """
    for stem in _svs_stems(path_svs):
        print(stem)


def create_csv(path_csv):
    """Create (or overwrite) *path_csv* containing only the header row.

    Args:
        path_csv: Path of the CSV file to write.
    """
    # newline='' is required by the csv module; without it Windows output
    # gets an empty line after every row. UTF-8 matches pandas' read default.
    with open(path_csv, 'w', newline='', encoding='utf-8') as csvfile:
        csv.writer(csvfile).writerow(CSV_HEADER)


def create_label_csv(path_svs, path_csv):
    """Write one row per ``.svs`` file in *path_svs* to *path_csv*.

    Only ``slide_id`` (the file name without extension) is filled in;
    ``case_id`` and ``label`` are left empty for ``gen_final_csv``.

    Args:
        path_svs: Directory containing the slide files.
        path_csv: Path of the CSV file to write (overwritten if present).
    """
    # List the folder first so an unreadable folder does not truncate the CSV.
    stems = _svs_stems(path_svs)
    with open(path_csv, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(CSV_HEADER)
        writer.writerows(['', stem, ''] for stem in stems)


def excel_to_csv(path_excel, path_csv='meta_yunnan.csv'):
    """Convert the first sheet of an Excel workbook to CSV.

    Args:
        path_excel: Workbook to read.
        path_csv: Output CSV path; defaults to the name the script always used.
    """
    pd.read_excel(path_excel).to_csv(path_csv)


def gen_final_csv(path_csv, metadata_csv, out_csv='_final_.csv'):
    """Fill ``case_id`` and ``label`` of the slide list from the metadata table.

    Each ``slide_id`` is compared for equality with the metadata column
    ``BIOPSY_ID_COL``. Slides with no matching metadata row are removed. For a
    matching slide, ``case_id`` is taken from ``CASE_ID_COL`` and ``label`` is
    0 when ``REGRESSION_COL`` equals 0, otherwise 1. When several metadata rows
    match, the last one wins. The result is sorted by ``label``, the total
    number of matches is printed, and the table is written to *out_csv*.

    Args:
        path_csv: Slide list produced by ``create_label_csv``.
        metadata_csv: Metadata table (for example from ``excel_to_csv``).
        out_csv: Output path; defaults to ``'_final_.csv'``.
    """
    final_csv = pd.read_csv(path_csv, engine='python')
    metadata = pd.read_csv(metadata_csv)

    # An all-empty case_id column is parsed as float; make it object so that
    # assigning identifiers does not rely on silent dtype upcasting.
    final_csv['case_id'] = final_csv['case_id'].astype(object)

    biopsy_ids = metadata[BIOPSY_ID_COL]
    count = 0
    unmatched = []
    for i, svsname in final_csv[['slide_id']].itertuples():
        # One vectorized equality test replaces the membership check plus
        # the Python-level scan over every metadata row.
        matches = metadata[biopsy_ids == svsname]
        if matches.empty:
            unmatched.append(i)
            continue
        count += len(matches)
        row = matches.iloc[-1]
        final_csv.loc[i, 'case_id'] = row[CASE_ID_COL]
        final_csv.loc[i, 'label'] = 0 if row[REGRESSION_COL] == 0 else 1

    # Drop all unmatched slides in one call instead of one copy per row.
    final_csv = final_csv.drop(unmatched)
    final_csv = final_csv.sort_values('label')
    print(count)
    final_csv.to_csv(out_csv)


def copy_file(path_svs, path_final_csv, dest_root='train_folder'):
    """Copy every labelled slide into ``<dest_root>/pcr`` or ``<dest_root>/non_pcr``.

    Rows with label 0 go to ``pcr``, rows with label 1 to ``non_pcr``; rows
    with any other label are skipped.

    Args:
        path_svs: Directory that holds the ``<slide_id>.svs`` files.
        path_final_csv: CSV written by ``gen_final_csv``.
        dest_root: Parent of the two destination folders.
    """
    # Read slide_id as text so identifiers keep leading zeros.
    final_csv = pd.read_csv(path_final_csv, dtype={'slide_id': str})

    pcr_dir = os.path.join(dest_root, 'pcr')
    non_pcr_dir = os.path.join(dest_root, 'non_pcr')
    # shutil.copy() treats a missing destination as a *file name*, so the
    # folders must exist or every slide would overwrite the same file.
    os.makedirs(pcr_dir, exist_ok=True)
    os.makedirs(non_pcr_dir, exist_ok=True)

    for slide_id, svslabel in zip(final_csv['slide_id'], final_csv['label']):
        if svslabel == 0:
            target_dir = pcr_dir
        elif svslabel == 1:
            target_dir = non_pcr_dir
        else:
            continue
        shutil.copy(os.path.join(path_svs, str(slide_id) + SVS_EXT), target_dir)


if __name__ == '__main__':
    path_svs = "F:/数据/附三院直肠癌新辅助肠镜病理/lzhpCR图像/"
    path_final_csv = "_final_.csv"

    # Earlier pipeline steps, run once to produce _final_.csv:
    #   path_csv = "_yunnan_svs_name_.csv"
    #   metadata_csv = "meta_yunnan.csv"
    #   create_label_csv(path_svs, path_csv)
    #   excel_to_csv('entireData.xlsx')
    #   gen_final_csv(path_csv, metadata_csv)

    copy_file(path_svs, path_final_csv)
标签:bsp tuples __name__ 数据 完全 not rtu open ext
原文地址:https://www.cnblogs.com/dyc99/p/14584861.html