标签:
1 # -*- coding: utf-8 -*- 2 # __author__ = ‘JieYap‘ 3 from biocluster.agent import Agent 4 from biocluster.tool import Tool 5 import os 6 import types 7 import subprocess 8 from biocluster.core.exceptions import OptionError 9 10 11 class OtunetworkAgent(Agent): 12 """ 13 需要calc_otu_network.py 14 version 1.0 15 author: JieYao 16 last_modified:2016.8.1 17 """ 18 19 def __init__(self, parent): 20 super(OtunetworkAgent, self).__init__(parent) 21 options = [ 22 {"name": "otutable", "type": "infile", "format": "meta.otu.otu_table, meta.otu.tax_summary_dir"}, 23 {"name": "level", "type": "string", "default": "otu"}, 24 {"name": "envtable", "type": "infile", "format": "meta.otu.group_table"}, 25 {"name": "envlabs", "type": "string", "default": ""} 26 ] 27 self.add_option(options) 28 self.step.add_steps(‘OtunetworkAnalysis‘) 29 self.on(‘start‘, self.step_start) 30 self.on(‘end‘, self.step_end) 31 32 def step_start(self): 33 self.step.OtunetworkAnalysis.start() 34 self.step.update() 35 36 def step_end(self): 37 self.step.OtunetworkAnalysis.finish() 38 self.step.update() 39 40 def gettable(self): 41 """ 42 根据输入的otu表和分类水平计算新的otu表 43 :return: 44 """ 45 if self.option(‘otutable‘).format == "meta.otu.tax_summary_dir": 46 return self.option(‘otutable‘).get_table(self.option(‘level‘)) 47 else: 48 return self.option(‘otutable‘).prop[‘path‘] 49 50 def check_options(self): 51 """ 52 重写参数检查 53 """ 54 if not self.option(‘otutable‘).is_set: 55 raise OptionError(‘必须提供otu表‘) 56 self.option(‘otutable‘).get_info() 57 if self.option(‘otutable‘).prop[‘sample_num‘] < 2: 58 raise OptionError(‘otu表的样本数目少于2,不可进行网络分析‘) 59 if self.option(‘envtable‘).is_set: 60 self.option(‘envtable‘).get_info() 61 if self.option(‘envlabs‘): 62 labs = self.option(‘envlabs‘).split(‘,‘) 63 for lab in labs: 64 if lab not in self.option(‘envtable‘).prop[‘group_scheme‘]: 65 raise OptionError(‘envlabs中有不在物种(环境因子)表中存在的因子:%s‘ % lab) 66 else: 67 pass 68 if len(self.option(‘envtable‘).prop[‘sample‘]) < 2: 69 raise OptionError(‘物种(环境因子)表的样本数目少于2,不可进行网络分析‘) 70 samplelist = open(self.gettable()).readline().strip().split(‘\t‘)[1:] 71 if self.option(‘envtable‘).is_set: 72 self.option(‘envtable‘).get_info() 73 if len(self.option(‘envtable‘).prop[‘sample‘]) > len(samplelist): 74 raise OptionError(‘OTU表中的样本数量:%s少于物种(环境因子)表中的样本数量:%s‘ % (len(samplelist), 75 len(self.option(‘envtable‘).prop[‘sample‘]))) 76 for sample in self.option(‘envtable‘).prop[‘sample‘]: 77 if sample not in samplelist: 78 raise OptionError(‘物种(环境因子)表的样本中存在OTU表中未知的样本%s‘ % sample) 79 table = open(self.gettable()) 80 if len(table.readlines()) < 4 : 81 raise OptionError(‘数据表信息少于3行‘) 82 table.close() 83 return True 84 85 def set_resource(self): 86 """ 87 设置所需资源 88 """ 89 self._cpu = 2 90 self._memory = ‘‘ 91 92 def end(self): 93 result_dir = self.add_upload_dir(self.output_dir) 94 result_dir.add_relpath_rules([ 95 [".", "", "OTU网络分析结果输出目录"], 96 ["./real_node_table.txt", "txt", "OTU网络节点属性表"], 97 ["./real_edge_table.txt", "txt", "OTU网络边集属性表"], 98 ["./real_dc_otu_degree.txt", "txt", "OTU网络OTU节点度分布表"], 99 ["./real_dc_sample_degree.txt", "txt", "OTU网络sample节点度分布表"], 100 ["./real_dc_sample_otu_degree.txt", "txt", "OTU网络节点度分布表"], 101 ["./network_centrality.txt", "txt", "OTU网络中心系数表"], 102 ["./network_attributes.txt", "txt", "OTU网络单值属性表"], 103 ]) 104 print self.get_upload_files() 105 super(OtunetworkAgent, self).end() 106 107 108 class OtunetworkTool(Tool): 109 def __init__(self, config): 110 super(OtunetworkTool, self).__init__(config) 111 self._version = "1.0.1" 112 self.cmd_path = self.config.SOFTWARE_DIR + ‘/bioinfo/meta/scripts/calc_otu_network.py‘ 113 self.env_table = self.get_new_env() 114 self.otu_table = self.get_otu_table() 115 self.out_files = [‘real_node_table.txt‘, ‘real_edge_table.txt‘, ‘real_dc_otu_degree.txt‘, ‘real_dc_sample_degree.txt‘, ‘real_dc_sample_otu_degree.txt‘, ‘network_centrality.txt‘, ‘network_attributes.txt‘] 116 117 118 def get_otu_table(self): 119 """ 120 根据调用的level参数重构otu表 121 :return: 122 """ 123 if self.option(‘otutable‘).format == "meta.otu.tax_summary_dir": 124 otu_path = self.option(‘otutable‘).get_table(self.option(‘level‘)) 125 else: 126 otu_path = self.option(‘otutable‘).prop[‘path‘] 127 if self.option(‘envtable‘).is_set: 128 return self.filter_otu_sample(otu_path, self.option(‘envtable‘).prop[‘sample‘], 129 os.path.join(self.work_dir, ‘temp_filter.otutable‘)) 130 else: 131 return otu_path 132 133 def filter_otu_sample(self, otu_path, filter_samples, newfile): 134 if not isinstance(filter_samples, types.ListType): 135 raise Exception(‘过滤otu表样本的样本名称应为列表‘) 136 try: 137 with open(otu_path, ‘rb‘) as f, open(newfile, ‘wb‘) as w: 138 one_line = f.readline() 139 all_samples = one_line.rstrip().split(‘\t‘)[1:] 140 if not ((set(all_samples) & set(filter_samples)) == set(filter_samples)): 141 raise Exception(‘提供的过滤样本集合中存在otu表中不存在的样本all:%s,filter_samples:%s‘ % (all_samples, filter_samples)) 142 if len(all_samples) == len(filter_samples): 143 return otu_path 144 samples_index = [all_samples.index(i) + 1 for i in filter_samples] 145 w.write(‘OTU\t‘ + ‘\t‘.join(filter_samples) + ‘\n‘) 146 for line in f: 147 all_values = line.rstrip().split(‘\t‘) 148 new_values = [all_values[0]] + [all_values[i] for i in samples_index] 149 w.write(‘\t‘.join(new_values) + ‘\n‘) 150 return newfile 151 except IOError: 152 raise Exception(‘无法打开OTU相关文件或者文件不存在‘) 153 154 def get_new_env(self): 155 """ 156 根据envlabs生成新的envtable 157 """ 158 if self.option(‘envlabs‘): 159 new_path = self.work_dir + ‘/temp_env_table.xls‘ 160 self.option(‘envtable‘).sub_group(new_path, self.option(‘envlabs‘).split(‘,‘)) 161 return new_path 162 else: 163 return self.option(‘envtable‘).path 164 165 def run(self): 166 """ 167 运行 168 """ 169 super(OtunetworkTool, self).run() 170 self.run_otu_network_py() 171 172 def formattable(self, tablepath): 173 alllines = open(tablepath).readlines() 174 if alllines[0][0] == ‘#‘: 175 newtable = open(os.path.join(self.work_dir, ‘temp_format.table‘), ‘w‘) 176 newtable.write(alllines[0].lstrip(‘#‘)) 177 newtable.writelines(alllines[1:]) 178 newtable.close() 179 return os.path.join(self.work_dir, ‘temp_format.table‘) 180 else: 181 return tablepath 182 183 def run_otu_network_py(self): 184 """ 185 运行calc_otu_network.py 186 """ 187 real_otu_path = self.formattable(self.otu_table) 188 cmd = self.config.SOFTWARE_DIR + ‘/program/Python/bin/python ‘ 189 cmd += self.cmd_path 190 cmd += ‘ -i %s -o %s‘ % (real_otu_path, self.work_dir + ‘/otu_network‘) 191 if self.option(‘envtable‘).is_set: 192 cmd += ‘ -m %s‘ % (self.env_table) 193 self.logger.info(‘开始运行calc_otu_network生成OTU网络并进行计算‘) 194 195 try: 196 subprocess.check_output(cmd, shell=True) 197 self.logger.info(‘OTU_Network计算完成‘) 198 except subprocess.CalledProcessError: 199 self.logger.info(‘OTU_Network计算失败‘) 200 self.set_error(‘运行calc_otu_network.py失败‘) 201 allfiles = self.get_filesname() 202 for i in range(len(self.out_files)): 203 self.linkfile(allfiles[i], self.out_files[i]) 204 self.end() 205 206 def linkfile(self, oldfile, newname): 207 """ 208 link文件到output文件夹 209 :param oldfile: 资源文件路径 210 :param newname: 新的文件名 211 :return: 212 """ 213 newpath = os.path.join(self.output_dir, newname) 214 if os.path.exists(newpath): 215 os.remove(newpath) 216 os.link(oldfile, newpath) 217 218 def get_filesname(self): 219 files_status = [None, None, None, None, None, None, None] 220 for paths,d,filelist in os.walk(self.work_dir + ‘/otu_network‘): 221 for filename in filelist: 222 name = os.path.join(paths, filename) 223 for i in range(len(self.out_files)): 224 if self.out_files[i] in name: 225 files_status[i] = name 226 for i in range(len(self.out_files)): 227 if not files_status[i]: 228 self.set_error(‘未知原因,结果文件生成出错或丢失‘) 229 return files_status
1 # -*- coding: utf-8 -*- 2 # __author__ = ‘JieYao‘ 3 import os 4 import argparse 5 from biocluster.config import Config 6 import shutil 7 import networkx 8 9 def make_env_table(inFile, outFile): 10 with open(inFile, "r") as tmp_file: 11 samples_name = tmp_file.readline().rstrip().split(‘\t‘) 12 with open(‘group.txt‘ , "w") as tmp_file: 13 tmp_file.write("#sample\tgroup\n") 14 for i in range(1,len(samples_name)): 15 tmp_file.write(samples_name[i]+"\tSTD\n") 16 return ‘./group.txt‘ 17 18 parser = argparse.ArgumentParser(description=‘输入OTU表格,生成OTU网络信息‘) 19 parser.add_argument(‘-i‘, "--otu_matrix", help="输入的OTU表", required = True) 20 parser.add_argument(‘-o‘, "--output", help="输出文件位置", required = True) 21 parser.add_argument(‘-m‘, "--env_table", help="样本分类表", required = False) 22 args = vars(parser.parse_args()) 23 24 flag = False 25 inFile = args["otu_matrix"] 26 outFile = args["output"] 27 if not args["env_table"]: 28 env_table = make_env_table(inFile, outFile) 29 flag = True 30 else: 31 env_table = args["env_table"] 32 if os.path.exists(outFile): 33 shutil.rmtree(outFile) 34 35 """ 36 执行make_otu_network.py 计算otu网络的相关信息并生成文件 37 完成后由于make_otu_network.py生成的是一个文件夹,使用os和shutil的命令将文件全部移动到输出路径下 38 """ 39 command = Config().SOFTWARE_DIR + ‘/program/Python/bin/python ‘ 40 command += Config().SOFTWARE_DIR + ‘/program/Python/bin/make_otu_network.py‘ 41 command += ‘ -i %s -o %s -m %s‘ %(inFile, outFile, env_table) 42 os.system(command) 43 if flag: 44 os.remove("./group.txt") 45 for paths,d,filelist in os.walk(outFile): 46 for filename in filelist: 47 name = os.path.join(paths, filename) 48 if "reduced" in name: 49 os.remove(name) 50 elif "/otu_network/" in name: 51 shutil.move(name, outFile) 52 shutil.rmtree(outFile + ‘/otu_network‘) 53 for paths,d,filelist in os.walk(outFile): 54 for filename in filelist: 55 name = os.path.join(paths, filename) 56 if "props" in name: 57 os.remove(name) 58 59 """ 60 根据node表建立{节点名字---节点编号}的字典 61 """ 62 node_name = [""] 63 node_dictionary = {} 64 with open(outFile + ‘/real_node_table.txt‘, "r") as node_file: 65 informations = node_file.readlines() 66 for i in range(1, len(informations)): 67 tmp = informations[i].rstrip().split("\t") 68 node_dictionary[tmp[0]] = i 69 node_name += [tmp[0]] 70 """ 71 开始使用Networkx包建立图 72 计算OTU网络的属性信息 73 """ 74 G = networkx.Graph() 75 with open(outFile + "/real_edge_table.txt" , "r") as edge_file: 76 informations = edge_file.readlines() 77 for i in range(1, len(informations)): 78 tmp = informations[i].rstrip().split("\t") 79 G.add_edge(node_dictionary[tmp[0]], node_dictionary[tmp[1]], weight = eval(tmp[2])) 80 81 82 """ 83 用实践测试单独对Sample或者是OTU构图的构图方法, 84 结果证明这样的构图出来的结果基本上Sample是完全图, 85 OTU单独构图的意义则不大,所以这种想法……失败了。 86 """ 87 """ 88 H = networkx.Graph() 89 with open(outFile + "/real_node_table.txt" , "r") as node_file: 90 informations = node_file.readlines() 91 for i in range(1,len(informations)): 92 tmp = informations[i].rstrip().split("\t") 93 if tmp[2] == "otu_node": 94 break 95 position = i 96 for i in range(position): 97 for j in range(position): 98 H.add_edge(i,j,weight=0) 99 for k in range(position,len(G)): 100 if G.get_edge_data(i,k) and G.get_edge_data(j,k): 101 H.edge[i][j][‘weight‘] += min(G.edge[i][k][‘weight‘]+G.edge[j][k][‘weight‘]) 102 if H.edge[i][j][‘weight‘] == 0: 103 H.remove_edge(i,j) 104 105 minx = 32767 106 for i in range(position): 107 for j in range(position): 108 if (i in H)and(j in H)and(H.get_edge_data(i,j)): 109 minx = min(minx, H.edge[i][j][‘weight‘]) 110 111 for i in range(position): 112 for j in range(position): 113 if (i in H)and(j in H)and(H.get_edge_data(i,j)): 114 H.edge[i][j][‘weight‘] -= minx 115 if H.edge[i][j][‘weight‘] <=0: 116 H.remove_edge(i,j) 117 print H.edges() 118 119 H = networkx.Graph() 120 with open(outFile + "/real_node_table.txt" , "r") as node_file: 121 informations = node_file.readlines() 122 for i in range(1,len(informations)): 123 tmp = informations[i].rstrip().split("\t") 124 if tmp[2] == "otu_node": 125 break 126 position = i 127 for i in range(position,len(G)): 128 for j in range(position,len(G)): 129 H.add_edge(i,j,weight=0) 130 for k in range(position): 131 if G.get_edge_data(i,k) and G.get_edge_data(j,k): 132 H.edge[i][j][‘weight‘] += 1 133 if H.edge[i][j][‘weight‘] == 0: 134 H.remove_edge(i,j) 135 print len(H) 136 print len(H.edges()) 137 print H.edges() 138 139 minx = 32767 140 for i in range(position,len(G)): 141 for j in range(position,len(G)): 142 if (i in H)and(j in H)and(H.get_edge_data(i,j)): 143 minx = min(minx, H.edge[i][j][‘weight‘]) 144 145 for i in range(position): 146 for j in range(position): 147 if (i in H)and(j in H)and(H.get_edge_data(i,j)): 148 H.edge[i][j][‘weight‘] -= minx 149 if H.edge[i][j][‘weight‘] <=0: 150 H.remove_edge(i,j) 151 """ 152 153 """3计算属性表,分本3""" 154 155 #节点度中心系数,表示节点在图中的重要性 156 Degree_Centrality = networkx.degree_centrality(G) 157 #节点距离中心系数,值越大表示到其他节点距离越近,中心性越高 158 Closeness_Centrality = networkx.closeness_centrality(G) 159 #节点介数中心系数,值越大表示通过该节点的最短路径越多,中心性越高 160 Betweenness_Centrality = networkx.betweenness_centrality(G) 161 with open(os.path.join(args["output"], "network_centrality.txt"), "w") as tmp_file: 162 tmp_file.write("Node_ID\tNode_Name\tDegree_Centrality\t") 163 tmp_file.write("Closeness_Centrality\tBetweenness_Centrality\n") 164 for i in range(1, len(G)+1): 165 tmp_file.write(str(i)+"\t"+node_name[i]+"\t") 166 tmp_file.write(str(Degree_Centrality[i])+"\t") 167 tmp_file.write(str(Closeness_Centrality[i])+"\t") 168 tmp_file.write(str(Betweenness_Centrality[i])+"\n") 169 170 171 #网络传递性,二分图中应该为0,否则有问题 172 Transitivity = networkx.transitivity(G) 173 #网络直径 174 Diameter = networkx.diameter(G) 175 #网络平均最短路长度 176 Average_shortest_path = networkx.average_shortest_path_length(G) 177 with open(os.path.join(args["output"], "network_attributes.txt"), "w") as tmp_file: 178 tmp_file.write("Transitivity:"+str(Transitivity)+"\n") 179 tmp_file.write("Diameter:"+str(Diameter)+"\n") 180 tmp_file.write("Average_shortest_path_length:"+str(Average_shortest_path)+"\n")
标签:
原文地址:http://www.cnblogs.com/neverforget/p/5784957.html