码迷,mamicode.com
首页 > Web开发 > 详细

OTU_Network&calc_otu

时间:2016-08-18 18:27:17      阅读:210      评论:0      收藏:0      [点我收藏+]

标签:

技术分享
  1 # -*- coding: utf-8 -*-
  2 # __author__ = ‘JieYap‘
  3 from biocluster.agent import Agent
  4 from biocluster.tool import Tool
  5 import os
  6 import types
  7 import subprocess
  8 from biocluster.core.exceptions import OptionError
  9 
 10 
 11 class OtunetworkAgent(Agent):
 12     """
 13     需要calc_otu_network.py
 14     version 1.0
 15     author: JieYao
 16     last_modified:2016.8.1
 17     """
 18     
 19     def __init__(self, parent):
 20         super(OtunetworkAgent, self).__init__(parent)
 21         options = [
 22             {"name": "otutable", "type": "infile", "format": "meta.otu.otu_table, meta.otu.tax_summary_dir"},
 23             {"name": "level", "type": "string", "default": "otu"},
 24             {"name": "envtable", "type": "infile", "format": "meta.otu.group_table"},
 25             {"name": "envlabs", "type": "string", "default": ""}
 26             ]
 27         self.add_option(options)
 28         self.step.add_steps(OtunetworkAnalysis)
 29         self.on(start, self.step_start)
 30         self.on(end, self.step_end)
 31 
 32     def step_start(self):
 33         self.step.OtunetworkAnalysis.start()
 34         self.step.update()
 35         
 36     def step_end(self):
 37         self.step.OtunetworkAnalysis.finish()
 38         self.step.update()
 39         
 40     def gettable(self):
 41         """
 42         根据输入的otu表和分类水平计算新的otu表
 43         :return:
 44         """
 45         if self.option(otutable).format == "meta.otu.tax_summary_dir":
 46             return self.option(otutable).get_table(self.option(level))
 47         else:
 48             return self.option(otutable).prop[path]
 49         
 50     def check_options(self):
 51         """
 52         重写参数检查
 53         """
 54         if not self.option(otutable).is_set:
 55             raise OptionError(必须提供otu表)
 56         self.option(otutable).get_info()
 57         if self.option(otutable).prop[sample_num] < 2:
 58             raise OptionError(otu表的样本数目少于2,不可进行网络分析)
 59         if self.option(envtable).is_set:
 60             self.option(envtable).get_info()
 61             if self.option(envlabs):
 62                 labs = self.option(envlabs).split(,)
 63                 for lab in labs:
 64                     if lab not in self.option(envtable).prop[group_scheme]:
 65                         raise OptionError(envlabs中有不在物种(环境因子)表中存在的因子:%s % lab)
 66             else:
 67                 pass
 68             if len(self.option(envtable).prop[sample]) < 2:
 69                 raise OptionError(物种(环境因子)表的样本数目少于2,不可进行网络分析)
 70         samplelist = open(self.gettable()).readline().strip().split(\t)[1:]
 71         if self.option(envtable).is_set:
 72             self.option(envtable).get_info()
 73             if len(self.option(envtable).prop[sample]) > len(samplelist):
 74                 raise OptionError(OTU表中的样本数量:%s少于物种(环境因子)表中的样本数量:%s % (len(samplelist),
 75                                   len(self.option(envtable).prop[sample])))
 76             for sample in self.option(envtable).prop[sample]:
 77                 if sample not in samplelist:
 78                     raise OptionError(物种(环境因子)表的样本中存在OTU表中未知的样本%s % sample)
 79         table = open(self.gettable())
 80         if len(table.readlines()) < 4 :
 81             raise OptionError(数据表信息少于3行)
 82         table.close()
 83         return True
 84 
 85     def set_resource(self):
 86         """
 87         设置所需资源
 88         """
 89         self._cpu = 2
 90         self._memory = ‘‘
 91         
 92     def end(self):
 93         result_dir = self.add_upload_dir(self.output_dir)
 94         result_dir.add_relpath_rules([
 95                 [".", "", "OTU网络分析结果输出目录"],
 96                 ["./real_node_table.txt", "txt", "OTU网络节点属性表"],
 97                 ["./real_edge_table.txt", "txt", "OTU网络边集属性表"],
 98                 ["./real_dc_otu_degree.txt", "txt", "OTU网络OTU节点度分布表"],
 99                 ["./real_dc_sample_degree.txt", "txt", "OTU网络sample节点度分布表"],
100                 ["./real_dc_sample_otu_degree.txt", "txt", "OTU网络节点度分布表"],
101                 ["./network_centrality.txt", "txt", "OTU网络中心系数表"],
102                 ["./network_attributes.txt", "txt", "OTU网络单值属性表"],
103                 ])
104         print self.get_upload_files()
105         super(OtunetworkAgent, self).end()
106 
107 
108 class OtunetworkTool(Tool):
109     def __init__(self, config):
110         super(OtunetworkTool, self).__init__(config)
111         self._version = "1.0.1"
112         self.cmd_path = self.config.SOFTWARE_DIR + /bioinfo/meta/scripts/calc_otu_network.py
113         self.env_table = self.get_new_env()
114         self.otu_table = self.get_otu_table()
115         self.out_files = [real_node_table.txt, real_edge_table.txt, real_dc_otu_degree.txt, real_dc_sample_degree.txt, real_dc_sample_otu_degree.txt, network_centrality.txt, network_attributes.txt]
116         
117         
118     def get_otu_table(self):
119         """
120         根据调用的level参数重构otu表
121         :return:
122         """
123         if self.option(otutable).format == "meta.otu.tax_summary_dir":
124             otu_path = self.option(otutable).get_table(self.option(level))
125         else:
126             otu_path = self.option(otutable).prop[path]
127         if self.option(envtable).is_set:
128             return self.filter_otu_sample(otu_path, self.option(envtable).prop[sample],
129                                           os.path.join(self.work_dir, temp_filter.otutable))
130         else:
131             return otu_path
132     
133     def filter_otu_sample(self, otu_path, filter_samples, newfile):
134         if not isinstance(filter_samples, types.ListType):
135             raise Exception(过滤otu表样本的样本名称应为列表)
136         try:
137             with open(otu_path, rb) as f, open(newfile, wb) as w:
138                 one_line = f.readline()
139                 all_samples = one_line.rstrip().split(\t)[1:]
140                 if not ((set(all_samples) & set(filter_samples)) == set(filter_samples)):
141                     raise Exception(提供的过滤样本集合中存在otu表中不存在的样本all:%s,filter_samples:%s % (all_samples, filter_samples))
142                 if len(all_samples) == len(filter_samples):
143                     return otu_path
144                 samples_index = [all_samples.index(i) + 1 for i in filter_samples]
145                 w.write(OTU\t + \t.join(filter_samples) + \n)
146                 for line in f:
147                     all_values = line.rstrip().split(\t)
148                     new_values = [all_values[0]] + [all_values[i] for i in samples_index]
149                     w.write(\t.join(new_values) + \n)
150                 return newfile
151         except IOError:
152             raise Exception(无法打开OTU相关文件或者文件不存在)
153 
154     def get_new_env(self):
155         """
156         根据envlabs生成新的envtable
157         """
158         if self.option(envlabs):
159             new_path = self.work_dir + /temp_env_table.xls
160             self.option(envtable).sub_group(new_path, self.option(envlabs).split(,))
161             return new_path
162         else:
163             return self.option(envtable).path
164 
165     def run(self):
166         """
167         运行
168         """
169         super(OtunetworkTool, self).run()
170         self.run_otu_network_py()
171 
172     def formattable(self, tablepath):
173         alllines = open(tablepath).readlines()
174         if alllines[0][0] == #:
175             newtable = open(os.path.join(self.work_dir, temp_format.table), w)
176             newtable.write(alllines[0].lstrip(#))
177             newtable.writelines(alllines[1:])
178             newtable.close()
179             return os.path.join(self.work_dir, temp_format.table)
180         else:
181             return tablepath
182 
183     def run_otu_network_py(self):
184         """
185         运行calc_otu_network.py
186         """
187         real_otu_path = self.formattable(self.otu_table)
188         cmd = self.config.SOFTWARE_DIR + /program/Python/bin/python 
189         cmd += self.cmd_path
190         cmd +=  -i %s -o %s % (real_otu_path, self.work_dir + /otu_network)
191         if self.option(envtable).is_set:
192             cmd +=  -m %s % (self.env_table)
193         self.logger.info(开始运行calc_otu_network生成OTU网络并进行计算)
194         
195         try:
196             subprocess.check_output(cmd, shell=True)
197             self.logger.info(OTU_Network计算完成)
198         except subprocess.CalledProcessError:
199             self.logger.info(OTU_Network计算失败)
200             self.set_error(运行calc_otu_network.py失败)
201         allfiles = self.get_filesname()
202         for i in range(len(self.out_files)):
203             self.linkfile(allfiles[i], self.out_files[i])
204         self.end()
205 
206     def linkfile(self, oldfile, newname):
207         """
208         link文件到output文件夹
209         :param oldfile: 资源文件路径
210         :param newname: 新的文件名
211         :return:
212         """
213         newpath = os.path.join(self.output_dir, newname)
214         if os.path.exists(newpath):
215             os.remove(newpath)
216         os.link(oldfile, newpath)
217 
218     def get_filesname(self):
219         files_status = [None, None, None, None, None, None, None]
220         for paths,d,filelist in os.walk(self.work_dir + /otu_network):
221             for filename in filelist:
222                 name = os.path.join(paths, filename)
223                 for i in range(len(self.out_files)):
224                     if self.out_files[i] in name:
225                         files_status[i] = name
226         for i in range(len(self.out_files)):
227             if not files_status[i]:
228                 self.set_error(未知原因,结果文件生成出错或丢失)
229         return files_status
View Code
技术分享
  1 # -*- coding: utf-8 -*-
  2 # __author__ = ‘JieYao‘
  3 import os
  4 import argparse
  5 from biocluster.config import Config
  6 import shutil
  7 import networkx
  8 
  9 def make_env_table(inFile, outFile):
 10     with open(inFile, "r") as tmp_file:
 11         samples_name = tmp_file.readline().rstrip().split(\t)
 12     with open(group.txt , "w") as tmp_file:
 13         tmp_file.write("#sample\tgroup\n")
 14         for i in range(1,len(samples_name)):
 15             tmp_file.write(samples_name[i]+"\tSTD\n") 
 16     return ./group.txt
 17 
 18 parser = argparse.ArgumentParser(description=输入OTU表格,生成OTU网络信息)
 19 parser.add_argument(-i, "--otu_matrix", help="输入的OTU表", required = True)
 20 parser.add_argument(-o, "--output", help="输出文件位置", required = True)
 21 parser.add_argument(-m, "--env_table", help="样本分类表", required = False)
 22 args = vars(parser.parse_args())
 23 
 24 flag = False
 25 inFile = args["otu_matrix"]
 26 outFile = args["output"]
 27 if not args["env_table"]:
 28     env_table = make_env_table(inFile, outFile)
 29     flag = True
 30 else:
 31     env_table = args["env_table"]
 32 if os.path.exists(outFile):
 33     shutil.rmtree(outFile)
 34     
 35 """
 36 执行make_otu_network.py 计算otu网络的相关信息并生成文件
 37 完成后由于make_otu_network.py生成的是一个文件夹,使用os和shutil的命令将文件全部移动到输出路径下
 38 """
 39 command = Config().SOFTWARE_DIR + /program/Python/bin/python 
 40 command += Config().SOFTWARE_DIR + /program/Python/bin/make_otu_network.py
 41 command +=  -i %s -o %s -m %s %(inFile, outFile, env_table)
 42 os.system(command)
 43 if flag:
 44     os.remove("./group.txt")
 45 for paths,d,filelist in os.walk(outFile):
 46     for filename in filelist:
 47         name = os.path.join(paths, filename)
 48         if "reduced" in name:
 49             os.remove(name)
 50         elif "/otu_network/" in name:
 51             shutil.move(name, outFile)
 52 shutil.rmtree(outFile + /otu_network)
 53 for paths,d,filelist in os.walk(outFile):
 54     for filename in filelist:
 55         name = os.path.join(paths, filename)
 56         if "props" in name:
 57             os.remove(name)
 58 
 59 """
 60 根据node表建立{节点名字---节点编号}的字典
 61 """
 62 node_name = [""]
 63 node_dictionary = {}
 64 with open(outFile + /real_node_table.txt, "r") as node_file:
 65     informations = node_file.readlines()
 66     for i in range(1, len(informations)):
 67         tmp = informations[i].rstrip().split("\t")
 68         node_dictionary[tmp[0]] = i
 69         node_name += [tmp[0]]
 70 """
 71 开始使用Networkx包建立图
 72 计算OTU网络的属性信息
 73 """
 74 G = networkx.Graph()
 75 with open(outFile + "/real_edge_table.txt" , "r") as edge_file:
 76     informations = edge_file.readlines()
 77     for i in range(1, len(informations)):
 78         tmp = informations[i].rstrip().split("\t")
 79         G.add_edge(node_dictionary[tmp[0]], node_dictionary[tmp[1]], weight = eval(tmp[2]))
 80 
 81 
 82 """
 83 用实践测试单独对Sample或者是OTU构图的构图方法,
 84 结果证明这样的构图出来的结果基本上Sample是完全图,
 85 OTU单独构图的意义则不大,所以这种想法……失败了。
 86 """
 87 """
 88 H = networkx.Graph()
 89 with open(outFile + "/real_node_table.txt" , "r") as node_file:
 90     informations = node_file.readlines()
 91     for i in range(1,len(informations)):
 92         tmp = informations[i].rstrip().split("\t")
 93         if tmp[2] == "otu_node":
 94             break
 95 position = i
 96 for i in range(position):
 97     for j in range(position):
 98         H.add_edge(i,j,weight=0)
 99         for k in range(position,len(G)):
100             if G.get_edge_data(i,k) and G.get_edge_data(j,k):
101                 H.edge[i][j][‘weight‘] += min(G.edge[i][k][‘weight‘]+G.edge[j][k][‘weight‘])
102         if H.edge[i][j][‘weight‘] == 0:
103             H.remove_edge(i,j)
104 
105 minx = 32767
106 for i in range(position):
107     for j in range(position):
108         if (i in H)and(j in H)and(H.get_edge_data(i,j)):
109             minx = min(minx, H.edge[i][j][‘weight‘])
110 
111 for i in range(position):
112     for j in range(position):
113         if (i in H)and(j in H)and(H.get_edge_data(i,j)):
114             H.edge[i][j][‘weight‘] -= minx
115             if H.edge[i][j][‘weight‘] <=0:
116                 H.remove_edge(i,j)
117 print H.edges()
118 
119 H = networkx.Graph()
120 with open(outFile + "/real_node_table.txt" , "r") as node_file:
121     informations = node_file.readlines()
122     for i in range(1,len(informations)):
123         tmp = informations[i].rstrip().split("\t")
124         if tmp[2] == "otu_node":
125             break
126 position = i
127 for i in range(position,len(G)):
128     for j in range(position,len(G)):
129         H.add_edge(i,j,weight=0)
130         for k in range(position):
131             if G.get_edge_data(i,k) and G.get_edge_data(j,k):
132                 H.edge[i][j][‘weight‘] += 1
133         if H.edge[i][j][‘weight‘] == 0:
134             H.remove_edge(i,j)
135 print len(H)
136 print len(H.edges())
137 print H.edges()
138 
139 minx = 32767
140 for i in range(position,len(G)):
141     for j in range(position,len(G)):
142         if (i in H)and(j in H)and(H.get_edge_data(i,j)):
143             minx = min(minx, H.edge[i][j][‘weight‘])
144 
145 for i in range(position):
146     for j in range(position):
147         if (i in H)and(j in H)and(H.get_edge_data(i,j)):
148             H.edge[i][j][‘weight‘] -= minx
149             if H.edge[i][j][‘weight‘] <=0:
150                 H.remove_edge(i,j)
151 """
152 
153 """3计算属性表,分本3"""
154 
155 #节点度中心系数,表示节点在图中的重要性
156 Degree_Centrality = networkx.degree_centrality(G)
157 #节点距离中心系数,值越大表示到其他节点距离越近,中心性越高
158 Closeness_Centrality = networkx.closeness_centrality(G)
159 #节点介数中心系数,值越大表示通过该节点的最短路径越多,中心性越高
160 Betweenness_Centrality = networkx.betweenness_centrality(G)
161 with open(os.path.join(args["output"], "network_centrality.txt"), "w") as tmp_file:
162     tmp_file.write("Node_ID\tNode_Name\tDegree_Centrality\t")
163     tmp_file.write("Closeness_Centrality\tBetweenness_Centrality\n")
164     for i in range(1, len(G)+1):
165         tmp_file.write(str(i)+"\t"+node_name[i]+"\t")
166         tmp_file.write(str(Degree_Centrality[i])+"\t")
167         tmp_file.write(str(Closeness_Centrality[i])+"\t")
168         tmp_file.write(str(Betweenness_Centrality[i])+"\n")
169 
170 
171 #网络传递性,二分图中应该为0,否则有问题
172 Transitivity = networkx.transitivity(G)
173 #网络直径
174 Diameter = networkx.diameter(G)
175 #网络平均最短路长度
176 Average_shortest_path = networkx.average_shortest_path_length(G)
177 with open(os.path.join(args["output"], "network_attributes.txt"), "w") as tmp_file:
178     tmp_file.write("Transitivity:"+str(Transitivity)+"\n")
179     tmp_file.write("Diameter:"+str(Diameter)+"\n")
180     tmp_file.write("Average_shortest_path_length:"+str(Average_shortest_path)+"\n")
View Code

 

OTU_Network&calc_otu

标签:

原文地址:http://www.cnblogs.com/neverforget/p/5784957.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!