{"id":2691,"date":"2019-04-04T00:17:14","date_gmt":"2019-04-03T16:17:14","guid":{"rendered":"http:\/\/www.chenlianfu.com\/?p=2691"},"modified":"2019-05-09T17:53:56","modified_gmt":"2019-05-09T09:53:56","slug":"%e5%88%9b%e5%bb%bancbi%e7%9a%84nr%e6%95%b0%e6%8d%ae%e5%ba%93%e7%9a%84%e5%ad%90%e9%9b%86%e6%95%b0%e6%8d%ae%e5%ba%93","status":"publish","type":"post","link":"http:\/\/www.chenlianfu.com\/?p=2691","title":{"rendered":"\u521b\u5efaNCBI\u7684Nr\u6570\u636e\u5e93\u7684\u5b50\u96c6\u6570\u636e\u5e93"},"content":{"rendered":"<h1>1. \u4e3a\u4ec0\u4e48\u8981\u505aNr\u5b50\u96c6\u6570\u636e\u5e93<\/h1>\n<p>NCBI\u5b98\u7f51\u4ec5\u63d0\u4f9bNr\u5168\u6570\u636e\u5e93\u3002\u8be5\u6570\u636e\u5e93\u592a\u5927\uff0c\u5c06\u7269\u79cd\u7684\u86cb\u767d\u5e8f\u5217\u4f7f\u7528Blastp\u6bd4\u5bf9\u5230Nr\u6570\u636e\u5e93\u975e\u5e38\u6d88\u8017\u8ba1\u7b97\u548c\u65f6\u95f4\u3002\u5bf91\u4e2a\u86cb\u767d\u5e8f\u5217\u53ef\u80fd\u9700\u89811\u4e2aCPU\u8ba1\u7b97\u534a\u4e2a\u5c0f\u65f6\u3002\u82e5\u5bf9\u5168\u57fa\u56e0\u7ec42\u4e07\u4e2a\u57fa\u56e0\u5206\u6790\uff0c\u666e\u901a\u53f0\u5f0f\u673a8\u4e2aCPU\u8981\u8ba1\u7b9720000\/(2*8*24)=52\u5929\u3002\u8fd9\u4e3b\u8981\u662f\u7531\u4e8eNr\u6570\u636e\u5e93\u592a\u5927\u5bfc\u81f4\u7684\u3002\u4e3a\u4e86\u80fd\u5c3d\u5feb\u5f97\u5230Nr\u6ce8\u91ca\u7ed3\u679c\uff0c\u53ef\u4ee5\u6309\u7269\u79cd\u5206\u7c7b\u5c06Nr\u6570\u636e\u5e93\u5206\u5272\u6210\u5b50\u96c6\u6570\u636e\u5e93\uff0c\u80fd\u5f97\u5230\u66f4\u5feb\u7684\u6bd4\u5bf9\u901f\u5ea6\u3002\u4ee5\u4e0b\u662f\u521b\u5efaNr\u5b50\u96c6\u6570\u636e\u5e93\u7684\u6b65\u9aa4\u3002<\/p>\n<h1>2. 
\u521b\u5efaNr\u5b50\u96c6\u6570\u636e\u5e93\u7684\u6b65\u9aa4<\/h1>\n<p>\u4eceNCBI\u4e0b\u8f7dNr\u6570\u636e\u5e93\u548c\u5206\u7c7b\u6570\u636e\u5e93\u6587\u4ef6<\/p>\n<pre>cd \/opt\/biosoft\/Nr_database\n# \u4e0b\u8f7dNr\u6570\u636e\u5e93\uff08FASTA\u6587\u4ef6\uff09\nascp -T -l 200M -i ~\/.aspera\/connect\/etc\/asperaweb_id_dsa.openssh --host=ftp.ncbi.nih.gov --user=anonftp --mode=recv \/blast\/db\/FASTA\/nr.gz .\/\n\n# \u4e0b\u8f7dNCBI\u7684\u5206\u7c7b\u6570\u636e\u5e93\u6587\u4ef6\nascp -T -l 200M -i ~\/.aspera\/connect\/etc\/asperaweb_id_dsa.openssh --host=ftp.ncbi.nih.gov --user=anonftp --mode=recv \/pub\/taxonomy\/taxdump.tar.gz .\/\nascp -T -l 200M -i ~\/.aspera\/connect\/etc\/asperaweb_id_dsa.openssh --host=ftp.ncbi.nih.gov --user=anonftp --mode=recv \/pub\/taxonomy\/accession2taxid\/prot.accession2taxid.gz .\/\n\n# \u89e3\u538b\u7f29\u4e24\u4e2aNCBI\u7684\u5206\u7c7b\u6570\u636e\u5e93\u6587\u4ef6\ngzip -dc prot.accession2taxid.gz &gt; prot.accession2taxid\nmkdir ~\/.taxonkit\ntar zxf taxdump.tar.gz -C ~\/.taxonkit\n# \u5176\u4e3b\u8981\u6709\u6548\u6587\u4ef6\u6709\u4e24\u4e2a\uff1a\n# names.dmp \u8bb0\u5f55\u7269\u79cd\u540d\u53ca\u5176\u5206\u7c7b\u7f16\u53f7\n# nodes.dmp \u8bb0\u5f55\u5206\u7c7b\u7f16\u53f7\u7684\u8282\u70b9\u4fe1\u606f\n# \u67e5\u770b~\/.taxonkit\/names.dmp\u6587\u4ef6\uff0c\u4f7f\u7528\u5173\u952e\u8bcd\u68c0\u7d22\u5f97\u5230\u76ee\u6807\u7c7b\u7684\u5206\u7c7b\u7f16\u53f7\uff0c\u4f8b\u5982\uff1a\n# fungi 4751             # grep -P \"\\|\\s+[fF]ungi\\w*\\s*\\|\" ~\/.taxonkit\/names.dmp\n# plants 3193            # grep -P \"\\|\\s+[pP]lant\\w*\\s*\\|\" ~\/.taxonkit\/names.dmp\n# animals 33208          # grep -P \"\\|\\s+[aA]nimal\\w*\\s*\\|\" 
~\/.taxonkit\/names.dmp\n<\/pre>\n<p>\u4e0b\u8f7d\u5e76\u5b89\u88c5NCBI\u5206\u7c7b\u6570\u636e\u5e93\u89e3\u6790\u8f6f\u4ef6TaxonKit\uff0c\u5e76\u89e3\u6790nodes.dmp\u6587\u4ef6\u7684\u7269\u79cd\u8282\u70b9\u4fe1\u606f\uff0c\u5f97\u5230\u6307\u5b9a\u7c7b\u7684\u6240\u6709\u7269\u79cd\u5217\u8868\u4fe1\u606f\u3002<\/p>\n<pre># \u4e0b\u8f7d\u5e76\u5b89\u88c5NCBI\u5206\u7c7b\u6570\u636e\u5e93\u89e3\u6790\u8f6f\u4ef6TaxonKit\nwget https:\/\/github.com\/shenwei356\/taxonkit\/releases\/download\/v0.2.4\/taxonkit_linux_amd64.tar.gz\ntar zxvf taxonkit_linux_amd64.tar.gz\n\n# \u63d0\u53d6\u53e4\u83cc(2157)\u3001\u7ec6\u83cc(2)\u548c\u75c5\u6bd2(10239)\u8fd9\u51e0\u4e2a\u5927\u7c7b\u4e0b\u7684\u6240\u6709\u7269\u79cd\u7f16\u53f7\u3002\n.\/taxonkit list -j 8 --ids 2,2157,10239 &gt; sub.meta.list\n\n# \u518d\u7f16\u5199\u7a0b\u5e8fextract_sub_data_from_Nr.pl\u83b7\u5f97\u5217\u8868\u4e2d\u7269\u79cd\u5728Nr\u6570\u636e\u5e93\u4e2d\u7684\u5e8f\u5217\u4fe1\u606f\u3002\ngzip -dc nr.gz | perl extract_sub_data_from_Nr.pl --sub_taxon sub.meta.list --acc2taxid prot.accession2taxid - &gt; nr_meta.fasta\n<\/pre>\n<p>\u63d0\u53d6fungi\/plants\/animals\u5b50\u96c6<\/p>\n<pre>.\/taxonkit list -j 8 --ids 4751 &gt; sub.fungi.list\n.\/taxonkit list -j 8 --ids 3193 &gt; sub.plants.list\n.\/taxonkit list -j 8 --ids 33208 &gt; sub.animals.list\n.\/taxonkit list -j 8 --ids 10239 &gt; sub.virus.list\ngzip -dc nr.gz | perl extract_sub_data_from_Nr.pl --sub_taxon sub.fungi.list --acc2taxid prot.accession2taxid - &gt; nr_fungi.fasta\ngzip -dc nr.gz | perl extract_sub_data_from_Nr.pl --sub_taxon sub.plants.list --acc2taxid prot.accession2taxid - &gt; nr_plants.fasta\ngzip -dc nr.gz | perl extract_sub_data_from_Nr.pl --sub_taxon sub.animals.list --acc2taxid prot.accession2taxid - &gt; nr_animals.fasta\ngzip -dc nr.gz | perl extract_sub_data_from_Nr.pl --sub_taxon sub.virus.list --acc2taxid prot.accession2taxid - &gt; 
nr_virus.fasta\n<\/pre>\n<p>\u4f7f\u7528makeblastdb\u521b\u5efablast\u672c\u5730\u6570\u636e\u5e93<\/p>\n<pre>makeblastdb -in nr_fungi.fasta -dbtype prot -title nr_fungi -parse_seqids -out nr_fungi_`date +%Y%m%d` -logfile nr_fungi_`date +%Y%m%d`.log\nmakeblastdb -in nr_plants.fasta -dbtype prot -title nr_plants -parse_seqids -out nr_plants_`date +%Y%m%d` -logfile nr_plants_`date +%Y%m%d`.log\nmakeblastdb -in nr_animals.fasta -dbtype prot -title nr_animals -parse_seqids -out nr_animals_`date +%Y%m%d` -logfile nr_animals_`date +%Y%m%d`.log\nmakeblastdb -in nr_virus.fasta -dbtype prot -title nr_virus -parse_seqids -out nr_virus_`date +%Y%m%d` -logfile nr_virus_`date +%Y%m%d`.log\n<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>1. \u4e3a\u4ec0\u4e48\u8981\u505aNr\u5b50\u96c6\u6570\u636e\u5e93 NCBI\u5b98\u7f51\u4ec5\u63d0\u4f9bNr\u5168\u6570\u636e\u5e93\u3002\u8be5\u6570\u636e\u5e93\u592a\u5927\uff0c\u5c06 &hellip; <a href=\"http:\/\/www.chenlianfu.com\/?p=2691\">\u7ee7\u7eed\u9605\u8bfb <span 
class=\"meta-nav\">&rarr;<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[1],"tags":[],"_links":{"self":[{"href":"http:\/\/www.chenlianfu.com\/index.php?rest_route=\/wp\/v2\/posts\/2691"}],"collection":[{"href":"http:\/\/www.chenlianfu.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/www.chenlianfu.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/www.chenlianfu.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.chenlianfu.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=2691"}],"version-history":[{"count":9,"href":"http:\/\/www.chenlianfu.com\/index.php?rest_route=\/wp\/v2\/posts\/2691\/revisions"}],"predecessor-version":[{"id":2897,"href":"http:\/\/www.chenlianfu.com\/index.php?rest_route=\/wp\/v2\/posts\/2691\/revisions\/2897"}],"wp:attachment":[{"href":"http:\/\/www.chenlianfu.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=2691"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/www.chenlianfu.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=2691"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/www.chenlianfu.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=2691"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}