| Return Create A Forum - Home | |
| --------------------------------------------------------- | |
| nCoV_info | |
| https://ncovinfo.createaforum.com | |
| --------------------------------------------------------- | |
| ***************************************************** | |
| Return to: nCoV - discussion | |
| ***************************************************** | |
| #Post#: 282-------------------------------------------------- | |
| covidcg.org's big file encoding Gisaid but hiding the names and | |
| not showing the NNNNN | |
| By: babar Date: January 17, 2021, 1:23 am | |
| --------------------------------------------------------- | |
| Download and unzip | |
| https://storage.googleapis.com/ve-public/v1.4/data_package.json.gz | |
| Then, with Node.js (on my laptop the file is too big for | |
| Chrome), run: | |
| [code] | |
| var t = require('./data_package.json'); | |
| var fr_loc = {}; | |
| var c =t["geo_select_tree"].children[2].children[15].children; | |
| // list of French locations | |
| for (var i=0;i<c.length;i++) { | |
| if (c[i]) { | |
| fr_loc[c[i].location_id]=c[i].label; | |
| for (var j=0;j<c[i].children.length;j++) { | |
| if (c[i].children[j]) | |
| fr_loc[c[i].children[j].location_id]=c[i].label; | |
| } | |
| } | |
| } | |
| var fr_sequences=[]; | |
| for (var i = 0;i < t.case_data.length; i++) { | |
| if (fr_loc[t.case_data[i].location_id]) | |
| fr_sequences.push(t.case_data[i]); | |
| } | |
| var lab_count = {}; | |
| for (var i =0;i < fr_sequences.length; i++) { | |
| var l = | |
| t.metadata_map.submitting_lab[fr_sequences[i].submitting_lab]; | |
| if (!lab_count[l]) lab_count[l]=0; | |
| lab_count[l]++; | |
| } | |
| lab_count; | |
| var SNP= {}; | |
| for (var n in t.metadata_map.dna_snp) { | |
| if (t.metadata_map.dna_snp.hasOwnProperty(n)) { | |
| SNP[t.metadata_map.dna_snp[n]]=n; | |
| } | |
| } | |
| for (var i =0;i < fr_sequences.length; i++) { | |
| var mut = []; | |
| for (var j =0; j<fr_sequences[i].dna_snp_str.length;j++) { | |
| mut.push(SNP[fr_sequences[i].dna_snp_str[j]]); | |
| } | |
| fr_sequences[i].mut = mut; | |
| fr_sequences[i].loc = fr_loc[fr_sequences[i].location_id]; | |
| } | |
| [/code] | |
| Output: a table of 2753 sequences; the first one is: | |
| [code] | |
| > fr_sequences[0] | |
| { 'Accession ID': '2d58d2cd', | |
| collection_date: '2020-02-26', | |
| submission_date: '2020-03-14', | |
| gender: 0, | |
| age_start: 36, | |
| age_end: 37, | |
| patient_status: 0, | |
| passage: 0, | |
| specimen: 5, | |
| lineage: 'A.2', | |
| clade: 'S', | |
| sequencing_tech: 7, | |
| assembly_method: 23, | |
| comment_type: -1, | |
| authors: 39, | |
| originating_lab: 51, | |
| submitting_lab: 30, | |
| dna_snp_str: [ 28245, 28855, 4217, 14839, 15498, 18708, 19358, | |
| 19677 ], | |
| gene_aa_snp_str: [ 14838, 4890, 18663, 18297, 19910, 605 ], | |
| protein_aa_snp_str: [ 11687, 13956, 13590, 15422, 605, 15820 | |
| ], | |
| location_id: 1315, | |
| mut: | |
| [ '8782|C|T', | |
| '9477|T|A', | |
| '14805|C|T', | |
| '25553|C|T', | |
| '25979|G|T', | |
| '28144|T|C', | |
| '28657|C|T', | |
| '28863|C|T' ], | |
| loc: 'Grand-Est' } | |
| [/code] | |
| #Post#: 283-------------------------------------------------- | |
| Re: covidcg.org's big file encoding Gisaid but hiding the names | |
| and not showing the NNNNN | |
| By: gsgs Date: January 17, 2021, 4:49 am | |
| --------------------------------------------------------- | |
| Hi babar. | |
| Thanks! | |
| I could download and unzip that file. | |
| https://storage.googleapis.com/ve-public/v1.5/data_package.json.gz | |
| 231 MB, 341354 lines. | |
| https://storage.googleapis.com/ve-public/v1.6/data_package.json.gz | |
| https://storage.googleapis.com/ve-public/v1.7/data_package.json.gz | |
| . | |
| 396 with B.1.351 | |
| Shall we use (part of) this forum to exchange sequencing info? | |
| Or make another forum? | |
| Shall I make you admin for reorganisation? | |
| Shall we invite others? | |
| ***************************************************** |