From dbd32a3042099d52d30028364ffdadcd0f60ede2 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Thu, 12 Nov 2020 18:50:13 +0100 Subject: managed the assembly_method in the scripts, doc, and the example templates --- scripts/create_sra_metadata/create_sra_metadata.py | 2 ++ scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py | 2 ++ scripts/esr_samples/template.yaml | 1 + scripts/uthsc_samples/template.yaml | 1 + 4 files changed, 6 insertions(+) (limited to 'scripts') diff --git a/scripts/create_sra_metadata/create_sra_metadata.py b/scripts/create_sra_metadata/create_sra_metadata.py index 2aac0e5..554aea3 100644 --- a/scripts/create_sra_metadata/create_sra_metadata.py +++ b/scripts/create_sra_metadata/create_sra_metadata.py @@ -227,6 +227,8 @@ for i, EXPERIMENT_PACKAGE in enumerate(EXPERIMENT_PACKAGE_SET): taxon_id = SAMPLE.find('SAMPLE_NAME').find('TAXON_ID').text info_for_yaml_dict['virus']['virus_species'] = "http://purl.obolibrary.org/obo/NCBITaxon_"+taxon_id + # This script downloads and prepares data and metadata for samples that will be mapped against a reference + info_for_yaml_dict['technology']['assembly_method'] = 'http://purl.obolibrary.org/obo/GENEPIO_0002028' EXPERIMENT = EXPERIMENT_PACKAGE.find('EXPERIMENT') INSTRUMENT_MODEL = [x.text for x in EXPERIMENT.find('PLATFORM').iter('INSTRUMENT_MODEL')][0] diff --git a/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py b/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py index 87b348b..9a46474 100755 --- a/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py +++ b/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py @@ -179,6 +179,8 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml) else: info_for_yaml_dict['submitter']['additional_submitter_information'] = GBReference_journal.text + # This script downloads and prepares data and metadata for assembly samples + info_for_yaml_dict['technology']['assembly_method'] = 
'http://purl.obolibrary.org/obo/GENEPIO_0001628' GBSeq_comment = GBSeq.find('GBSeq_comment') if GBSeq_comment is not None and 'Assembly-Data' in GBSeq_comment.text: diff --git a/scripts/esr_samples/template.yaml b/scripts/esr_samples/template.yaml index 3ca1de3..3846122 100644 --- a/scripts/esr_samples/template.yaml +++ b/scripts/esr_samples/template.yaml @@ -15,6 +15,7 @@ virus: technology: sample_sequencing_technology: ["http://www.ebi.ac.uk/efo/EFO_0008632"] alignment_protocol: "https://github.com/ESR-NZ/NZ_SARS-CoV-2_genomics" + assembly_method: "http://purl.obolibrary.org/obo/GENEPIO_0001628" additional_technology_information: "Artic V3 workflow" submitter: diff --git a/scripts/uthsc_samples/template.yaml b/scripts/uthsc_samples/template.yaml index c81bd28..dab8634 100644 --- a/scripts/uthsc_samples/template.yaml +++ b/scripts/uthsc_samples/template.yaml @@ -23,6 +23,7 @@ virus: technology: sample_sequencing_technology: [http://www.ebi.ac.uk/efo/EFO_0008632] alignment_protocol: https://bio.tools/BWA#! + assembly_method: "http://purl.obolibrary.org/obo/GENEPIO_0001628" additional_technology_information: "Oxford Nanopore MiniIon RNA long reads" submitter: -- cgit v1.2.3