From dbd32a3042099d52d30028364ffdadcd0f60ede2 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Thu, 12 Nov 2020 18:50:13 +0100 Subject: managed the assembly_method in the scripts, doc, and the example templates --- doc/blog/using-covid-19-pubseq-part3.org | 1 + example/esr_example.yaml | 1 + example/uthsc_example.yaml | 1 + scripts/create_sra_metadata/create_sra_metadata.py | 2 ++ scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py | 2 ++ scripts/esr_samples/template.yaml | 1 + scripts/uthsc_samples/template.yaml | 1 + test/data/input/TN_UT2.yaml | 1 + 8 files changed, 10 insertions(+) diff --git a/doc/blog/using-covid-19-pubseq-part3.org b/doc/blog/using-covid-19-pubseq-part3.org index c98bf27..7336cf6 100644 --- a/doc/blog/using-covid-19-pubseq-part3.org +++ b/doc/blog/using-covid-19-pubseq-part3.org @@ -236,6 +236,7 @@ a more elaborate example (note most fields are optional) may look like sample_sequencing_technology: [http://www.ebi.ac.uk/efo/EFO_0009173,http://www.ebi.ac.uk/efo/EFO_0009173] alignment_protocol: Protocol used for assembly sequencing_coverage: [70.0, 100.0] + assembly_method: "http://purl.obolibrary.org/obo/GENEPIO_0001628" additional_technology_information: Optional free text field for additional information submitter: diff --git a/example/esr_example.yaml b/example/esr_example.yaml index b3e97d5..693aff3 100644 --- a/example/esr_example.yaml +++ b/example/esr_example.yaml @@ -25,6 +25,7 @@ virus: technology: sample_sequencing_technology: [http://www.ebi.ac.uk/efo/EFO_0008632] // Nanopore MinION alignment_protocol: https://github.com/ESR-NZ/NZ_SARS-CoV-2_genomics + assembly_method: "http://purl.obolibrary.org/obo/GENEPIO_0001628" additional_technology_information: "Artic V3 workflow" submitter: diff --git a/example/uthsc_example.yaml b/example/uthsc_example.yaml index 3bdbf6f..2384114 100644 --- a/example/uthsc_example.yaml +++ b/example/uthsc_example.yaml @@ -25,6 +25,7 @@ virus: technology: sample_sequencing_technology: 
[http://www.ebi.ac.uk/efo/EFO_0008632] // Nanopore MinION alignment_protocol: guppy + assembly_method: "http://purl.obolibrary.org/obo/GENEPIO_0001628" additional_technology_information: Optional free text field for additional information submitter: diff --git a/scripts/create_sra_metadata/create_sra_metadata.py b/scripts/create_sra_metadata/create_sra_metadata.py index 2aac0e5..554aea3 100644 --- a/scripts/create_sra_metadata/create_sra_metadata.py +++ b/scripts/create_sra_metadata/create_sra_metadata.py @@ -227,6 +227,8 @@ for i, EXPERIMENT_PACKAGE in enumerate(EXPERIMENT_PACKAGE_SET): taxon_id = SAMPLE.find('SAMPLE_NAME').find('TAXON_ID').text info_for_yaml_dict['virus']['virus_species'] = "http://purl.obolibrary.org/obo/NCBITaxon_"+taxon_id + # This script downloads and prepares data and metadata for samples that will be mapped against a reference + info_for_yaml_dict['technology']['assembly_method'] = 'http://purl.obolibrary.org/obo/GENEPIO_0002028' EXPERIMENT = EXPERIMENT_PACKAGE.find('EXPERIMENT') INSTRUMENT_MODEL = [x.text for x in EXPERIMENT.find('PLATFORM').iter('INSTRUMENT_MODEL')][0] diff --git a/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py b/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py index 87b348b..9a46474 100755 --- a/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py +++ b/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py @@ -179,6 +179,8 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml) else: info_for_yaml_dict['submitter']['additional_submitter_information'] = GBReference_journal.text + # This script downloads and prepares data and metadata for assembled samples + info_for_yaml_dict['technology']['assembly_method'] = 'http://purl.obolibrary.org/obo/GENEPIO_0001628' GBSeq_comment = GBSeq.find('GBSeq_comment') if GBSeq_comment is not None and 'Assembly-Data' in GBSeq_comment.text: diff --git a/scripts/esr_samples/template.yaml 
b/scripts/esr_samples/template.yaml index 3ca1de3..3846122 100644 --- a/scripts/esr_samples/template.yaml +++ b/scripts/esr_samples/template.yaml @@ -15,6 +15,7 @@ virus: technology: sample_sequencing_technology: ["http://www.ebi.ac.uk/efo/EFO_0008632"] alignment_protocol: "https://github.com/ESR-NZ/NZ_SARS-CoV-2_genomics" + assembly_method: "http://purl.obolibrary.org/obo/GENEPIO_0001628" additional_technology_information: "Artic V3 workflow" submitter: diff --git a/scripts/uthsc_samples/template.yaml b/scripts/uthsc_samples/template.yaml index c81bd28..dab8634 100644 --- a/scripts/uthsc_samples/template.yaml +++ b/scripts/uthsc_samples/template.yaml @@ -23,6 +23,7 @@ virus: technology: sample_sequencing_technology: [http://www.ebi.ac.uk/efo/EFO_0008632] alignment_protocol: https://bio.tools/BWA#! + assembly_method: "http://purl.obolibrary.org/obo/GENEPIO_0001628" additional_technology_information: "Oxford Nanopore MiniIon RNA long reads" submitter: diff --git a/test/data/input/TN_UT2.yaml b/test/data/input/TN_UT2.yaml index 236110d..f2f190b 100644 --- a/test/data/input/TN_UT2.yaml +++ b/test/data/input/TN_UT2.yaml @@ -23,6 +23,7 @@ virus: technology: sample_sequencing_technology: [http://www.ebi.ac.uk/efo/EFO_0008632] alignment_protocol: https://bio.tools/BWA#! + assembly_method: "http://purl.obolibrary.org/obo/GENEPIO_0001628" additional_technology_information: Oxford Nanopore MiniIon RNA long reads submitter: -- cgit v1.2.3