# path: bh20sequploader/bh20seq-schema.yml
# blob: 0520e36776402b762cb6c062f320e05a9691855f
# Document header: base IRI, prefix table, then the list of type definitions.
# (The `$base` / `$namespaces` / `$graph` layout looks like Schema Salad.)
$base: 'http://biohackathon.org/bh20-seq-schema'

# Prefix -> namespace IRI table, kept in alphabetical order.
# The records in this file spell their predicate IRIs out in full rather than
# using these prefixes.
$namespaces:
  edam: 'http://edamontology.org/'
  efo: 'http://www.ebi.ac.uk/efo/'
  evs: 'http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#'
  obo: 'http://purl.obolibrary.org/obo/'
  sch: 'https://schema.org/'
  sio: 'http://semanticscience.org/resource/'

# One list entry per record type.
$graph:

# Record describing the host the sample was taken from.
# Each field carries a `doc` text, a type (a trailing "?" marks the field as
# optional) and the predicate IRI it is mapped to via `jsonldPredicate`.
- name: hostSchema
  type: record
  fields:
    host_species:
      doc: Host species as defined in NCBITaxon (e.g. http://purl.obolibrary.org/obo/NCBITaxon_9606 for Homo sapiens)
      type: string
      jsonldPredicate:
        _id: http://www.ebi.ac.uk/efo/EFO_0000532
        # The value itself is an IRI, not a free-text label.
        _type: "@id"
    host_id:
      doc: Identifier for the host. If you submit multiple samples from the same host, use the same host_id for those samples
      type: string
      jsonldPredicate:
        _id: http://semanticscience.org/resource/SIO_000115
    # host_common_name was removed: it is not necessary now that host_species
    # is an IRI. Former definition kept for reference:
    # host_common_name:
    #   doc: Text label for the host species (e.g. homo sapiens)
    #   type: string?
    #   jsonldPredicate:
    #     _id: http://purl.obolibrary.org/obo/NOMEN_0000037
    host_sex:
      doc: >-
        Sex of the host as defined in NCIT, IRI expected
        (http://purl.obolibrary.org/obo/NCIT_C20197 (Male),
        http://purl.obolibrary.org/obo/NCIT_C27993 (Female),
        http://purl.obolibrary.org/obo/NCIT_C45908 (Intersex), or
        http://purl.obolibrary.org/obo/NCIT_C17998 (Unknown))
      type: string
      jsonldPredicate:
        _id: http://purl.obolibrary.org/obo/PATO_0000047
        _type: "@id"
    host_age:
      doc: Age of the host as number (e.g. 50)
      type: int?
      jsonldPredicate:
        _id: http://purl.obolibrary.org/obo/PATO_0000011
    host_age_unit:
      doc: Unit of host age e.g. http://purl.obolibrary.org/obo/UO_0000036
      type: string?
      jsonldPredicate:
        _id: http://purl.obolibrary.org/obo/NCIT_C42574
        _type: "@id"
    host_health_status:
      doc: A condition or state at a particular time
      type: string?
      # Written in the same `_id:` mapping form as the sibling fields
      # (previously the bare-string shorthand for the predicate IRI).
      jsonldPredicate:
        _id: http://purl.obolibrary.org/obo/NCIT_C25688
    host_treatment:
      doc: Process in which the act is intended to modify or alter
      type: string?
      jsonldPredicate:
        _id: http://www.ebi.ac.uk/efo/EFO_0000727
    host_vaccination:
      # NOTE(review): the doc text says "List" but the type is a single
      # optional string - confirm which one is intended.
      doc: List of vaccines given to the host
      type: string?
      jsonldPredicate:
        _id: http://purl.obolibrary.org/obo/VO_0000002
    additional_host_information:
      doc: Field for additional host information
      type: string?
      jsonldPredicate:
        _id: http://semanticscience.org/resource/SIO_001167

# Record describing how, where and by whom the sample was collected.
# Fields whose type ends in "?" are optional; the others are required.
- name: sampleSchema
  type: record
  fields:
    collector_name:
      doc: Name of the person that took the sample
      type: string
      jsonldPredicate:
        _id: http://purl.obolibrary.org/obo/OBI_0001895
    collecting_institution:
      doc: Institute that was responsible for sampling
      type: string
      # NOTE(review): this predicate (SIO_001167) is also used by
      # additional_collection_information below, so the two fields cannot be
      # told apart by predicate alone - confirm this is intended.
      jsonldPredicate:
        _id: http://semanticscience.org/resource/SIO_001167
    specimen_source:
      doc: A specimen that derives from an anatomical part or substance arising from an organism, e.g. tissue, organ
      type: string?
      jsonldPredicate:
        _id: http://purl.obolibrary.org/obo/OBI_0001479
    collection_date:
      doc: Date when the sample was taken
      type: string?
      jsonldPredicate:
        _id: http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C25164
    collection_location:
      doc: >-
        Geographical location where the sample was collected as Gazetteer
        (https://www.ebi.ac.uk/ols/ontologies/gaz) reference, e.g.
        http://purl.obolibrary.org/obo/GAZ_00002845 (China)
      type: string?
      jsonldPredicate:
        _id: http://purl.obolibrary.org/obo/GAZ_00000448
        # The value itself is an IRI (a Gazetteer term), not free text.
        _type: "@id"
    sample_storage_conditions:
      doc: Information about storage of a specified type, e.g. frozen specimen, paraffin, fresh ...
      type: string?
      jsonldPredicate:
        _id: http://purl.obolibrary.org/obo/OBI_0001472
    additional_collection_information:
      doc: Add additional comment about the circumstances that a sample was taken
      type: string?
      jsonldPredicate:
        _id: http://semanticscience.org/resource/SIO_001167
    sample_id:
      doc: Id of the sample as defined by the submitter
      type: string
      jsonldPredicate:
        _id: http://semanticscience.org/resource/SIO_000115
    source_database_accession:
      doc: If data is deposited at a public resource (e.g. Genbank, ENA) enter the Accession Id here
      type: string?
      jsonldPredicate:
        _id: http://edamontology.org/data_2091

# Record identifying the virus: a required species plus an optional strain.
- name: virusSchema
  type: record
  fields:
    virus_species:
      type: string
      doc: 'The name of a taxon from the NCBI taxonomy database'
      jsonldPredicate:
        # Typed "@id": the value is handled as an identifier, not a literal.
        _type: "@id"
        _id: 'http://edamontology.org/data_1875'
    virus_strain:
      # Optional (trailing "?").
      type: string?
      doc: 'Name of the virus strain'
      jsonldPredicate:
        _id: 'http://semanticscience.org/resource/SIO_010055'

# Record describing how the sequence was produced: the sequencing technology
# (required), plus optional assembly method and coverage.
- name: technologySchema
  type: record
  fields:
    sample_sequencing_technology:
      doc: Technology that was used to sequence this sample (e.g. Sanger, Nanopore MinION)
      type: string
      jsonldPredicate:
        _id: http://purl.obolibrary.org/obo/OBI_0600047
    sequence_assembly_method:
      doc: Protocol which provides instructions on the alignment of sequencing reads to reference genome
      type: string?
      jsonldPredicate:
        _id: http://www.ebi.ac.uk/efo/EFO_0002699
    sequencing_coverage:
      # The type is an integer, so the example must be a bare number: a value
      # such as "100x" is a string and would not validate against `int?`.
      doc: Sequence coverage defined as the average number of reads representing a given nucleotide (e.g. 100)
      type: int?
      jsonldPredicate:
        _id: http://purl.obolibrary.org/obo/FLU_0000848

# Record describing who submitted the data and where it originated.
# Only submitter_name and originating_lab are required; every other field
# has an optional ("?") type.
- name: submitterSchema
  type: record
  fields:
    submitter_name:
      type: string
      doc: 'Name of the submitter'
      jsonldPredicate:
        _id: 'http://semanticscience.org/resource/SIO_000116'
    submitter_address:
      type: string?
      doc: 'Address of the submitter'
      jsonldPredicate:
        _id: 'http://semanticscience.org/resource/SIO_000172'
    originating_lab:
      type: string
      doc: 'Name of the laboratory that took the sample'
      jsonldPredicate:
        _id: 'http://purl.obolibrary.org/obo/NCIT_C37984'
    lab_address:
      type: string?
      doc: 'Address of the laboratory where the sample was taken'
      # NOTE(review): OBI_0600047 is the same predicate that technologySchema
      # uses for sample_sequencing_technology - possibly a copy/paste slip;
      # confirm the intended term before changing it.
      jsonldPredicate:
        _id: 'http://purl.obolibrary.org/obo/OBI_0600047'
    # NOTE(review): the next two fields have no `doc` text.
    provider_sample_id:
      type: string?
      jsonldPredicate:
        _id: 'http://purl.obolibrary.org/obo/NCIT_C37900'
    submitter_sample_id:
      type: string?
      jsonldPredicate:
        _id: 'http://www.ebi.ac.uk/efo/EFO_0001741'
    authors:
      type: string?
      doc: 'Name of the author(s)'
      jsonldPredicate:
        _id: 'http://purl.obolibrary.org/obo/NCIT_C42781'
    publication:
      type: string?
      doc: 'Reference to publication of this sample (e.g. DOI, pubmed ID, ...)'
      jsonldPredicate:
        _id: 'http://purl.obolibrary.org/obo/NCIT_C19026'
    submitter_orcid:
      type: string?
      doc: 'ORCID of the submitter as a full URI, e.g. https://orcid.org/0000-0002-1825-0097'
      jsonldPredicate:
        _id: 'http://semanticscience.org/resource/SIO_000115'

# Document root record: bundles the five section records with the identifier
# of the subject being described.
- name: MainSchema
  type: record
  documentRoot: true
  fields:
    # Section records; only `virus` is optional (trailing "?").
    host: hostSchema
    sample: sampleSchema
    virus: virusSchema?
    technology: technologySchema
    submitter: submitterSchema
    id:
      type: string
      doc: 'The subject (eg the fasta/fastq file) that the metadata describes'
      jsonldPredicate:
        # Mapped to "@id" and typed "@id"; link checking is switched off.
        _type: "@id"
        _id: "@id"
        noLinkCheck: true