aboutsummaryrefslogtreecommitdiff
path: root/bh20sequploader/main.py
diff options
context:
space:
mode:
Diffstat (limited to 'bh20sequploader/main.py')
-rw-r--r--bh20sequploader/main.py58
1 files changed, 37 insertions, 21 deletions
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index a2e62fa..c442af0 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -22,18 +22,10 @@ ARVADOS_API_HOST='lugli.arvadosapi.com'
ARVADOS_API_TOKEN='2fbebpmbo3rw3x05ueu2i6nx70zhrsb1p22ycu3ry34m4x4462'
UPLOAD_PROJECT='lugli-j7d0g-n5clictpuvwk8aa'
-def main():
- parser = argparse.ArgumentParser(description='Upload SARS-CoV-19 sequences for analysis')
- parser.add_argument('sequence', type=argparse.FileType('r'), help='sequence FASTA/FASTQ')
- parser.add_argument('metadata', type=argparse.FileType('r'), help='sequence metadata json')
- parser.add_argument("--validate", action="store_true", help="Dry run, validate only")
- args = parser.parse_args()
-
- api = arvados.api(host=ARVADOS_API_HOST, token=ARVADOS_API_TOKEN, insecure=True)
-
+def qa_stuff(metadata, sequence_p1, sequence_p2):
try:
log.debug("Checking metadata")
- if not qc_metadata(args.metadata.name):
+ if not qc_metadata(metadata.name):
log.warning("Failed metadata qc")
exit(1)
except ValueError as e:
@@ -42,29 +34,52 @@ def main():
print(e)
exit(1)
+ target = []
try:
- log.debug("Checking FASTA QC")
- target = qc_fasta(args.sequence)
+ log.debug("Checking FASTA/FASTQ QC")
+ target.append(qc_fasta(sequence_p1))
+ if sequence_p2:
+ target.append(qc_fasta(sequence_p2))
+ target[0] = ("reads_1."+target[0][0][6:], target[0][1])
+ target[1] = ("reads_2."+target[1][0][6:], target[0][1])
except ValueError as e:
log.debug(e)
log.debug("Failed FASTA qc")
print(e)
exit(1)
+ return target
+
+def upload_sequence(col, target, sequence):
+ with col.open(target[0], "wb") as f:
+ r = sequence.read(65536)
+ while r:
+ f.write(r)
+ r = sequence.read(65536)
+
+
+def main():
+ parser = argparse.ArgumentParser(description='Upload SARS-CoV-19 sequences for analysis')
+ parser.add_argument('metadata', type=argparse.FileType('r'), help='sequence metadata json')
+ parser.add_argument('sequence_p1', type=argparse.FileType('rb'), help='sequence FASTA/FASTQ')
+ parser.add_argument('sequence_p2', type=argparse.FileType('rb'), default=None, help='sequence FASTQ pair')
+ parser.add_argument("--validate", action="store_true", help="Dry run, validate only")
+ args = parser.parse_args()
+
+ api = arvados.api(host=ARVADOS_API_HOST, token=ARVADOS_API_TOKEN, insecure=True)
+
+ target = qa_stuff(args.metadata, args.sequence_p1, args.sequence_p2)
+ seqlabel = target[0][1]
+
if args.validate:
print("Valid")
exit(0)
col = arvados.collection.Collection(api_client=api)
- with col.open(target, "w") as f:
- r = args.sequence.read(65536)
- seqlabel = r[1:r.index("\n")]
- print(seqlabel)
- while r:
- f.write(r)
- r = args.sequence.read(65536)
- args.sequence.close()
+ upload_sequence(col, target[0], args.sequence_p1)
+ if args.sequence_p2:
+ upload_sequence(col, target[1], args.sequence_p2)
print("Reading metadata")
with col.open("metadata.yaml", "w") as f:
@@ -73,7 +88,6 @@ def main():
while r:
f.write(r)
r = args.metadata.read(65536)
- args.metadata.close()
external_ip = urllib.request.urlopen('https://ident.me').read().decode('utf8')
@@ -93,6 +107,8 @@ def main():
(seqlabel, properties['upload_user'], properties['upload_ip']),
properties=properties, ensure_unique_name=True)
+ print("Saved to %s" % col.manifest_locator())
+
print("Done")
if __name__ == "__main__":