about summary refs log tree commit diff
path: root/workflows/pull-data/genbank/transform-genbank-xml2yamlfa.py
diff options
context:
space:
mode:
Diffstat (limited to 'workflows/pull-data/genbank/transform-genbank-xml2yamlfa.py')
-rwxr-xr-x workflows/pull-data/genbank/transform-genbank-xml2yamlfa.py 16
1 files changed, 12 insertions, 4 deletions
diff --git a/workflows/pull-data/genbank/transform-genbank-xml2yamlfa.py b/workflows/pull-data/genbank/transform-genbank-xml2yamlfa.py
index ebdf17e..9414864 100755
--- a/workflows/pull-data/genbank/transform-genbank-xml2yamlfa.py
+++ b/workflows/pull-data/genbank/transform-genbank-xml2yamlfa.py
@@ -1,18 +1,17 @@
 #!/usr/bin/env python3
 #
-# Create a single YAML/FASTA from genbank XML
+# Create a single YAML/FASTA for each genbank entry in GenBank XML file
 #
 #   transform-genbank-xml2yamlfa --out ~/tmp/pubseq file(s)
 #
 # Also writes a validation file in the outdir named state.json
-#
-# Where --in can be a file or a directory
 # ----------------------------------------------------------------------
 
 # See also directory .guix-run and README.md
 
 import argparse
 import gzip
+import json
 import os
 import sys
 import types
@@ -47,6 +46,12 @@ for xmlfn in args.files:
             try:
                 valid,meta = genbank.get_metadata(id,gb)
                 if valid:
+                    # --- write JSON
+                    jsonfn = basename + ".json"
+                    with open(jsonfn, 'w') as outfile:
+                        print(f"    writing {jsonfn}")
+                        json.dump(meta, outfile, indent=4)
+                    # --- write FASTA
                     fa = basename+".fa"
                     seq = genbank.get_sequence(id,gb)
                     print(f"    writing {fa}")
@@ -66,4 +71,7 @@ for xmlfn in args.files:
                 state['warnings'] = meta['warnings']
             states[id] = state
 
-print(states)
+statefn = dir + '/state.json'
+with open(statefn, 'w') as outfile:
+    print(f"    Writing {statefn}")
+    json.dump(states, outfile, indent=4)