about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--.gitignore4
-rw-r--r--doc/INSTALL.md2
-rwxr-xr-xscripts/update_virtuoso/check_for_updates.py59
3 files changed, 63 insertions, 2 deletions
diff --git a/.gitignore b/.gitignore
index 5707130..9057a4f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,8 @@
 
 # Distribution / packaging
 build/
+cache.txt
+metadata.ttl
 __pycache__/
 eggs/
 .eggs/
@@ -15,4 +17,4 @@ env/
 venv/
 ENV/
 env.bak/
-venv.bak/
\ No newline at end of file
+venv.bak/
diff --git a/doc/INSTALL.md b/doc/INSTALL.md
index d8d7f3e..e68b81a 100644
--- a/doc/INSTALL.md
+++ b/doc/INSTALL.md
@@ -37,7 +37,7 @@ guix environment guix --ad-hoc git python openssl python-pycurl python-magic nss
 To run the web uploader in a GNU Guix environment/container
 
 ```
-guix environment -C guix --ad-hoc git python python-flask python-pyyaml python-magic nss-certs --network openssl -- env FLASK_APP=bh20simplewebuploader/main.py flask run
+guix environment guix --ad-hoc git python python-flask python-pyyaml python-pycurl python-magic  nss-certs --network openssl -- env FLASK_ENV=development PYTHONPATH=$PYTHONPATH:./bh20sequploader FLASK_APP=bh20simplewebuploader/main.py flask run
  * Serving Flask app "bh20simplewebuploader/main.py"
  * Environment: production
    WARNING: This is a development server. Do not use it in a production deployment.
diff --git a/scripts/update_virtuoso/check_for_updates.py b/scripts/update_virtuoso/check_for_updates.py
new file mode 100755
index 0000000..f3b8a86
--- /dev/null
+++ b/scripts/update_virtuoso/check_for_updates.py
@@ -0,0 +1,59 @@
+#! /usr/bin/env python3
+#
+# Check for updates on Arvados, pull the TTL and
+# push into Virtuoso
+#
+# You can run this in a Guix container with
+#
+#  ~/opt/guix/bin/guix environment -C guix --ad-hoc python python-requests curl --network -- python3 ./scripts/update_virtuoso/check_for_updates.py cache.txt dba dba
+
+import os.path
+import subprocess
+import sys
+import time
+
+import requests
+
+# Validate the command line first so a usage error costs no network round trip.
+# NOTE: user is accepted but unused; the curl call below always logs in as dba.
+if len(sys.argv) != 4:
+    sys.exit("Usage: %s <cache-file> <user> <password>" % sys.argv[0])
+fn, user, pwd = sys.argv[1:4]
+
+url = 'https://download.lugli.arvadosapi.com/c=lugli-4zz18-z513nlpqm03hpca/_/mergedmetadata.ttl'
+# --- Fetch headers from TTL file on Arvados
+r = requests.head(url)
+print(r.headers)
+
+# --- Convert/validate time stamp (strptime raises ValueError if it is malformed)
+last_modified_str = r.headers['Last-Modified']
+print(last_modified_str)
+t_stamp = time.strptime(last_modified_str, "%a, %d %b %Y %H:%M:%S %Z")
+print(t_stamp)
+
+# --- Read the time stamp stored by the previous successful run, if any
+stamp = None
+if os.path.isfile(fn):
+    with open(fn, "r") as f:
+        stamp = f.read().strip()
+
+if stamp != last_modified_str:
+    print("Fetch metadata TTL")
+    r = requests.get(url)
+    if r.status_code != 200:
+        sys.exit("Fetching %s failed with HTTP status %d" % (url, r.status_code))
+    # Store the raw bytes so the TTL is not re-encoded through a guessed charset
+    with open("metadata.ttl", "wb") as f:
+        f.write(r.content)
+    # Now push into Virtuoso using curl. The argument list is built directly
+    # so a password containing spaces stays a single argument.
+    print("Push metadata TTL")
+    cmd = ["curl", "-X", "PUT", "--digest", "-u", "dba:%s" % pwd, "-H", "Content-Type:text/turtle", "-T", "metadata.ttl", "-G", "http://sparql.genenetwork.org/sparql-graph-crud-auth", "--data-urlencode", "graph=http://covid-19.genenetwork.org/graph"]
+    print(cmd)
+    returncode = subprocess.call(cmd)
+    if returncode != 0:
+        sys.exit("curl failed with exit status %d" % returncode)
+
+    # Record the new time stamp only after the push has succeeded
+    with open(fn, "w") as f:
+        f.write(last_modified_str)