diff options
-rw-r--r-- | .gitignore | 4 | ||||
-rw-r--r-- | doc/INSTALL.md | 2 | ||||
-rwxr-xr-x | scripts/update_virtuoso/check_for_updates.py | 59 |
3 files changed, 63 insertions, 2 deletions
@@ -2,6 +2,8 @@ # Distribution / packaging build/ +cache.txt +metadata.ttl __pycache__/ eggs/ .eggs/ @@ -15,4 +17,4 @@ env/ venv/ ENV/ env.bak/ -venv.bak/
\ No newline at end of file +venv.bak/ diff --git a/doc/INSTALL.md b/doc/INSTALL.md index d8d7f3e..e68b81a 100644 --- a/doc/INSTALL.md +++ b/doc/INSTALL.md @@ -37,7 +37,7 @@ guix environment guix --ad-hoc git python openssl python-pycurl python-magic nss To run the web uploader in a GNU Guix environment/container ``` -guix environment -C guix --ad-hoc git python python-flask python-pyyaml python-magic nss-certs --network openssl -- env FLASK_APP=bh20simplewebuploader/main.py flask run +guix environment guix --ad-hoc git python python-flask python-pyyaml python-pycurl python-magic nss-certs --network openssl -- env FLASK_ENV=development PYTHONPATH=$PYTHONPATH:./bh20sequploader FLASK_APP=bh20simplewebuploader/main.py flask run * Serving Flask app "bh20simplewebuploader/main.py" * Environment: production WARNING: This is a development server. Do not use it in a production deployment. diff --git a/scripts/update_virtuoso/check_for_updates.py b/scripts/update_virtuoso/check_for_updates.py new file mode 100755 index 0000000..f3b8a86 --- /dev/null +++ b/scripts/update_virtuoso/check_for_updates.py @@ -0,0 +1,59 @@ +#! 
#!/usr/bin/env python3
#
# Check for updates on Arvados, pull the TTL and
# push into Virtuoso
#
# You can run this in a Guix container with
#
# ~/opt/guix/bin/guix environment -C guix --ad-hoc python python-requests curl --network -- python3 ./scripts/update_virtuoso/check_for_updates.py cache.txt dba dba
#
# Arguments: <cache-file> <virtuoso-user> <virtuoso-password>
#
# The cache file stores the Last-Modified header seen on the previous
# successful run; the TTL is only fetched and pushed when that value changes.

import os.path
import subprocess
import sys
import time

METADATA_URL = 'https://download.lugli.arvadosapi.com/c=lugli-4zz18-z513nlpqm03hpca/_/mergedmetadata.ttl'
SPARQL_ENDPOINT = "http://sparql.genenetwork.org/sparql-graph-crud-auth"
GRAPH_URI = "http://covid-19.genenetwork.org/graph"
METADATA_FILE = "metadata.ttl"

# Format of the HTTP Last-Modified header, e.g. 'Tue, 21 Apr 2020 23:47:43 GMT'
HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S %Z"


def parse_last_modified(value):
    """Parse (and thereby validate) an HTTP Last-Modified header value.

    Args:
        value: Header text such as 'Tue, 21 Apr 2020 23:47:43 GMT'.

    Returns:
        The corresponding ``time.struct_time``.

    Raises:
        ValueError: If *value* does not match ``HTTP_DATE_FORMAT``.
    """
    return time.strptime(value, HTTP_DATE_FORMAT)


def read_cached_stamp(fn):
    """Return the time stamp stored in *fn*, or None when *fn* is not a file.

    Surrounding whitespace is stripped so that a trailing newline added by
    hand-editing the cache does not trigger a spurious re-upload.
    """
    if not os.path.isfile(fn):
        return None
    with open(fn, "r") as f:
        return f.read().strip()


def write_cached_stamp(fn, stamp):
    """Store *stamp* in the cache file *fn*, replacing previous content."""
    with open(fn, "w") as f:
        f.write(stamp)


def build_curl_command(user, pwd, ttl_path=METADATA_FILE):
    """Build the curl argument list that PUTs *ttl_path* into Virtuoso.

    The command is assembled as a list rather than by splitting a formatted
    string, so credentials containing spaces stay in a single argument.

    Args:
        user: Virtuoso account name used for digest authentication.
        pwd: Password for *user*.
        ttl_path: Path of the Turtle file to upload.

    Returns:
        A list of strings suitable for ``subprocess.run``.
    """
    return [
        "curl", "-X", "PUT", "--digest",
        "-u", "%s:%s" % (user, pwd),
        "-H", "Content-Type:text/turtle",
        "-T", ttl_path,
        "-G", SPARQL_ENDPOINT,
        "--data-urlencode", "graph=" + GRAPH_URI,
    ]


def redact_command(cmd):
    """Return a copy of *cmd* with the password after ``-u`` masked.

    Used for logging only, so the password never reaches stdout.
    """
    shown = list(cmd)
    for i in range(len(shown) - 1):
        if shown[i] == "-u":
            shown[i + 1] = shown[i + 1].partition(":")[0] + ":****"
    return shown


def fetch_last_modified(url=METADATA_URL):
    """Issue a HEAD request for *url* and return its Last-Modified header.

    Raises:
        SystemExit: If the response carries no Last-Modified header.
        ValueError: If the header is not a valid HTTP date.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    # Imported lazily so the pure helpers above can be imported (and tested)
    # without the third-party dependency installed.
    import requests

    # --- Fetch headers from TTL file on Arvados
    r = requests.head(url)
    r.raise_for_status()
    print(r.headers)

    last_modified_str = r.headers.get('Last-Modified')
    if last_modified_str is None:
        sys.exit("No Last-Modified header returned for %s" % url)
    print(last_modified_str)

    # --- Convert/validate time stamp
    print(parse_last_modified(last_modified_str))
    return last_modified_str


def download_metadata(url=METADATA_URL, dest=METADATA_FILE):
    """Download *url* and store the response body unchanged in *dest*.

    The raw bytes are written instead of ``r.text``: requests decodes text/*
    responses that lack a charset as ISO-8859-1, which can corrupt UTF-8
    Turtle data when it is re-encoded on write.

    Raises:
        SystemExit: If the server does not answer with status 200.
    """
    import requests

    r = requests.get(url)
    if r.status_code != 200:
        sys.exit("Fetching %s failed with HTTP status %d" % (url, r.status_code))
    with open(dest, "wb") as f:
        f.write(r.content)


def push_to_virtuoso(user, pwd, ttl_path=METADATA_FILE):
    """Upload *ttl_path* into Virtuoso using curl.

    curl's own output goes straight to this process's stdout/stderr.

    Raises:
        SystemExit: If curl exits with a non-zero status.
    """
    # Now push into Virtuoso using CURL
    cmd = build_curl_command(user, pwd, ttl_path)
    print(redact_command(cmd))
    completed = subprocess.run(cmd)
    # Checked by hand rather than with check=True: CalledProcessError embeds
    # the full command line, including the password, in its message.
    if completed.returncode != 0:
        sys.exit("curl failed with exit status %d" % completed.returncode)


def main(argv=None):
    """Fetch and push the metadata TTL if it changed since the last run.

    Args:
        argv: ``[cache_file, user, password]``; defaults to ``sys.argv[1:]``.

    Raises:
        SystemExit: On wrong usage or when any step fails.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 3:
        sys.exit("Usage: check_for_updates.py <cache-file> <user> <password>")
    fn, user, pwd = args

    last_modified_str = fetch_last_modified(METADATA_URL)

    if read_cached_stamp(fn) != last_modified_str:
        print("Fetch metadata TTL")
        download_metadata(METADATA_URL, METADATA_FILE)
        print("Push metadata TTL")
        push_to_virtuoso(user, pwd, METADATA_FILE)
        # Only record the stamp once the upload succeeded, so a failed run
        # is retried next time.
        write_cached_stamp(fn, last_modified_str)


if __name__ == "__main__":
    main()