From 11812bbf95ddf1771a159b7ef6580a9179c0cad1 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 10 Nov 2020 05:02:57 -0600 Subject: virtuoso: Added a --no-cache option --- scripts/update_virtuoso/check_for_updates.py | 34 ++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 9 deletions(-) (limited to 'scripts/update_virtuoso') diff --git a/scripts/update_virtuoso/check_for_updates.py b/scripts/update_virtuoso/check_for_updates.py index 8761c8a..a63f4d1 100755 --- a/scripts/update_virtuoso/check_for_updates.py +++ b/scripts/update_virtuoso/check_for_updates.py @@ -4,7 +4,7 @@ # # You can run this in a Guix container with # -# ~/opt/guix/bin/guix environment -C guix --ad-hoc python python-requests curl --network -- python3 ./scripts/update_virtuoso/check_for_updates.py cache.txt dba dba +# ~/opt/guix/bin/guix environment -C guix --ad-hoc raptor2 python python-requests curl --network -- python3 ./scripts/update_virtuoso/check_for_updates.py cache.txt dba dba # # Note you'll need to run from the root dir. Remove the ./cache.txt file if you want to force an update. # @@ -19,6 +19,11 @@ fn = sys.argv[1] user = sys.argv[2] pwd = sys.argv[3] +no_cache = False +if fn == "--no-cache": + no_cache = True + print("Skipping cache check and download of metadata.ttl") + scriptdir = os.path.dirname(os.path.realpath(__file__)) print(scriptdir) basedir = os.path.dirname(os.path.dirname(scriptdir)) @@ -29,6 +34,15 @@ def upload(fn): # print("DELETE "+fn) # cmd = ("curl --digest --user dba:%s --verbose --url -G http://sparql.genenetwork.org/sparql-graph-crud-auth --data-urlencode graph=http://covid-19.genenetwork.org/graph -X DELETE" % pwd).split(" ") + print("VALIDATE "+fn) + cmd = f"rapper -i turtle {fn}" + print(cmd) + p = subprocess.Popen(cmd.split(" "),stdout=subprocess.PIPE,stderr=subprocess.PIPE) + out, err = p.communicate() + if p.returncode != 0: + print(out,err) + assert(p.returncode == 0) + print("UPLOADING "+fn) cmd = ("curl -X PUT --digest -u dba:%s -H Content-Type:text/turtle -T %s -G http://sparql.genenetwork.org/sparql-graph-crud-auth --data-urlencode graph=http://covid-19.genenetwork.org/graph/%s" % (pwd, fn, os.path.basename(fn)) ) print(cmd) @@ -39,6 +53,7 @@ def upload(fn): url = 'https://download.lugli.arvadosapi.com/c=lugli-4zz18-z513nlpqm03hpca/_/mergedmetadata.ttl' # --- Fetch headers from TTL file on Arvados +# curl --head https://download.lugli.arvadosapi.com/c=lugli-4zz18-z513nlpqm03hpca/_/mergedmetadata.ttl r = requests.head(url) print(r.headers) print(r.headers['Last-Modified']) @@ -49,14 +64,14 @@ last_modified_str = r.headers['Last-Modified'] t_stamp = time.strptime(last_modified_str,"%a, %d %b %Y %H:%M:%S %Z" ) print(t_stamp) -# OK, it works, now check last stored value +# OK, it works, now check last stored value in the cache stamp = None if os.path.isfile(fn): file = open(fn,"r") stamp = file.read() file.close -if stamp != last_modified_str: +if no_cache or stamp != last_modified_str: print("Delete graphs") for graph in ["labels.ttl", "metadata.ttl", "countries.ttl"]: cmd = ("curl --digest -u dba:%s --verbose --url http://127.0.0.1:8890/sparql-graph-crud-auth?graph=http://covid-19.genenetwork.org/graph/%s -X DELETE" % (pwd, graph)) @@ -69,12 +84,13 @@ if stamp != last_modified_str: upload(basedir+"/semantic_enrichment/labels.ttl") upload(basedir+"/semantic_enrichment/countries.ttl") - print("Fetch metadata TTL") - r = requests.get(url) - assert(r.status_code == 200) - with open("metadata.ttl", "w") as f: - f.write(r.text) - f.close + if not no_cache: + print("Fetch metadata TTL") + r = requests.get(url) + assert(r.status_code == 200) + with open("metadata.ttl", "w") as f: + f.write(r.text) + f.close upload("metadata.ttl") with open(fn,"w") as f: f.write(last_modified_str) -- cgit v1.2.3 From 986bacf77191a159d842605f6bb86f3f92a3be54 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 10 Nov 2020 11:41:09 +0000 Subject: virtuoso: header change --- scripts/update_virtuoso/check_for_updates.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'scripts/update_virtuoso') diff --git a/scripts/update_virtuoso/check_for_updates.py b/scripts/update_virtuoso/check_for_updates.py index a63f4d1..fb66c2e 100755 --- a/scripts/update_virtuoso/check_for_updates.py +++ b/scripts/update_virtuoso/check_for_updates.py @@ -56,13 +56,14 @@ url = 'https://download.lugli.arvadosapi.com/c=lugli-4zz18-z513nlpqm03hpca/_/mer # curl --head https://download.lugli.arvadosapi.com/c=lugli-4zz18-z513nlpqm03hpca/_/mergedmetadata.ttl r = requests.head(url) print(r.headers) -print(r.headers['Last-Modified']) +if not no_cache: + print(r.headers['Last-Modified']) -# --- Convert/validate time stamp -# ValueError: time data 'Tue, 21 Apr 2020 23:47:43 GMT' does not match format '%a %b %d %H:%M:%S %Y' -last_modified_str = r.headers['Last-Modified'] -t_stamp = time.strptime(last_modified_str,"%a, %d %b %Y %H:%M:%S %Z" ) -print(t_stamp) + # --- Convert/validate time stamp + # ValueError: time data 'Tue, 21 Apr 2020 23:47:43 GMT' does not match format '%a %b %d %H:%M:%S %Y' + last_modified_str = r.headers['Last-Modified'] + t_stamp = time.strptime(last_modified_str,"%a, %d %b %Y %H:%M:%S %Z" ) + print(t_stamp) # OK, it works, now check last stored value in the cache stamp = None -- cgit v1.2.3 From b3eb10770bada631c929fec83247f6fda7ef22a4 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 10 Nov 2020 06:39:10 -0600 Subject: virtuoso: no-cache --- scripts/update_virtuoso/check_for_updates.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'scripts/update_virtuoso') diff --git a/scripts/update_virtuoso/check_for_updates.py b/scripts/update_virtuoso/check_for_updates.py index fb66c2e..939a575 100755 --- a/scripts/update_virtuoso/check_for_updates.py +++ b/scripts/update_virtuoso/check_for_updates.py @@ -93,7 +93,8 @@ if no_cache or stamp != last_modified_str: f.write(r.text) f.close upload("metadata.ttl") - with open(fn,"w") as f: - f.write(last_modified_str) + if not no_cache: + with open(fn,"w") as f: + f.write(last_modified_str) else: print("Metadata is up to date") -- cgit v1.2.3