summary refs log tree commit diff
path: root/build-aux
diff options
context:
space:
mode:
authorArun Isaac2021-06-15 23:38:47 +0530
committerArun Isaac2021-06-15 23:38:47 +0530
commit512e835cf2752681eda5bbb374c122c037baa6df (patch)
treea2a0295a5e0180f11eb74be2f3028067d83e3b63 /build-aux
parent7a78c23f6f2f1c32abb508563a700cd535b29ad1 (diff)
downloadguile-email-512e835cf2752681eda5bbb374c122c037baa6df.tar.gz
guile-email-512e835cf2752681eda5bbb374c122c037baa6df.tar.lz
guile-email-512e835cf2752681eda5bbb374c122c037baa6df.zip
build-aux: Add corpus pull script.
This is a follow-up to commit 7a78c23f6f2f1c32abb508563a700cd535b29ad1
adding a file that was missed out.

* build-aux/pull-corpus.scm: New file.
Diffstat (limited to 'build-aux')
-rw-r--r--build-aux/pull-corpus.scm103
1 files changed, 103 insertions, 0 deletions
diff --git a/build-aux/pull-corpus.scm b/build-aux/pull-corpus.scm
new file mode 100644
index 0000000..bf4129e
--- /dev/null
+++ b/build-aux/pull-corpus.scm
@@ -0,0 +1,103 @@
+;;; guile-email --- Guile email parser
+;;; Copyright © 2021 Arun Isaac <arunisaac@systemreboot.net>
+;;;
+;;; This file is part of guile-email.
+;;;
+;;; guile-email is free software; you can redistribute it and/or modify
+;;; it under the terms of the GNU Affero General Public License as
+;;; published by the Free Software Foundation; either version 3 of the
+;;; License, or (at your option) any later version.
+;;;
+;;; guile-email is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;; Affero General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Affero General Public
+;;; License along with guile-email.  If not, see
+;;; <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; This script pulls archives of specified GNU mailing lists
+;; (currently guix-commits and guix-patches). If a partial archive
+;; already exists in the current directory, only the newer messages
+;; are downloaded.
+
+;;; Code:
+
+(use-modules (ice-9 format)
+             (rnrs io ports)
+             (srfi srfi-1)
+             (srfi srfi-9 gnu)
+             (srfi srfi-19)
+             (srfi srfi-41)
+             (srfi srfi-71)
+             (web client)
+             (web response))
+
+(define-immutable-record-type <archive>
+  (make-archive mailing-list year month size)
+  archive?
+  (mailing-list archive-mailing-list)
+  (year archive-year set-archive-year)
+  (month archive-month set-archive-month)
+  (size archive-size set-archive-size))
+
+(define (archive->uri archive)
+  (format #f "https://lists.gnu.org/archive/mbox/~a/~a-~2,'0d"
+          (archive-mailing-list archive)
+          (archive-year archive)
+          (archive-month archive)))
+
+(define (initialize-archive-size archive)
+  (let ((response body (http-head (archive->uri archive))))
+    (set-archive-size archive (assoc-ref (response-headers response) 'content-length))))
+
+(define (previous-archive archive)
+  (initialize-archive-size
+   (case (archive-month archive)
+     ((1) (set-archive-year
+           (set-archive-month archive 12)
+           (1- (archive-year archive))))
+     (else (set-archive-month archive (1- (archive-month archive)))))))
+
+(define (archive->filename archive)
+  (format #f "~a/~a-~2,'0d.mbox"
+          (archive-mailing-list archive)
+          (archive-year archive)
+          (archive-month archive)))
+
+(define file-size
+  (compose stat:size stat))
+
+(define (out-of-sync? archive)
+  (let ((filename (archive->filename archive)))
+    (if (file-exists? filename)
+        (< (file-size filename)
+           (archive-size archive))
+        #t)))
+
+(define (pull-archive archive)
+  (let ((file (archive->filename archive)))
+    (format #t "Pulling archive ~a~%" (archive->filename archive))
+    (call-with-port (open-file file "a")
+      (lambda (port)
+        (let ((response body (http-get (archive->uri archive)
+                                       #:decode-body? #f
+                                       #:headers `((range . (bytes (,(file-size file) . #f)))))))
+          (put-bytevector port body))))))
+
+(define (pull-mailing-list mailing-list)
+  "Pull archives of MAILING-LIST to a directory of the same name."
+  (define archives
+    (let ((today (current-date)))
+      (stream-unfold
+       identity archive-size previous-archive
+       (initialize-archive-size
+        (make-archive mailing-list (date-year today) (date-month today) #f)))))
+
+  (stream-for-each pull-archive (stream-take-while out-of-sync? archives)))
+
+(pull-mailing-list "guix-commits")
+(pull-mailing-list "guix-patches")