aboutsummaryrefslogtreecommitdiff
path: root/build-aux/test-corpus.scm
diff options
context:
space:
mode:
Diffstat (limited to 'build-aux/test-corpus.scm')
-rw-r--r--build-aux/test-corpus.scm100
1 files changed, 100 insertions, 0 deletions
diff --git a/build-aux/test-corpus.scm b/build-aux/test-corpus.scm
new file mode 100644
index 0000000..83a359f
--- /dev/null
+++ b/build-aux/test-corpus.scm
@@ -0,0 +1,100 @@
+;;; guile-email --- Guile email parser
+;;; Copyright © 2021 Arun Isaac <arunisaac@systemreboot.net>
+;;;
+;;; This file is part of guile-email.
+;;;
+;;; guile-email is free software; you can redistribute it and/or modify
+;;; it under the terms of the GNU Affero General Public License as
+;;; published by the Free Software Foundation; either version 3 of the
+;;; License, or (at your option) any later version.
+;;;
+;;; guile-email is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;;; Affero General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Affero General Public
+;;; License along with guile-email. If not, see
+;;; <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; This script recursively finds all mbox files in the current
+;; directory, and tests if guile-email can parse all messages without
+;; erroring out.
+
+;;; Code:
+
+(use-modules (rnrs io ports)
+ (srfi srfi-26)
+ (srfi srfi-28)
+ (srfi srfi-171)
+ (ice-9 ftw)
+ (ice-9 match)
+ (email email))
+
+(define %fail-directory
+ (match (command-line)
+ ((_ fail-directory) fail-directory)
+ ((arg0 _ ...)
+ (scm-error 'misc-error
+ #f
+ "Usage: ~A fail-directory"
+ (list arg0)
+ #f))))
+
+(define (directory-files directory)
+ "Return files in DIRECTORY."
+ (scandir directory (lambda (filename)
+ (not (member filename (list "." ".."))))))
+
+(define (write-failed-email corpus mbox index email)
+ "Write EMAIL, the INDEXth email in MBOX of CORPUS, for which
+parse-email fails to fail directory."
+ (display (format "Failed on index ~a of ~a/~a~%" index corpus mbox))
+ (call-with-output-file (format "~a/~a_~a_~a"
+ %fail-directory
+ corpus
+ (basename mbox ".mbox")
+ index)
+ (cut put-bytevector <> email)))
+
+(define read-next-email-in-mbox
+ (@@ (email email)
+ read-next-email-in-mbox))
+
+(define (test-corpus corpus)
+ "Test CORPUS, a directory containing mbox files."
+ (for-each (lambda (mbox)
+ (let ((mbox-path (string-append corpus "/" mbox)))
+ (display (format "~a~%" mbox-path))
+ (call-with-input-file mbox-path
+ (lambda (port)
+ (port-transduce (compose (tenumerate)
+ (tmap (match-lambda
+ ((index . bv)
+ (catch #t
+ (lambda ()
+ (parse-email bv))
+ (lambda _
+ (write-failed-email corpus mbox index bv)))))))
+ (const #t)
+ read-next-email-in-mbox
+ port)))))
+ (directory-files corpus)))
+
+;; If fail directory exists, delete its contents. Else, create it.
+(if (file-exists? %fail-directory)
+ (for-each (lambda (file)
+ (delete-file (string-append %fail-directory "/" file)))
+ (directory-files %fail-directory))
+ (mkdir %fail-directory))
+
+;; Test corpora.
+(test-corpus "guix-commits")
+(test-corpus "guix-patches")
+(test-corpus "bug-guix")
+
+;; If fail directory has any emails, then error out.
+(unless (null? (directory-files %fail-directory))
+ (error "Corpus test failed for some messages"))