summaryrefslogtreecommitdiff
path: root/build-aux/test-corpus.scm.in
blob: 2bf7a0a5d7b6cd81a326eafbab859d23275e3394 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#! @GUILE@
-*- scheme -*-
!#
;;; guile-email --- Guile email parser
;;; Copyright © 2021 Arun Isaac <arunisaac@systemreboot.net>
;;;
;;; This file is part of guile-email.
;;;
;;; guile-email is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU Affero General Public License as
;;; published by the Free Software Foundation; either version 3 of the
;;; License, or (at your option) any later version.
;;;
;;; guile-email is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Affero General Public License for more details.
;;;
;;; You should have received a copy of the GNU Affero General Public
;;; License along with guile-email.  If not, see
;;; <http://www.gnu.org/licenses/>.

;;; Commentary:

;; This script recursively finds all mbox files in the current
;; directory, and tests if guile-email can parse all messages without
;; erroring out.

;;; Code:

(use-modules (rnrs io ports)
             (srfi srfi-26)
             (srfi srfi-28)
             (srfi srfi-171)
             (ice-9 ftw)
             (ice-9 match)
             (email email))

(define %fail-directory
  (match (command-line)
    ((_ fail-directory) fail-directory)
    ((arg0 _ ...)
     (scm-error 'misc-error
                #f
                "Usage: ~A fail-directory"
                (list arg0)
                #f))))

(define (directory-files directory)
  "Return files in DIRECTORY."
  (scandir directory (lambda (filename)
                       (not (member filename (list "." ".."))))))

(define (write-failed-email corpus mbox index email)
  "Write EMAIL, the INDEXth email in MBOX of CORPUS, for which
parse-email fails to fail directory."
  (display (format "Failed on index ~a of ~a/~a~%" index corpus mbox))
  (call-with-output-file (format "~a/~a_~a_~a"
                                 %fail-directory
                                 corpus
                                 (basename mbox ".mbox")
                                 index)
    (cut put-bytevector <> email)))

(define read-next-email-in-mbox
  (@@ (email email)
      read-next-email-in-mbox))

(define (test-corpus corpus)
  "Test CORPUS, a directory containing mbox files."
  (for-each (lambda (mbox)
              (let ((mbox-path (string-append corpus "/" mbox)))
                (display (format "~a~%" mbox-path))
                (call-with-input-file mbox-path
                  (lambda (port)
                    (port-transduce (compose (tenumerate)
                                             (tmap (match-lambda
                                                     ((index . bv)
                                                      (catch #t
                                                        (lambda ()
                                                          (parse-email bv))
                                                        (lambda _
                                                          (write-failed-email corpus mbox index bv)))))))
                                    (const #t)
                                    read-next-email-in-mbox
                                    port)))))
            (directory-files corpus)))

;; If fail directory exists, delete its contents. Else, create it.
(if (file-exists? %fail-directory)
    (for-each (lambda (file)
                (delete-file (string-append %fail-directory "/" file)))
              (directory-files %fail-directory))
    (mkdir %fail-directory))

;; Test corpora.
(test-corpus "guix-commits")
(test-corpus "guix-patches")

;; If fail directory has any emails, then error out.
(unless (null? (directory-files %fail-directory))
  (error "Corpus test failed for some messages"))