summaryrefslogtreecommitdiff
path: root/build-aux/test-corpus.scm
blob: 740233ec0a3cac1fa24757d433fe03acc0567b79 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
;;; guile-email --- Guile email parser
;;; Copyright © 2023 Arun Isaac <arunisaac@systemreboot.net>
;;;
;;; This file is part of guile-email.
;;;
;;; guile-email is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU Affero General Public License as
;;; published by the Free Software Foundation; either version 3 of the
;;; License, or (at your option) any later version.
;;;
;;; guile-email is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Affero General Public License for more details.
;;;
;;; You should have received a copy of the GNU Affero General Public
;;; License along with guile-email.  If not, see
;;; <http://www.gnu.org/licenses/>.

;;; Commentary:

;; This script recursively finds all mbox files in the current
;; directory, and tests if guile-email can parse all messages without
;; erroring out.

;;; Code:

(use-modules (rnrs io ports)
             (srfi srfi-26)
             (srfi srfi-28)
             (srfi srfi-171)
             (ice-9 ftw)
             (ice-9 match)
             (email email))

(define (directory-files directory)
  "Return files in DIRECTORY."
  (scandir directory (lambda (filename)
                       (not (member filename (list "." ".."))))))

(define (write-failed-email fail-directory corpus mbox index email)
  "Write EMAIL, the INDEXth email in MBOX of CORPUS, for which
parse-email fails to FAIL-DIRECTORY."
  (display (format "Failed on index ~a of ~a/~a~%" index corpus mbox))
  (call-with-output-file (format "~a/~a_~a_~a"
                                 fail-directory
                                 corpus
                                 (basename mbox ".mbox")
                                 index)
    (cut put-bytevector <> email)))

(define read-next-email-in-mbox
  (@@ (email email)
      read-next-email-in-mbox))

(define (test-corpus corpus fail-directory)
  "Test CORPUS, a directory containing mbox files. Write emails on which
the parser failed to FAIL-DIRECTORY."
  (for-each (lambda (mbox)
              (let ((mbox-path (string-append corpus "/" mbox)))
                (display (format "~a~%" mbox-path))
                (call-with-input-file mbox-path
                  (lambda (port)
                    (port-transduce (compose (tenumerate)
                                             (tmap (match-lambda
                                                     ((index . bv)
                                                      (catch #t
                                                        (lambda ()
                                                          (parse-email bv))
                                                        (lambda _
                                                          (write-failed-email fail-directory corpus mbox index bv)))))))
                                    (const #t)
                                    read-next-email-in-mbox
                                    port)))))
            (directory-files corpus)))

(define (main fail-directory)
  ;; Test corpora.
  (test-corpus "guix-commits" fail-directory)
  (test-corpus "guix-patches" fail-directory)
  (test-corpus "bug-guix" fail-directory)

  ;; If fail directory has any emails, then error out.
  (unless (null? (directory-files fail-directory))
    (error "Corpus test failed for some messages")))