From 7a4f28752dd142bd85939d840393c4cd8421f784 Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Sat, 23 Oct 2021 09:47:14 +0530
Subject: build-aux: Read emails one by one from mbox corpora.

Reading emails one by one avoids allocating memory for a list of
emails.

* build-aux/test-corpus.scm.in (read-next-email-in-mbox): New function.
(test-corpus): Use port-transduce to read emails one by one.
---
 build-aux/test-corpus.scm.in | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/build-aux/test-corpus.scm.in b/build-aux/test-corpus.scm.in
index 9e66132..2bf7a0a 100644
--- a/build-aux/test-corpus.scm.in
+++ b/build-aux/test-corpus.scm.in
@@ -62,6 +62,10 @@ parse-email fails to fail directory."
                    index)
     (cut put-bytevector <> email)))
 
+(define read-next-email-in-mbox
+  (@@ (email email)
+      read-next-email-in-mbox))
+
 (define (test-corpus corpus)
   "Test CORPUS, a directory containing mbox files."
   (for-each (lambda (mbox)
@@ -69,7 +73,7 @@ parse-email fails to fail directory."
                 (display (format "~a~%" mbox-path))
                 (call-with-input-file mbox-path
                   (lambda (port)
-                    (list-transduce (compose (tenumerate)
+                    (port-transduce (compose (tenumerate)
                                              (tmap (match-lambda
                                                      ((index . bv)
                                                       (catch #t
@@ -78,7 +82,8 @@ parse-email fails to fail directory."
                                                         (lambda _
                                                           (write-failed-email corpus mbox index bv)))))))
                                     (const #t)
-                                    (mbox->emails port))))))
+                                    read-next-email-in-mbox
+                                    port)))))
             (directory-files corpus)))
 
 ;; If fail directory exists, delete its contents. Else, create it.
-- 
cgit v1.2.3