summaryrefslogtreecommitdiff
path: root/examples/index.scm
blob: 292334e248b1c00670e9004451a0830ec05d392d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
;;; guile-xapian --- Guile bindings for Xapian
;;; Copyright © 2020 Arun Isaac <arunisaac@systemreboot.net>
;;;
;;; This file is part of guile-xapian.
;;;
;;; guile-xapian is free software: you can redistribute it and/or
;;; modify it under the terms of the GNU General Public License as
;;; published by the Free Software Foundation, either version 2 of the
;;; License, or (at your option) any later version.
;;;
;;; guile-xapian is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with guile-xapian.  If not, see
;;; <https://www.gnu.org/licenses/>.

(use-modules (ice-9 match)
             (srfi srfi-26)
             (xapian xapian))

(define (index datapath dbpath)
  ;; Read the list of records stored in the file DATAPATH and index
  ;; each of them into the Xapian database at DBPATH. The whole list
  ;; is obtained with a single `read' of the data file, and every
  ;; record must be a three-element list of the form
  ;; (description title identifier).

  (define (index-record! db record)
    ;; Build a document for RECORD and store it in DB.
    (match record
      ((description title identifier)
       (let* (;; The "Q"-prefixed identifier is the unique term under
              ;; which the document is stored.
              (idterm (string-append "Q" identifier))
              ;; The written representation of the record is kept as
              ;; the document data, for display purposes. Storing
              ;; data is optional and can be skipped to cut down on
              ;; database size.
              (serialized (call-with-output-string
                            (lambda (port) (write record port))))
              (doc (make-document #:data serialized
                                  #:terms (list (cons idterm 0))))
              ;; Term generator with an English stemmer, set up to
              ;; add the terms it generates to the new document.
              (generator (make-term-generator #:stem (make-stem "en")
                                              #:document doc)))
         ;; Prefixed terms allow searching individual fields, as in
         ;; title:sunwatch, description:leather, etc.
         (index-text! generator title #:prefix "S")
         (index-text! generator description #:prefix "XD")

         ;; Unprefixed terms serve general search. The term position
         ;; is bumped between the two fields, presumably so that
         ;; phrase searches do not match across the title/description
         ;; boundary.
         (index-text! generator title)
         (increase-termpos! generator)
         (index-text! generator description)

         ;; Replacing by the unique idterm ensures each record ends up
         ;; in the database only once, no matter how many times we
         ;; run the indexer.
         (replace-document! db idterm doc)))))

  ;; Open the database for writing. call-with-writable-database
  ;; automatically closes the database once we're done.
  (call-with-writable-database dbpath
    (lambda (db)
      (let ((records (call-with-input-file datapath read)))
        (for-each (lambda (record) (index-record! db record))
                  records)))))

;; Handle command line arguments. Exactly two arguments are expected:
;; the path of the data file to read records from and the path of the
;; database to index them into.
(match (command-line)
  ((_ datapath dbpath)
   (index datapath dbpath))
  ((program . _)
   ;; Wrong number of arguments: print the usage message on the error
   ;; port and exit with a non-zero status so that callers (shell
   ;; scripts, build systems) can detect the failure.
   (format (current-error-port) "Usage: ~a DATAPATH DBPATH~%" program)
   (exit 1)))