summaryrefslogtreecommitdiff
path: root/tissue/search.scm
blob: bc60c191d2ebb430ada237380fb73f5afe000895 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
;;; tissue --- Text based issue tracker
;;; Copyright © 2022 Arun Isaac <arunisaac@systemreboot.net>
;;;
;;; This file is part of tissue.
;;;
;;; tissue is free software: you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation, either version 3 of the License, or
;;; (at your option) any later version.
;;;
;;; tissue is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with tissue.  If not, see <https://www.gnu.org/licenses/>.

(define-module (tissue search)
  #:use-module (srfi srfi-1)
  #:use-module (ice-9 match)
  #:use-module (tissue document)
  #:use-module (tissue issue)
  #:use-module (tissue utils)
  #:use-module (xapian wrap)
  #:use-module ((xapian xapian) #:renamer (lambda (symbol)
                                            (case symbol
                                              ((parse-query) 'xapian:parse-query)
                                              (else symbol))))
  #:export (parse-query
            boolean-query?
            search-fold
            search-map))

(define (make-query-parser stemmer prefixes boolean-prefixes)
  "Return a query parser with STEMMER, PREFIXES and
BOOLEAN-PREFIXES. PREFIXES and BOOLEAN-PREFIXES are association lists
mapping field names to prefixes."
  (let ((query-parser (new-QueryParser)))
    (QueryParser-set-stemmer query-parser stemmer)
    (for-each (match-lambda
                ((field . prefix)
                 (QueryParser-add-prefix query-parser field prefix)))
              prefixes)
    (for-each (match-lambda
                ((field . prefix)
                 (QueryParser-add-boolean-prefix query-parser field prefix)))
              boolean-prefixes)
    query-parser))

(define %prefixes
  '(("title" . "S")))

(define %boolean-prefixes
  '(("type" . "XT")
    ("creator" . "A")
    ("lastupdater" . "XA")
    ("assigned" . "XI")
    ("keyword" . "K")
    ("tag" . "K")
    ("is" . "XS")))

(define query-parser
  (make-query-parser (make-stem "en") %prefixes %boolean-prefixes))

(define (parse-query search-query)
  "Parse SEARCH-QUERY and return a xapian Query object."
  (if (string-blank? search-query)
      (Query-MatchAll)
      (QueryParser-parse-query query-parser search-query)))

(define term-ref TermIterator-get-term)

(define (query-terms-every pred query)
  "Test whether every term in QUERY satisfies PRED. If so, return the
result of the last PRED call. If not, return #f. The calls to PRED are
made successively on the first, second, third, etc. term, and stopped
when PRED returns #f."
  (let loop ((head (Query-get-terms-begin query))
             (result #t))
    (cond
     ((TermIterator-equals head (Query-get-terms-end query))
      result)
     ((pred head)
      => (lambda (result)
           (TermIterator-next head)
           (loop head result)))
     (else #f))))

(define (boolean-query? query)
  "Return #t if QUERY contains only boolean terms. Else, return #f."
  (query-terms-every (lambda (term)
                       (any (match-lambda
                              ((field . prefix)
                               (string-contains? (term-ref term) prefix)))
                            %boolean-prefixes))
                     query))

(define* (search-fold proc initial db search-query
                      #:key (offset 0) (maximum-items (database-document-count db)))
  "Search xapian database DB using SEARCH-QUERY and fold over the
results using PROC and INITIAL.

PROC is invoked as (PROC DOCUMENT MSET PREVIOUS). DOCUMENT is an
instance of <document> or one of its subclasses. MSET is the xapian
MSet object representing the search results. PREVIOUS is the return
from the previous invocation of PROC, or the given INITIAL for the
first call.

OFFSET specifies the number of items to ignore at the beginning of the
result set. MAXIMUM-ITEMS specifies the maximum number of items to
return."
  (mset-fold (lambda (item result)
               (proc (call-with-input-string (document-data (mset-item-document item))
                       (compose scm->object read))
                     (MSetIterator-mset-get item)
                     result))
             initial
             (enquire-mset (let* ((query (parse-query search-query))
                                  (enquire (enquire db query)))
                             ;; Sort by recency date (slot 0) when
                             ;; query is strictly boolean.
                             (when (boolean-query? query)
                               (Enquire-set-sort-by-value enquire 0 #t))
                             enquire)
                           #:maximum-items maximum-items)))

(define* (search-map proc db search-query
                     #:key (offset 0) (maximum-items (database-document-count db)))
  "Search xapian database DB using SEARCH-QUERY and map over the results
using PROC.

PROC is invoked as (PROC DOCUMENT MSET). DOCUMENT is an instance of
<document> or one of its subclasses. MSET is the xapian MSet object
representing the search results.

OFFSET specifies the number of items to ignore at the beginning of the
result set. MAXIMUM-ITEMS specifies the maximum number of items to
return."
  (reverse
   (search-fold (lambda (document mset result)
                  (cons (proc document mset)
                        result))
                '()
                db
                search-query
                #:maximum-items maximum-items)))