;;; guile-xapian --- Guile bindings for Xapian
;;; Copyright © 2020, 2022 Arun Isaac <arunisaac@systemreboot.net>
;;; Copyright © 2021 Bob131 <bob@bob131.so>
;;;
;;; This file is part of guile-xapian.
;;;
;;; guile-xapian is free software: you can redistribute it and/or
;;; modify it under the terms of the GNU General Public License as
;;; published by the Free Software Foundation, either version 2 of the
;;; License, or (at your option) any later version.
;;;
;;; guile-xapian is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with guile-xapian.  If not, see
;;; <https://www.gnu.org/licenses/>.

(define-module (xapian xapian)
  #:use-module (rnrs arithmetic bitwise)
  #:use-module (rnrs bytevectors)
  #:use-module (ice-9 match)
  #:use-module (srfi srfi-26)
  #:use-module (xapian wrap)
  #:export (xapian-open
            xapian-close
            call-with-database
            database-document-count
            xapian-open-writable
            xapian-close-writable
            call-with-writable-database
            call-with-in-memory-database
            add-document!
            replace-document!
            make-document
            document-data
            document-bytes
            document-set-data!
            document-set-bytes!
            document-slot-ref
            document-slot-ref-bytes
            document-slot-set!
            document-slot-set-bytes!
            make-stem
            make-term-generator
            index-text!
            increase-termpos!
            parse-query
            enquire
            enquire-mset
            mset-item-docid
            mset-item-document
            mset-item-rank
            mset-item-weight
            mset-fold
            mset-snippet))

(define xapian-open new-Database)
(define xapian-close delete-Database)

(define (call-with-database dbpath proc)
  (let ((db (xapian-open dbpath)))
    (dynamic-wind noop (cut proc db) (cut xapian-close db))))

(define database-document-count Database-get-doccount)

(define xapian-open-writable new-WritableDatabase)
(define xapian-close-writable delete-WritableDatabase)

(define (call-with-writable-database dbpath proc)
  (let ((db (xapian-open-writable dbpath)))
    (dynamic-wind noop (cut proc db) (cut xapian-close-writable db))))

(define (call-with-in-memory-database proc)
  "Call PROC passing it an in-memory writable database. Only a
writable in-memory database can be created, since a read-only
in-memory database would always remain empty and is of little use."
  (let ((db (inmemory-open)))
    (dynamic-wind noop (cut proc db) (cut xapian-close-writable db))))

(define add-document! WritableDatabase-add-document)
(define replace-document! WritableDatabase-replace-document)

(define* (make-document #:key data (terms '()) (values '()))
  (let ((doc (new-Document)))
    (when data
      (document-set-data! doc data))
    (for-each (match-lambda
                ((term . wdf-increment)
                 (Document-add-term doc term wdf-increment)))
              terms)
    (for-each (match-lambda
                ((slot . value)
                 (document-slot-set! doc slot value)))
              values)
    doc))

(define document-data Document-get-data)
(define document-bytes Document-get-data-bytes)

(define (document-set-data! document data)
  "Set data of DOCUMENT to DATA. DATA may be a string or a bytevector."
  (cond
   ((string? data)
    (Document-set-data document data))
   ((bytevector? data)
    (Document-set-data-bytes document data))
   (else
    (error "Invalid document data" data))))

(define (document-set-bytes! doc data)
  (display "document-set-bytes! is deprecated. document-set-data! now supports setting bytevectors as data. Please use that instead."
           (current-error-port))
  (Document-set-data-bytes doc data))

(define document-slot-ref Document-get-value)
(define document-slot-ref-bytes Document-get-value-bytes)

(define (document-slot-set! document slot value)
  "Set SLOT of DOCUMENT to VALUE. VALUE may be a string or a
bytevector."
  (cond
   ((string? value)
    (Document-add-value document slot value))
   ((bytevector? value)
    (Document-add-value-bytes document slot value))
   (else
    (error "Invalid document slot value" value))))

(define (document-slot-set-bytes! document slot value)
  (display "document-slot-set-bytes! is deprecated. document-slot-set! now supports setting bytevectors as slot value. Please use that instead."
           (current-error-port))
  (Document-add-value-bytes document slot value))

(define make-stem new-Stem)

(define* (make-term-generator #:key stem document)
  (let ((term-generator (new-TermGenerator)))
    (TermGenerator-set-stemmer term-generator stem)
    (TermGenerator-set-document term-generator document)
    term-generator))

(define* (index-text! term-generator text #:key (wdf-increment 1) prefix)
  (apply TermGenerator-index-text term-generator text wdf-increment
         (if prefix (list prefix) '())))

(define increase-termpos! TermGenerator-increase-termpos)

(define* (parse-query querystring #:key stemmer stemming-strategy (prefixes '()))
  (let ((queryparser (new-QueryParser)))
    (QueryParser-set-stemmer queryparser stemmer)
    (when stemming-strategy
      (QueryParser-set-stemming-strategy queryparser stemming-strategy))
    (for-each (match-lambda
                ((field . prefix)
                 (QueryParser-add-prefix queryparser field prefix)))
              prefixes)
    (let ((query (QueryParser-parse-query queryparser querystring)))
      (delete-QueryParser queryparser)
      query)))

(define* (enquire db query #:key weighting-scheme)
  (let ((enquire (new-Enquire db)))
    (Enquire-set-query enquire query)
    (when weighting-scheme
      (Enquire-set-weighting-scheme enquire weighting-scheme))
    enquire))

(define* (enquire-mset enquire #:key (offset 0) maximum-items)
  "Run a query using the settings in the ENQUIRE object and return an
mset object.

OFFSET specifies the number of items to ignore at the beginning of the
result set.

MAXIMUM-ITEMS specifies the maximum number of items to return. To
return all matches, pass the result of calling database-document-count
on the database object."
  (Enquire-get-mset enquire offset maximum-items))

(define mset-item-docid MSetIterator-get-docid)
(define mset-item-document MSetIterator-get-document)
(define mset-item-rank MSetIterator-get-rank)
(define mset-item-weight MSetIterator-get-weight)

(define (mset-fold proc init mset)
  (let loop ((head (MSet-begin mset))
             (result init))
    (cond
     ((MSetIterator-equals head (MSet-end mset)) result)
     (else (let ((result (proc head result)))
             (MSetIterator-next head)
             (loop head result))))))

(define (get-flag flag-thunk value)
  (if value (flag-thunk) 0))

;; TODO: Support cjk-words?
(define* (mset-snippet mset text
                       #:key (length 500) (stemmer (make-stem "none"))
                       (highlight-start "<b>") (highlight-end "</b>") (omit "...")
                       (background-model? #t) (exhaustive? #t)
                       (empty-without-match? #t)
                       (cjk-ngram? #t))
  (MSet-snippet mset text length stemmer
                (bitwise-ior (get-flag MSet-SNIPPET-BACKGROUND-MODEL background-model?)
                             (get-flag MSet-SNIPPET-EXHAUSTIVE exhaustive?)
                             (get-flag MSet-SNIPPET-EMPTY-WITHOUT-MATCH empty-without-match?)
                             (get-flag MSet-SNIPPET-CJK-NGRAM cjk-ngram?))
                highlight-start highlight-end omit))