aboutsummaryrefslogtreecommitdiff
path: root/xapian
diff options
context:
space:
mode:
Diffstat (limited to 'xapian')
-rw-r--r--xapian/xapian.scm133
1 files changed, 130 insertions, 3 deletions
diff --git a/xapian/xapian.scm b/xapian/xapian.scm
index 80b4b9b..64042a2 100644
--- a/xapian/xapian.scm
+++ b/xapian/xapian.scm
@@ -1,5 +1,5 @@
;;; guile-xapian --- Guile bindings for Xapian
-;;; Copyright © 2020, 2022 Arun Isaac <arunisaac@systemreboot.net>
+;;; Copyright © 2020, 2022, 2024 Arun Isaac <arunisaac@systemreboot.net>
;;; Copyright © 2021 Bob131 <bob@bob131.so>
;;;
;;; This file is part of guile-xapian.
@@ -50,9 +50,15 @@
index-text!
increase-termpos!
parse-query
+ query
query-and
query-or
query-filter
+ prefixed-range-processor
+ suffixed-range-processor
+ prefixed-date-range-processor
+ suffixed-date-range-processor
+ field-processor
enquire
enquire-mset
mset-item-docid
@@ -160,7 +166,37 @@ generated."
(define increase-termpos! TermGenerator-increase-termpos)
-(define* (parse-query querystring #:key stemmer stemming-strategy (prefixes '()))
+(define* (parse-query querystring
+ #:key
+ stemmer
+ stemming-strategy
+ (prefixes '())
+ (boolean-prefixes '())
+ (range-processors '())
+ (boolean? #t)
+ (phrases? #t)
+ (love-hate? #t)
+ any-case-boolean?
+ wildcard?)
+ "Parse @var{querystring} and return a @code{Query} object.
+
+@var{prefixes} and @var{boolean-prefixes} must be association lists
+mapping fields to prefixes or @code{FieldProcessor}
+objects. @var{range-processors} is a list of @code{RangeProcessor}
+objects.
+
+When @var{boolean?} is @code{#t}, boolean operators (AND, OR, etc.)
+and bracketed subexpressions are supported.
+
+When @var{phrases?} is @code{#t}, quoted phrases are supported.
+
+When @var{love-hate?} is @code{#t}, @samp{+} and @samp{-} are
+supported.
+
+When @var{any-case-boolean?} is @code{#t}, boolean operators are
+supported even if they are not in capitals.
+
+When @var{wildcard?} is @code{#t}, wildcards are supported."
(let ((queryparser (new-QueryParser)))
(QueryParser-set-stemmer queryparser stemmer)
(when stemming-strategy
@@ -169,7 +205,19 @@ generated."
((field . prefix)
(QueryParser-add-prefix queryparser field prefix)))
prefixes)
- (let ((query (QueryParser-parse-query queryparser querystring)))
+ (for-each (match-lambda
+ ((field . prefix)
+ (QueryParser-add-boolean-prefix queryparser field prefix)))
+ boolean-prefixes)
+ (for-each (cut QueryParser-add-rangeprocessor queryparser <>)
+ range-processors)
+ (let ((query (QueryParser-parse-query queryparser
+ querystring
+ (bitwise-ior (get-flag QueryParser-FLAG-BOOLEAN boolean?)
+ (get-flag QueryParser-FLAG-PHRASE phrases?)
+ (get-flag QueryParser-FLAG-LOVEHATE love-hate?)
+ (get-flag QueryParser-FLAG-BOOLEAN-ANY-CASE any-case-boolean?)
+ (get-flag QueryParser-FLAG-WILDCARD wildcard?)))))
(delete-QueryParser queryparser)
query)))
@@ -206,6 +254,10 @@ on the database object."
(MSetIterator-next head)
(loop head result))))))
+(define (query term)
+ "Return a @code{Query} object for @var{term}."
+ (new-Query term))
+
(define (query-combine combine-operator default . queries)
(reduce (cut new-Query combine-operator <> <>)
default
@@ -234,6 +286,81 @@ In a non-weighted context, @code{query-filter} and @code{query-and}
are equivalent."
(apply query-combine (Query-OP-FILTER) (Query-MatchAll) queries))
+(define* (prefixed-range-processor slot proc #:key (prefix "") repeated?)
+ "Return a @code{RangeProcessor} object that calls @var{proc} to process
+its range over @var{slot}.
+
+@var{proc} is a procedure that, given a begin string and an end
+string, must return a @code{Query} object. For open-ended ranges,
+either the begin string or the end string will be @code{#f}.
+
+@var{prefix} is a prefix to look for to recognize values as belonging
+to this range. When @var{repeated?} is @code{#t}, allow @var{prefix}
+on both ends of the range—@samp{$1..$10}."
+ (new-GuileXapianRangeProcessorWrapper
+ slot
+ prefix
+ (get-flag RP-REPEATED repeated?)
+ proc))
+
+(define* (suffixed-range-processor slot proc #:key suffix repeated?)
+ "Return a @code{RangeProcessor} object that calls @var{proc} to process
+its range over @var{slot}.
+
+@var{proc} is a procedure that, given a begin string and an end
+string, must return a @code{Query} object. For open-ended ranges,
+either the begin string or the end string will be @code{#f}.
+
+@var{suffix} is a suffix to look for to recognize values as belonging
+to this range. When @var{repeated?} is @code{#t}, allow @var{suffix}
+on both ends of the range—@samp{2kg..12kg}."
+ (new-GuileXapianRangeProcessorWrapper
+ slot
+ suffix
+ (bitwise-ior (RP-SUFFIX)
+ (get-flag RP-REPEATED repeated?))
+ proc))
+
+(define* (prefixed-date-range-processor slot #:key (prefix "") repeated? prefer-mdy? (epoch-year 1970))
+ "Return a @code{DateRangeProcessor} object that handles date ranges on
+@var{slot}.
+
+@var{prefix} and @var{repeated?} are the same as in
+@code{prefixed-range-processor}.
+
+When @var{prefer-mdy?} is @code{#t}, interpret ambiguous dates as
+month/day/year rather than day/month/year.
+
+@var{epoch-year} is the year to use as the epoch for dates with
+two-digit years."
+ (new-DateRangeProcessor slot
+ prefix
+ (bitwise-ior (get-flag (RP-REPEATED) repeated?)
+ (get-flag (RP-DATE-PREFER-MDY) prefer-mdy?))
+ epoch-year))
+
+(define* (suffixed-date-range-processor slot #:key suffix repeated? prefer-mdy? (epoch-year 1970))
+ "Return a @code{DateRangeProcessor} object that handles date ranges on
+@var{slot}.
+
+@var{suffix} and @var{repeated?} are the same as in
+@code{suffixed-range-processor}.
+
+@var{prefer-mdy?} and @var{epoch-year} are the same as in
+@code{prefixed-date-range-processor}."
+ (new-DateRangeProcessor slot
+ suffix
+ (bitwise-ior (RP-SUFFIX)
+ (get-flag (RP-REPEATED) repeated?)
+ (get-flag (RP-DATE-PREFER-MDY) prefer-mdy?))
+ epoch-year))
+
+(define (field-processor proc)
+ "Return a @code{FieldProcessor} object that calls
+@var{proc} to process its field. @var{proc} is a procedure that, given
+a string, must return a @code{Query} object."
+ (new-GuileXapianFieldProcessorWrapper proc))
+
(define (get-flag flag-thunk value)
(if value (flag-thunk) 0))