aboutsummaryrefslogtreecommitdiff
path: root/xapian/xapian.scm
diff options
context:
space:
mode:
Diffstat (limited to 'xapian/xapian.scm')
-rw-r--r--xapian/xapian.scm109
1 files changed, 105 insertions, 4 deletions
diff --git a/xapian/xapian.scm b/xapian/xapian.scm
index 75924ae..80b4b9b 100644
--- a/xapian/xapian.scm
+++ b/xapian/xapian.scm
@@ -22,7 +22,9 @@
#:use-module (rnrs arithmetic bitwise)
#:use-module (rnrs bytevectors)
#:use-module (ice-9 match)
+ #:use-module (srfi srfi-1)
#:use-module (srfi srfi-26)
+ #:use-module (htmlprag)
#:use-module (xapian wrap)
#:export (xapian-open
xapian-close
@@ -48,6 +50,9 @@
index-text!
increase-termpos!
parse-query
+ query-and
+ query-or
+ query-filter
enquire
enquire-mset
mset-item-docid
@@ -55,7 +60,8 @@
mset-item-rank
mset-item-weight
mset-fold
- mset-snippet))
+ mset-snippet
+ mset-sxml-snippet))
(define xapian-open new-Database)
(define xapian-close delete-Database)
@@ -142,9 +148,15 @@ bytevector."
(TermGenerator-set-document term-generator document)
term-generator))
-(define* (index-text! term-generator text #:key (wdf-increment 1) prefix)
- (apply TermGenerator-index-text term-generator text wdf-increment
- (if prefix (list prefix) '())))
+(define* (index-text! term-generator text #:key (wdf-increment 1) (prefix "") (positions? #t))
+ "Index @var{text} using @var{term-generator}. @var{wdf-increment} is
+the within document frequency increment. @var{prefix} is the term
+prefix to use. If @var{positions?} is #f, position information is not
+generated."
+ ((if positions?
+ TermGenerator-index-text
+ TermGenerator-index-text-without-positions)
+ term-generator text wdf-increment prefix))
(define increase-termpos! TermGenerator-increase-termpos)
@@ -194,6 +206,34 @@ on the database object."
(MSetIterator-next head)
(loop head result))))))
+(define (query-combine combine-operator default . queries)
+ (reduce (cut new-Query combine-operator <> <>)
+ default
+ queries))
+
+(define (query-and . queries)
+ "Return a query matching only documents matching all @var{queries}.
+
+In a weighted context, the weight is the sum of the weights for all
+queries."
+ (apply query-combine (Query-OP-AND) (Query-MatchAll) queries))
+
+(define (query-or . queries)
+ "Return a query matching documents which at least one of @var{queries}
+match.
+
+In a weighted context, the weight is the sum of the weights for
+matching queries."
+ (apply query-combine (Query-OP-OR) (Query-MatchNothing) queries))
+
+(define (query-filter . queries)
+ "Return a query matching only documents matching all @var{queries},
+but only take weight from the first of @var{queries}.
+
+In a non-weighted context, @code{query-filter} and @code{query-and}
+are equivalent."
+ (apply query-combine (Query-OP-FILTER) (Query-MatchAll) queries))
+
(define (get-flag flag-thunk value)
(if value (flag-thunk) 0))
@@ -204,9 +244,70 @@ on the database object."
(background-model? #t) (exhaustive? #t)
(empty-without-match? #t)
(cjk-ngram? #t))
+ "Generate a snippet from @var{text}. @var{mset} is the xapian
+@code{MSet} object representing a list of search results.
+
+@var{length} is the number of bytes of @var{text} to aim to select.
+
+The same stemmer used to build the query should be specified as
+@var{stemmer}.
+
+@var{highlight-start} and @var{highlight-end} are inserted
+respectively before and after the highlit terms.
+
+If the chosen snippet seems to start or end mid-sentence, then
+@var{omit} is prepended or appended to indicate this.
+
+If @var{background-model?} is @code{#true}, the relevance of non-query
+terms are modelled to prefer snippets containing a more interesting
+background.
+
+If @var{exhaustive?} is @code{#true}, exhaustively evaluate candidate
+snippets. Else, snippet generation will stop once a @emph{good enough}
+snippet has been found.
+
+If @var{empty-without-match?} is @code{#true}, return the empty string
+if not a single term was found in @var{text}. Else, return a substring
+of text without any highlit terms.
+
+If @var{cjk-ngram?} is @code{#true}, enable generation of n-grams from
+CJK text.
+
+See @code{MSet::snippet} in @file{xapian/mset.h} of the xapian source
+for more details."
(MSet-snippet mset text length stemmer
(bitwise-ior (get-flag MSet-SNIPPET-BACKGROUND-MODEL background-model?)
(get-flag MSet-SNIPPET-EXHAUSTIVE exhaustive?)
(get-flag MSet-SNIPPET-EMPTY-WITHOUT-MATCH empty-without-match?)
(get-flag MSet-SNIPPET-CJK-NGRAM cjk-ngram?))
highlight-start highlight-end omit))
+
+(define* (mset-sxml-snippet mset text #:key (length 500) (stemmer (make-stem "none"))
+ (highlight-proc (lambda (text)
+ `(b ,text)))
+ (omit "...") (background-model? #t) (exhaustive? #t)
+ (empty-without-match? #t) (cjk-ngram? #t))
+ "Generate a snippet in SXML form from @var{text}.
+
+@var{highlight-proc} is a function that is passed the highlit text. It
+should return a SXML tree highlighting that text.
+
+Arguments @var{mset}, @var{length}, @var{stemmer}, @var{omit},
+@var{background-model?}, @var{exhaustive?}, @var{empty-without-match?}
+and @var{cjk-ngram?} are the same as in the @code{mset-snippet}
+function."
+ ;; mset-snippet returns serialized HTML. So, we reverse it with
+ ;; html->sxml.
+ (match (html->sxml (mset-snippet mset text))
+ (('*TOP* children ...)
+ (append-map (match-lambda
+ ;; Apply highlight-proc if highlit text.
+ (('b text)
+ (list (highlight-proc text)))
+ ;; Add (br) if end of line.
+ ((? (cut string-suffix? "\n" <>) text)
+ (list (string-trim-right text #\newline)
+ '(br)))
+ ;; Else, return verbatim.
+ (text (list text)))
+ children))))