diff options
-rw-r--r-- | .guix-authorizations | 4 | ||||
-rw-r--r-- | .guix-channel | 3 | ||||
-rw-r--r-- | .guix/guile-xapian-package.scm | 89 | ||||
-rw-r--r-- | Makefile.am | 8 | ||||
-rw-r--r-- | NEWS | 28 | ||||
-rw-r--r-- | configure.ac | 5 | ||||
-rw-r--r-- | except.i | 50 | ||||
l---------[-rw-r--r--] | guix.scm | 38 | ||||
-rw-r--r-- | xapian.i.in | 3 | ||||
-rw-r--r-- | xapian/error.scm | 30 | ||||
-rw-r--r-- | xapian/xapian.scm | 109 |
11 files changed, 318 insertions, 49 deletions
diff --git a/.guix-authorizations b/.guix-authorizations new file mode 100644 index 0000000..a8ef8be --- /dev/null +++ b/.guix-authorizations @@ -0,0 +1,4 @@ +(authorizations + (version 0) + (("7F73 0343 F2F0 9F3C 77BF 79D3 2E25 EE8B 6180 2BB3" + (name "arunisaac")))) diff --git a/.guix-channel b/.guix-channel new file mode 100644 index 0000000..35e181f --- /dev/null +++ b/.guix-channel @@ -0,0 +1,3 @@ +(channel + (version 0) + (directory ".guix")) diff --git a/.guix/guile-xapian-package.scm b/.guix/guile-xapian-package.scm new file mode 100644 index 0000000..f52e330 --- /dev/null +++ b/.guix/guile-xapian-package.scm @@ -0,0 +1,89 @@ +;;; guile-xapian --- Guile bindings for Xapian +;;; Copyright © 2021–2024 Arun Isaac <arunisaac@systemreboot.net> +;;; +;;; This file is part of guile-xapian. +;;; +;;; guile-xapian is free software: you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation, either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; guile-xapian is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with guile-xapian. If not, see +;;; <https://www.gnu.org/licenses/>. + +(define-module (guile-xapian-package) + #:use-module ((gnu packages emacs) #:select (emacs-minimal)) + #:use-module ((gnu packages guile-xyz) #:prefix guix:) + #:use-module ((guix build-system guile) #:select (%guile-build-system-modules)) + #:use-module (guix gexp) + #:use-module (guix git-download) + #:use-module (guix packages) + #:use-module (guix profiles) + #:use-module (guix utils)) + +(define-public guile-xapian + (package + (inherit guix:guile-xapian) + (source (local-file ".." + "guile-xapian-checkout" + #:recursive? #t + #:select? (or (git-predicate (dirname (current-source-directory))) + (const #t)))))) + +(define-public guile2.2-xapian + (package + (inherit guix:guile2.2-xapian) + (source (local-file ".." + "guile-xapian-checkout" + #:recursive? #t + #:select? (or (git-predicate (dirname (current-source-directory))) + (const #t)))))) + +(define guile-xapian-website-gexp + (let ((development-profile + (profile + (content (package->development-manifest guile-xapian))))) + (with-imported-modules %guile-build-system-modules + #~(begin + (use-modules (guix build guile-build-system) + (guix build utils)) + + (copy-recursively #$(package-source guile-xapian) + (getcwd)) + (set-path-environment-variable + "PATH" (list "/bin") (list #$emacs-minimal #$development-profile)) + (set-path-environment-variable + "C_INCLUDE_PATH" (list "/include") (list #$development-profile)) + (set-path-environment-variable + "CPLUS_INCLUDE_PATH" (list "/include") (list #$development-profile)) + (set-path-environment-variable + "LIBRARY_PATH" (list "/lib") (list #$development-profile)) + (set-path-environment-variable + "ACLOCAL_PATH" (list "/share/aclocal") (list #$development-profile)) + (set-path-environment-variable + "PKG_CONFIG_PATH" (list "/lib/pkgconfig") (list #$development-profile)) + (set-path-environment-variable + "GUILE_LOAD_PATH" + (list (string-append "/share/guile/site/" + (target-guile-effective-version))) + (list #$development-profile)) + (set-path-environment-variable + "GUILE_LOAD_COMPILED_PATH" + (list (string-append "/lib/guile/" (target-guile-effective-version) "/site-ccache")) + (list #$development-profile)) + (invoke "autoreconf" "--verbose" "--install" "--force") + (patch-shebang "configure") + (invoke "./configure" "CONFIG_SHELL=sh" "SHELL=sh") + (invoke "make" "website") + (copy-recursively "website" #$output))))) + +(define-public guile-xapian-website + (computed-file "guile-xapian-website" guile-xapian-website-gexp)) + +guile-xapian diff --git a/Makefile.am b/Makefile.am index e88d63b..5d717ca 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,5 +1,5 @@ # guile-xapian --- Guile bindings for Xapian -# Copyright © 2020, 2021, 2022 Arun Isaac <arunisaac@systemreboot.net> +# Copyright © 2020, 2021, 2022, 2023 Arun Isaac <arunisaac@systemreboot.net> # Copyright © 2022 Bob131 <bob@bob131.so> # # This file is part of guile-xapian. @@ -50,7 +50,7 @@ CLEANFILES = xapian.i xapian_wrap.cc xapian/wrap.scm xapian.i: xapian.i.in Makefile sed -e 's|@libdir[@]|$(libdir)|g' -e 's|@GUILE_EFFECTIVE_VERSION[@]|$(GUILE_EFFECTIVE_VERSION)|g' $< > $@ -xapian_wrap.cc xapian/wrap.scm &: xapian.i xapian-head.i xapian-headers.i +xapian_wrap.cc xapian/wrap.scm &: xapian.i xapian-head.i xapian-headers.i except.i $(MKDIR_P) xapian $(SWIG_GEN)$(SWIG) $(SWIG_FLAGS) -I$(srcdir) -scmstub -o xapian_wrap.cc -guile -package xapian -c++ $< @@ -59,7 +59,7 @@ xapian_wrap.cc xapian/wrap.scm &: xapian.i xapian-head.i xapian-headers.i moddir = $(prefix)/share/guile/site/$(GUILE_EFFECTIVE_VERSION) godir = $(libdir)/guile/$(GUILE_EFFECTIVE_VERSION)/site-ccache -SOURCES = xapian/wrap.scm xapian/xapian.scm +SOURCES = xapian/error.scm xapian/wrap.scm xapian/xapian.scm GOBJECTS = $(SOURCES:%.scm=%.go) nobase_mod_DATA = $(SOURCES) @@ -88,7 +88,7 @@ SCM_LOG_DRIVER = \ # Distribution -EXTRA_DIST = $(TESTS) $(SOURCES) xapian.i.in xapian-head.i xapian-headers.i +EXTRA_DIST = $(TESTS) $(SOURCES) except.i xapian.i.in xapian-head.i xapian-headers.i # Build website @@ -1,10 +1,36 @@ -*- org -*- #+TITLE: guile-xapian NEWS – History of user-visible changes -Copyright © 2022 Arun Isaac <arunisaac@systemreboot.net> +Copyright © 2022–2023 Arun Isaac <arunisaac@systemreboot.net> Please send guile-xapian bug reports to arunisaac@systemreboot.net +* Changes in 0.3.1 (since 0.3.0) +** Noteworthy bug fixes +*** Catch C++ xapian exceptions and raise them as scheme exceptions + +* Changes in 0.3.0 (since 0.2.1) +** New dependencies +guile-xapian now depends on guile-lib for (htmlprag) used in the new +mset-sxml-snippet function. +** Features +*** Allow indexing text without position information +The index-text! function now optionally allows indexing text without +position information using a new #:positions? keyword argument. +*** Support combining queries with OR, AND and FILTER operators +We introduce new query-and, query-or and query-filter functions that +combine queries using the OR, AND and FILTER operators. +*** Support generation of SXML snippets +We introduce mset-sxml-snippet, a function that can generate snippets +in SXML form. + +* Changes in 0.2.1 (since 0.2.0) +** Noteworthy bug fixes +*** Distinguish libguilexapian based on Guile effective version +We suffix libguilexapian with the Guile effective version in order to +support simultaneous installation of guile-xapian for Guile 3.0 and +guile-xapian for Guile 2.2. + * Changes in 0.2.0 (since 0.1.0) ** Noteworthy bug fixes *** Look for libguilexapian.so under .libs diff --git a/configure.ac b/configure.ac index f0388c8..f953339 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ dnl guile-xapian --- Guile bindings for Xapian -dnl Copyright © 2020, 2021, 2022 Arun Isaac <arunisaac@systemreboot.net> +dnl Copyright © 2020, 2021, 2022, 2023 Arun Isaac <arunisaac@systemreboot.net> dnl dnl This file is part of guile-xapian. dnl @@ -17,7 +17,7 @@ dnl You should have received a copy of the GNU General Public License dnl along with guile-xapian. If not, see dnl <https://www.gnu.org/licenses/>. -AC_INIT([guile-xapian], [0.2.0]) +AC_INIT([guile-xapian], [0.3.1]) AM_INIT_AUTOMAKE([color-tests -Wall -Wno-portability foreign check-news no-dist-gzip dist-lzip]) AM_SILENT_RULES([yes]) LT_INIT @@ -29,6 +29,7 @@ PKG_CHECK_MODULES([GUILE], [guile-3.0],,[ PKG_CHECK_MODULES([GUILE], [guile-2.2]) ]) GUILE_PROGS +GUILE_MODULE_REQUIRED([htmlprag]) PKG_CHECK_MODULES([XAPIAN], [xapian-core]) PKG_CHECK_MODULES([ZLIB], [zlib]) diff --git a/except.i b/except.i new file mode 100644 index 0000000..fce1471 --- /dev/null +++ b/except.i @@ -0,0 +1,50 @@ +/* guile-xapian --- Guile bindings for Xapian + * Copyright © 2023 Arun Isaac <arunisaac@systemreboot.net> + * + * This file is part of guile-xapian. + * + * guile-xapian is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, either version 2 of the + * License, or (at your option) any later version. + * + * guile-xapian is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with guile-xapian. If not, see + * <https://www.gnu.org/licenses/>. + */ + +%{ +void handle_exception() { + string msg; + try { + throw; + } catch (const Xapian::Error &e) { + // TODO: Handle each Xapian error class separately and raise + // different scheme conditions for each. + msg = e.get_description(); + } catch (const std::exception &e) { + msg = "std::exception: "; + msg += e.what(); + } catch (...) { + msg = "unknown error in Xapian"; + } + scm_call_1(scm_c_public_ref("xapian error", "raise-xapian-exception"), + scm_from_stringn(msg.c_str(), + msg.length(), + "us-ascii", + SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)); +} +%} + +%exception { + try { + $function + } catch (...) { + handle_exception(); + } +} @@ -1,37 +1 @@ -;;; guile-xapian --- Guile bindings for Xapian -;;; Copyright © 2021 Arun Isaac <arunisaac@systemreboot.net> -;;; -;;; This file is part of guile-xapian. -;;; -;;; guile-xapian is free software: you can redistribute it and/or -;;; modify it under the terms of the GNU General Public License as -;;; published by the Free Software Foundation, either version 2 of the -;;; License, or (at your option) any later version. -;;; -;;; guile-xapian is distributed in the hope that it will be useful, -;;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;;; General Public License for more details. -;;; -;;; You should have received a copy of the GNU General Public License -;;; along with guile-xapian. If not, see -;;; <https://www.gnu.org/licenses/>. - -;;; Run the following command to enter a development environment for -;;; guile-xapian: -;;; -;;; $ guix shell -Df guix.scm - -(use-modules (guix gexp) - (guix git-download) - (guix packages) - (gnu packages autotools) - (gnu packages guile-xyz)) - -(define %source-dir (dirname (current-filename))) - -(package - (inherit guile-xapian) - (source (local-file %source-dir - #:recursive? #t - #:select? (git-predicate %source-dir)))) +.guix/guile-xapian-package.scm
\ No newline at end of file diff --git a/xapian.i.in b/xapian.i.in index f1ffe07..ff4ce80 100644 --- a/xapian.i.in +++ b/xapian.i.in @@ -1,5 +1,5 @@ /* guile-xapian --- Guile bindings for Xapian - * Copyright © 2020 Arun Isaac <arunisaac@systemreboot.net> + * Copyright © 2020, 2023 Arun Isaac <arunisaac@systemreboot.net> * Copyright © 2021, 2022 Bob131 <bob@bob131.so> * * This file is part of guile-xapian. @@ -89,6 +89,7 @@ } %include xapian-head.i +%include except.i %include xapian-headers.i %extend Xapian::ValueIterator { diff --git a/xapian/error.scm b/xapian/error.scm new file mode 100644 index 0000000..4187dda --- /dev/null +++ b/xapian/error.scm @@ -0,0 +1,30 @@ +;;; guile-xapian --- Guile bindings for Xapian +;;; Copyright © 2023 Arun Isaac <arunisaac@systemreboot.net> +;;; +;;; This file is part of guile-xapian. +;;; +;;; guile-xapian is free software: you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation, either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; guile-xapian is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with guile-xapian. If not, see +;;; <https://www.gnu.org/licenses/>. + +(define-module (xapian error) + #:use-module (rnrs conditions) + #:use-module (rnrs exceptions) + #:export (raise-xapian-exception)) + +(define-condition-type &xapian-error &error + make-xapian-error xapian-error? + (message xapian-error-message)) + +(define (raise-xapian-exception message) + (raise (make-xapian-error message))) diff --git a/xapian/xapian.scm b/xapian/xapian.scm index 75924ae..80b4b9b 100644 --- a/xapian/xapian.scm +++ b/xapian/xapian.scm @@ -22,7 +22,9 @@ #:use-module (rnrs arithmetic bitwise) #:use-module (rnrs bytevectors) #:use-module (ice-9 match) + #:use-module (srfi srfi-1) #:use-module (srfi srfi-26) + #:use-module (htmlprag) #:use-module (xapian wrap) #:export (xapian-open xapian-close @@ -48,6 +50,9 @@ index-text! increase-termpos! parse-query + query-and + query-or + query-filter enquire enquire-mset mset-item-docid @@ -55,7 +60,8 @@ mset-item-rank mset-item-weight mset-fold - mset-snippet)) + mset-snippet + mset-sxml-snippet)) (define xapian-open new-Database) (define xapian-close delete-Database) @@ -142,9 +148,15 @@ bytevector." (TermGenerator-set-document term-generator document) term-generator)) -(define* (index-text! term-generator text #:key (wdf-increment 1) prefix) - (apply TermGenerator-index-text term-generator text wdf-increment - (if prefix (list prefix) '()))) +(define* (index-text! term-generator text #:key (wdf-increment 1) (prefix "") (positions? #t)) + "Index @var{text} using @var{term-generator}. @var{wdf-increment} is +the within document frequency increment. @var{prefix} is the term +prefix to use. If @var{positions?} is #f, position information is not +generated." + ((if positions? + TermGenerator-index-text + TermGenerator-index-text-without-positions) + term-generator text wdf-increment prefix)) (define increase-termpos! TermGenerator-increase-termpos) @@ -194,6 +206,34 @@ on the database object." (MSetIterator-next head) (loop head result)))))) +(define (query-combine combine-operator default . queries) + (reduce (cut new-Query combine-operator <> <>) + default + queries)) + +(define (query-and . queries) + "Return a query matching only documents matching all @var{queries}. + +In a weighted context, the weight is the sum of the weights for all +queries." + (apply query-combine (Query-OP-AND) (Query-MatchAll) queries)) + +(define (query-or . queries) + "Return a query matching documents which at least one of @var{queries} +match. + +In a weighted context, the weight is the sum of the weights for +matching queries." + (apply query-combine (Query-OP-OR) (Query-MatchNothing) queries)) + +(define (query-filter . queries) + "Return a query matching only documents matching all @var{queries}, +but only take weight from the first of @var{queries}. + +In a non-weighted context, @code{query-filter} and @code{query-and} +are equivalent." + (apply query-combine (Query-OP-FILTER) (Query-MatchAll) queries)) + (define (get-flag flag-thunk value) (if value (flag-thunk) 0)) @@ -204,9 +244,70 @@ on the database object." (background-model? #t) (exhaustive? #t) (empty-without-match? #t) (cjk-ngram? #t)) + "Generate a snippet from @var{text}. @var{mset} is the xapian +@code{MSet} object representing a list of search results. + +@var{length} is the number of bytes of @var{text} to aim to select. + +The same stemmer used to build the query should be specified as +@var{stemmer}. + +@var{highlight-start} and @var{highlight-end} are inserted +respectively before and after the highlit terms. + +If the chosen snippet seems to start or end mid-sentence, then +@var{omit} is prepended or appended to indicate this. + +If @var{background-model?} is @code{#true}, the relevance of non-query +terms are modelled to prefer snippets containing a more interesting +background. + +If @var{exhaustive?} is @code{#true}, exhaustively evaluate candidate +snippets. Else, snippet generation will stop once a @emph{good enough} +snippet has been found. + +If @var{empty-without-match?} is @code{#true}, return the empty string +if not a single term was found in @var{text}. Else, return a substring +of text without any highlit terms. + +If @var{cjk-ngram?} is @code{#true}, enable generation of n-grams from +CJK text. + +See @code{MSet::snippet} in @file{xapian/mset.h} of the xapian source +for more details." (MSet-snippet mset text length stemmer (bitwise-ior (get-flag MSet-SNIPPET-BACKGROUND-MODEL background-model?) (get-flag MSet-SNIPPET-EXHAUSTIVE exhaustive?) (get-flag MSet-SNIPPET-EMPTY-WITHOUT-MATCH empty-without-match?) (get-flag MSet-SNIPPET-CJK-NGRAM cjk-ngram?)) highlight-start highlight-end omit)) + +(define* (mset-sxml-snippet mset text #:key (length 500) (stemmer (make-stem "none")) + (highlight-proc (lambda (text) + `(b ,text))) + (omit "...") (background-model? #t) (exhaustive? #t) + (empty-without-match? #t) (cjk-ngram? #t)) + "Generate a snippet in SXML form from @var{text}. + +@var{highlight-proc} is a function that is passed the highlit text. It +should return a SXML tree highlighting that text. + +Arguments @var{mset}, @var{length}, @var{stemmer}, @var{omit}, +@var{background-model?}, @var{exhaustive?}, @var{empty-without-match?} +and @var{cjk-ngram?} are the same as in the @code{mset-snippet} +function." + ;; mset-snippet returns serialized HTML. So, we reverse it with + ;; html->sxml. + (match (html->sxml (mset-snippet mset text)) + (('*TOP* children ...) + (append-map (match-lambda + ;; Apply highlight-proc if highlit text. + (('b text) + (list (highlight-proc text))) + ;; Add (br) if end of line. + ((? (cut string-suffix? "\n" <>) text) + (list (string-trim-right text #\newline) + '(br))) + ;; Else, return verbatim. + (text (list text))) + children)))) |