diff options
author Arun Isaac 2023-01-29 16:23:45 +0000
committer Arun Isaac 2023-01-29 16:46:38 +0000
commit 8516e5d0f5b64c681d31efa2944bb9a9de32dbbc (patch)
parent d5adbcf983bb2d83fd46041ad3226dd8912266a6 (diff)
document: Inter snippet source text into the xapian index.
We store snippet source text in a slot of the <document> class, thus interring it into the xapian index. This allows us to render search snippets using only the xapian index without referring back to the git repository.

* tissue/document.scm (<document>)[snippet-source-text]: New slot.
* tissue/document.scm (document-snippet-source-text): Delete method.
(document-html-snippet): Remove blank lines from snippet source text before generating a snippet.
* tissue/commit.scm (document-snippet-source-text): Delete method.
(repository-commits): Initialize snippet-source-text.
* tissue/skribilo.scm (fragment-text): New function.
(document-fragment): Initialize snippet-source-text.
(document-text): Use fragment-text.
(document-snippet-source-text): Delete method.
* tissue/file-document.scm (file-text): New function.
(document-text): Use file-text.
(read-gemtext-document): Initialize snippet-source-text.
* tissue/issue.scm (read-gemtext-issue): Initialize snippet-source-text.
* issues/skribilo-fragment-snippets-need-code-from-repo.gmi: Close issue.
6 files changed, 47 insertions, 40 deletions
diff --git a/issues/skribilo-fragment-snippets-need-code-from-repo.gmi b/issues/skribilo-fragment-snippets-need-code-from-repo.gmi
index 0b0fdf6..74109ed 100644
--- a/issues/skribilo-fragment-snippets-need-code-from-repo.gmi
+++ b/issues/skribilo-fragment-snippets-need-code-from-repo.gmi
@@ -11,3 +11,9 @@ Also, evaluating an entire skribilo document on every search query may be costly
Therefore, it might be worthwhile to inter snippet source texts into the xapian index itself—specifically in the document data field. This will of course increase the size of the xapian index considerably. But, storage is cheap, and there does not seem to be any more elegant way out.
Until this issue is fixed, we have temporarily disabled snippets for skribilo fragments.
+## Resolution
+Search snippet source texts are now interred into the xapian index.
+* closed
diff --git a/tissue/commit.scm b/tissue/commit.scm
index c151d3a..b910695 100644
--- a/tissue/commit.scm
+++ b/tissue/commit.scm
@@ -51,15 +51,6 @@
"Return a date representing the recency of COMMIT."
(doc:commit-author-date commit))
-(define-method (document-snippet-source-text (commit <commit>))
- "Return the source text for COMMIT from which to extract a search
-result snippet."
- ;; The snippet source text excludes the first paragraph (i.e., the
- ;; summary line) of the commit. Hence, we use commit-body.
- (commit-body
- (commit-lookup (current-git-repository)
- (string->oid (commit-hash commit)))))
(define-method (document-text (commit <commit>))
"Return the full text of COMMIT."
@@ -101,7 +92,14 @@ REPOSITORY."
#:hash (oid->string (commit-id commit))
#:author (resolve-alias (signature-name (commit-author commit))
- #:author-date (commit-author-date commit))
+ #:author-date (commit-author-date commit)
+ ;; The snippet source text excludes the
+ ;; first paragraph (i.e., the summary line)
+ ;; of the commit. Hence, we use commit-body.
+ #:snippet-source-text
+ (commit-body
+ (commit-lookup (current-git-repository)
+ (commit-id commit))))
diff --git a/tissue/document.scm b/tissue/document.scm
index c05e40f..1e55e67 100644
--- a/tissue/document.scm
+++ b/tissue/document.scm
@@ -133,7 +133,9 @@ mutate @var{object}."
(define-class <document> ()
(title #:accessor document-title #:init-keyword #:title)
- (web-uri #:accessor document-web-uri #:init-keyword #:web-uri))
+ (web-uri #:accessor document-web-uri #:init-keyword #:web-uri)
+ (snippet-source-text #:accessor document-snippet-source-text
+ #:init-keyword #:snippet-source-text))
(define-generic document-id-term)
(define-generic document-text)
@@ -173,21 +175,17 @@ and further text, increase-termpos! must be called before indexing."
(index-text! term-generator (document-text document))
-(define-method (document-snippet-source-text (document <document>))
- "Return the source text for DOCUMENT from which to extract a search
-result snippet."
- ;; Remove blank lines from document text.
- (string-join
- (remove string-blank?
- (string-split (document-text document)
- #\newline))
- "\n"))
(define (document-html-snippet document mset)
"Return snippet for DOCUMENT. MSET is the xapian MSet object
representing a list of search results."
(mset-snippet mset
- (document-snippet-source-text document)
+ ;; Remove blank lines from text.
+ (string-join
+ (remove string-blank?
+ (string-split
+ (document-snippet-source-text document)
+ #\newline))
+ "\n")
#:length 200
#:highlight-start "<b>"
#:highlight-end "</b>"
diff --git a/tissue/file-document.scm b/tissue/file-document.scm
index 8de9645..ccd6a48 100644
--- a/tissue/file-document.scm
+++ b/tissue/file-document.scm
@@ -69,10 +69,14 @@
"Return a date representing the recency of DOCUMENT."
(file-document-last-updated-date document))
+(define (file-text file)
+ "Return the contents of text @var{file}."
+ (call-with-file-in-git (current-git-repository) file
+ get-string-all))
(define-method (document-text (document <file-document>))
"Return the full text of DOCUMENT."
- (call-with-file-in-git (current-git-repository) (file-document-path document)
- get-string-all))
+ (file-text (file-document-path document)))
(define-method (document-term-generator (document <file-document>))
"Return a term generator indexing DOCUMENT."
@@ -148,4 +152,5 @@ MSet object representing a list of search results."
;; Fallback to filename if document has no title.
#:path file
- #:commits (commits-affecting-file file)))
+ #:commits (commits-affecting-file file)
+ #:snippet-source-text (file-text file)))
diff --git a/tissue/issue.scm b/tissue/issue.scm
index 529fcfb..ebfca52 100644
--- a/tissue/issue.scm
+++ b/tissue/issue.scm
@@ -295,4 +295,5 @@ object."
#:open? (not (member "closed" all-keywords))
#:tasks (hashtable-ref file-details 'tasks 0)
#:completed-tasks (hashtable-ref file-details 'completed-tasks 0)
- #:commits (file-document-commits file-document))))
+ #:commits (file-document-commits file-document)
+ #:snippet-source-text (document-snippet-source-text file-document))))
diff --git a/tissue/skribilo.scm b/tissue/skribilo.scm
index d0c90ba..b17b30d 100644
--- a/tissue/skribilo.scm
+++ b/tissue/skribilo.scm
@@ -47,6 +47,14 @@
(call-with-input-file file
(cut evaluate-ast-from-port <> #:reader (make-reader reader-name)))))
+(define (fragment-text file identifier reader-name)
+ "Return the full text of skribilo fragment in @var{file} identified by
+@var{identifier} using reader named by @var{reader-name}."
+ (call-with-output-string
+ (cut ast->text
+ (document-node file identifier reader-name)
+ <>)))
(define* (document-fragment file identifier #:key (reader-name 'skribe))
"Return a @code{<skribilo-fragment>} object describing node identified
by @var{identifier} in @var{file} read using reader named by
@@ -58,7 +66,8 @@ by @var{identifier} in @var{file} read using reader named by
#:path file
#:commits (commits-affecting-file file)
#:identifier identifier
- #:reader-name reader-name))
+ #:reader-name reader-name
+ #:snippet-source-text (fragment-text file identifier reader-name)))
(define-method (document-id-term (fragment <skribilo-fragment>))
"Return the ID term for skribilo @var{fragment}."
@@ -89,16 +98,6 @@ output to @var{port}."
(define-method (document-text (fragment <skribilo-fragment>))
"Return the full text of skribilo @var{fragment}."
- (call-with-output-string
- (cut ast->text
- (document-node (file-document-path fragment)
- (skribilo-fragment-identifier fragment)
- (skribilo-fragment-reader-name fragment))
- <>)))
-;; We temporarily disable snippets for skribilo fragments until
-;; issues/skribilo-fragment-snippets-need-code-from-repo.gmi is fixed.
-(define-method (document-snippet-source-text (fragment <skribilo-fragment>))
- "Return the source text for FRAGMENT from which to extract a search
-result snippet."
- "")
+ (fragment-text (file-document-path fragment)
+ (skribilo-fragment-identifier fragment)
+ (skribilo-fragment-reader-name fragment)))