diff options
author | Arun Isaac | 2022-06-27 18:09:16 +0530 |
---|---|---|
committer | Arun Isaac | 2022-06-28 10:05:01 +0530 |
commit | e0348f347c1c95c0cb527cd2389a107cd7305ef6 (patch) | |
tree | 95ac15dac365ef4d93ceda65d2642f0708291f18 /tissue/document.scm | |
parent | 712bada146097dc9edd032f5810b753e1fea97a0 (diff) | |
download | tissue-e0348f347c1c95c0cb527cd2389a107cd7305ef6.tar.gz tissue-e0348f347c1c95c0cb527cd2389a107cd7305ef6.tar.lz tissue-e0348f347c1c95c0cb527cd2389a107cd7305ef6.zip |
Migrate to GOOPS.
* tissue/document.scm: Do not import (srfi srfi-9). Import (srfi
srfi-19), (ice-9 match) and (oop goops).
(<document>): Delete type.
(<document>, <file-document>): New classes.
(date->alist, alist->date, object->scm, scm->object): New functions.
(document->alist, alist->document, print-document): Delete functions.
(document-term-generator, document-type, document-id-term,
document-text, print): New generic methods.
(read-gemtext-document): Return <file-document> object.
(index-document): Delete function.
* tissue/issue.scm: Do not import (srfi srfi-9) and (srfi
srfi-19). Import (oop goops) and (tissue document).
(date->iso-8601, iso-8601->date): Move to tissue/document.scm.
(<issue>, <post>): Re-implement as class.
(issue->alist, post->alist, alist->issue, alist->post, index-issue):
Delete functions.
(print-issue): Rename to print, a generic method.
(print): Use document-title and file-document-path instead of
issue-title and issue-file respectively. Accept mset argument.
(print-issue-to-gemtext): Use document-title instead of issue-title.
(read-gemtext-issue): Return a <issue> object.
(document-term-generator): New generic methods.
* bin/tissue: Import (tissue document) without a prefix.
(print-document, alist->document, document->text, index-document):
Delete functions.
(tissue-search): Use the print generic function.
(main): Use the document-type, document-id-term,
document-term-generator generic functions and replace-document!
instead of index-document.
* tissue/conditions.scm (&unknown-document-type-violation): Delete
condition.
Diffstat (limited to 'tissue/document.scm')
-rw-r--r-- | tissue/document.scm | 189 |
1 files changed, 139 insertions, 50 deletions
diff --git a/tissue/document.scm b/tissue/document.scm index 202ea35..c70b18e 100644 --- a/tissue/document.scm +++ b/tissue/document.scm @@ -19,52 +19,154 @@ (define-module (tissue document) #:use-module (rnrs hashtables) #:use-module (rnrs io ports) - #:use-module (srfi srfi-9) + #:use-module (srfi srfi-19) #:use-module (srfi srfi-26) #:use-module (srfi srfi-171) + #:use-module (ice-9 match) + #:use-module (oop goops) #:use-module (term ansi-color) #:use-module (xapian xapian) #:use-module (tissue utils) - #:export (document - document? - document-file + #:export (object->scm + scm->object + <document> document-title - document->alist - alist->document - print-document - read-gemtext-document - index-document)) - -(define-record-type <document> - (document file title) - document? - (file document-file) - (title document-title)) - -(define (document->alist document) - "Convert DOCUMENT, a <document> object, to an association list that -can be serialized." - `((type . document) - (file . ,(document-file document)) - (title . ,(document-title document)))) - -(define (alist->document alist) - "Convert ALIST to a <document> object." - (document (assq-ref alist 'file) - (assq-ref alist 'title))) - -(define (print-document document) - "Print DOCUMENT, a <document> object, in search results." + document-type + document-id-term + document-text + document-term-generator + print + <file-document> + file-document-path + read-gemtext-document)) + +(define (date->iso-8601 date) + "Convert DATE, an SRFI-19 date object, to an ISO-8601 date string." + (date->string date "~4")) + +(define (iso-8601->date str) + "Convert STR, an ISO-8601 date string, to an SRFI-19 date object." + (string->date str "~Y-~m-~dT~H:~M:~S~z")) + +(define (date->alist date) + "Convert DATE, an SRFI-19 date object, to an association list." + `((type . <date>) + (iso-8601 . ,(date->iso-8601 date)))) + +(define (alist->date alist) + "Convert an association list to an SRFI-19 date object." + (iso-8601->date (assq-ref alist 'iso-8601))) + +(define (object->scm object) + "Convert GOOPS OBJECT to a serializable object." + (cond + ((or (string? object) + (number? object) + (boolean? object)) + object) + ((date? object) + (date->alist object)) + ((list? object) + (list->vector (map object->scm object))) + (else + (cons (cons 'type (class-name (class-of object))) + (map (lambda (slot) + (let* ((slot-name (slot-definition-name slot)) + (value (if (slot-bound? object slot-name) + (slot-ref object slot-name) + (goops-error "Unbound slot ~s in ~s" slot-name object)))) + (cons slot-name (object->scm value)))) + (class-slots (class-of object))))))) + +(define (scm->object scm) + "Convert serializable object SCM to a GOOPS object." + (cond + ((or (string? scm) + (number? scm) + (boolean? scm)) + scm) + ((vector? scm) + (map scm->object (vector->list scm))) + ;; Association list encoding date + ((eq? (assq-ref scm 'type) + '<date>) + (alist->date scm)) + ;; Association list encoding arbitrary object + (else + (let* ((class (module-ref (current-module) + (assq-ref scm 'type))) + (object (make class))) + (for-each (match-lambda + ((slot-name . value) + (unless (eq? slot-name 'type) + (slot-set! object slot-name (scm->object value))))) + scm) + object)))) + +(define-class <document> () + (title #:accessor document-title #:init-keyword #:title)) + +(define-method (document-type (document <document>)) + "document") + +(define-method (document-term-generator (document <document>)) + "Return a term generator for DOCUMENT. The returned term generator has +indexed the type and text of the document. If further free text is to +be indexed, to prevent phrase searches from spanning between this text +and further text, increase-termpos! must be called before indexing." + (let ((term-generator + (make-term-generator + #:stem (make-stem "en") + #:document (make-document + #:data (call-with-output-string + (cut write (object->scm document) <>)) + #:terms `((,(document-id-term document) . 0)))))) + (index-text! term-generator (document-title document) #:prefix "S") + (index-text! term-generator (document-text document)) + term-generator)) + +(define-class <file-document> (<document>) + (path #:accessor file-document-path #:init-keyword #:path)) + +(define-method (document-id-term (document <file-document>)) + "Return the ID term for DOCUMENT." + (string-append "Q" (file-document-path document))) + +(define-method (document-text (document <file-document>)) + "Return the full text of DOCUMENT." + (call-with-input-file (file-document-path document) + get-string-all)) + +(define-method (document-term-generator (document <file-document>)) + "Return a term generator indexing DOCUMENT." + (let ((term-generator (next-method))) + (increase-termpos! term-generator) + (index-text! term-generator (file-document-path document)) + term-generator)) + +(define-method (print (document <file-document>) mset) + "Print DOCUMENT in command-line search results. MSET is the xapian +MSet object representing a list of search results." (display (colorize-string (document-title document) 'MAGENTA 'UNDERLINE)) (newline) - (display (colorize-string (document-file document) 'YELLOW)) + (display (colorize-string (file-document-path document) 'YELLOW)) + (newline) (newline) - (newline)) + (let ((snippet (mset-snippet mset + (document-text document) + #:length 200 + #:highlight-start (color 'BOLD 'ON-RED) + #:highlight-end (color 'RESET) + #:stemmer (make-stem "en")))) + (unless (string-null? snippet) + (display snippet) + (newline) + (newline)))) (define (read-gemtext-document file) - "Reade gemtext document from FILE. Return a <document> object." - (document file - (or (call-with-input-file file + "Reade gemtext document from FILE. Return a <file-document> object." + (make <file-document> + #:title (or (call-with-input-file file (lambda (port) (port-transduce (tfilter-map (lambda (line) ;; The first level one @@ -75,18 +177,5 @@ can be serialized." get-line-dos-or-unix port))) ;; Fallback to filename if document has no title. - file))) - -(define (index-document db document) - "Index DOCUMENT, a <document> object, in writable xapian DB." - (let* ((idterm (string-append "Q" (document-file document))) - (body (call-with-input-file (document-file document) - get-string-all)) - (doc (make-document #:data (call-with-output-string - (cut write (document->alist document) <>)) - #:terms `((,idterm . 0)))) - (term-generator (make-term-generator #:stem (make-stem "en") - #:document doc))) - (index-text! term-generator "document" #:prefix "XT") - (index-text! term-generator body) - (replace-document! db idterm doc))) + file) + #:path file)) |