From e0348f347c1c95c0cb527cd2389a107cd7305ef6 Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Mon, 27 Jun 2022 18:09:16 +0530 Subject: Migrate to GOOPS. * tissue/document.scm: Do not import (srfi srfi-9). Import (srfi srfi-19), (ice-9 match) and (oop goops). (): Delete type. (, ): New classes. (date->alist, alist->date, object->scm, scm->object): New functions. (document->alist, alist->document, print-document): Delete functions. (document-term-generator, document-type, document-id-term, document-text, print): New generic methods. (read-gemtext-document): Return object. (index-document): Delete function. * tissue/issue.scm: Do not import (srfi srfi-9) and (srfi srfi-19). Import (oop goops) and (tissue document). (date->iso-8601, iso-8601->date): Move to tissue/document.scm. (, ): Re-implement as class. (issue->alist, post->alist, alist->issue, alist->post, index-issue): Delete functions. (print-issue): Rename to print, a generic method. (print): Use document-title and file-document-path instead of issue-title and issue-file respectively. Accept mset argument. (print-issue-to-gemtext): Use document-title instead of issue-title. (read-gemtext-issue): Return a object. (document-term-generator): New generic methods. * bin/tissue: Import (tissue document) without a prefix. (print-document, alist->document, document->text, index-document): Delete functions. (tissue-search): Use the print generic function. (main): Use the document-type, document-id-term, document-term-generator generic functions and replace-document! instead of index-document. * tissue/conditions.scm (&unknown-document-type-violation): Delete condition. --- tissue/document.scm | 189 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 139 insertions(+), 50 deletions(-) (limited to 'tissue/document.scm') diff --git a/tissue/document.scm b/tissue/document.scm index 202ea35..c70b18e 100644 --- a/tissue/document.scm +++ b/tissue/document.scm @@ -19,52 +19,154 @@ (define-module (tissue document) #:use-module (rnrs hashtables) #:use-module (rnrs io ports) - #:use-module (srfi srfi-9) + #:use-module (srfi srfi-19) #:use-module (srfi srfi-26) #:use-module (srfi srfi-171) + #:use-module (ice-9 match) + #:use-module (oop goops) #:use-module (term ansi-color) #:use-module (xapian xapian) #:use-module (tissue utils) - #:export (document - document? - document-file + #:export (object->scm + scm->object + document-title - document->alist - alist->document - print-document - read-gemtext-document - index-document)) - -(define-record-type - (document file title) - document? - (file document-file) - (title document-title)) - -(define (document->alist document) - "Convert DOCUMENT, a object, to an association list that -can be serialized." - `((type . document) - (file . ,(document-file document)) - (title . ,(document-title document)))) - -(define (alist->document alist) - "Convert ALIST to a object." - (document (assq-ref alist 'file) - (assq-ref alist 'title))) - -(define (print-document document) - "Print DOCUMENT, a object, in search results." + document-type + document-id-term + document-text + document-term-generator + print + + file-document-path + read-gemtext-document)) + +(define (date->iso-8601 date) + "Convert DATE, an SRFI-19 date object, to an ISO-8601 date string." + (date->string date "~4")) + +(define (iso-8601->date str) + "Convert STR, an ISO-8601 date string, to an SRFI-19 date object." + (string->date str "~Y-~m-~dT~H:~M:~S~z")) + +(define (date->alist date) + "Convert DATE, an SRFI-19 date object, to an association list." + `((type . ) + (iso-8601 . ,(date->iso-8601 date)))) + +(define (alist->date alist) + "Convert an association list to an SRFI-19 date object." + (iso-8601->date (assq-ref alist 'iso-8601))) + +(define (object->scm object) + "Convert GOOPS OBJECT to a serializable object." + (cond + ((or (string? object) + (number? object) + (boolean? object)) + object) + ((date? object) + (date->alist object)) + ((list? object) + (list->vector (map object->scm object))) + (else + (cons (cons 'type (class-name (class-of object))) + (map (lambda (slot) + (let* ((slot-name (slot-definition-name slot)) + (value (if (slot-bound? object slot-name) + (slot-ref object slot-name) + (goops-error "Unbound slot ~s in ~s" slot-name object)))) + (cons slot-name (object->scm value)))) + (class-slots (class-of object))))))) + +(define (scm->object scm) + "Convert serializable object SCM to a GOOPS object." + (cond + ((or (string? scm) + (number? scm) + (boolean? scm)) + scm) + ((vector? scm) + (map scm->object (vector->list scm))) + ;; Association list encoding date + ((eq? (assq-ref scm 'type) + ') + (alist->date scm)) + ;; Association list encoding arbitrary object + (else + (let* ((class (module-ref (current-module) + (assq-ref scm 'type))) + (object (make class))) + (for-each (match-lambda + ((slot-name . value) + (unless (eq? slot-name 'type) + (slot-set! object slot-name (scm->object value))))) + scm) + object)))) + +(define-class () + (title #:accessor document-title #:init-keyword #:title)) + +(define-method (document-type (document )) + "document") + +(define-method (document-term-generator (document )) + "Return a term generator for DOCUMENT. The returned term generator has +indexed the type and text of the document. If further free text is to +be indexed, to prevent phrase searches from spanning between this text +and further text, increase-termpos! must be called before indexing." + (let ((term-generator + (make-term-generator + #:stem (make-stem "en") + #:document (make-document + #:data (call-with-output-string + (cut write (object->scm document) <>)) + #:terms `((,(document-id-term document) . 0)))))) + (index-text! term-generator (document-title document) #:prefix "S") + (index-text! term-generator (document-text document)) + term-generator)) + +(define-class () + (path #:accessor file-document-path #:init-keyword #:path)) + +(define-method (document-id-term (document )) + "Return the ID term for DOCUMENT." + (string-append "Q" (file-document-path document))) + +(define-method (document-text (document )) + "Return the full text of DOCUMENT." + (call-with-input-file (file-document-path document) + get-string-all)) + +(define-method (document-term-generator (document )) + "Return a term generator indexing DOCUMENT." + (let ((term-generator (next-method))) + (increase-termpos! term-generator) + (index-text! term-generator (file-document-path document)) + term-generator)) + +(define-method (print (document ) mset) + "Print DOCUMENT in command-line search results. MSET is the xapian +MSet object representing a list of search results." (display (colorize-string (document-title document) 'MAGENTA 'UNDERLINE)) (newline) - (display (colorize-string (document-file document) 'YELLOW)) + (display (colorize-string (file-document-path document) 'YELLOW)) + (newline) (newline) - (newline)) + (let ((snippet (mset-snippet mset + (document-text document) + #:length 200 + #:highlight-start (color 'BOLD 'ON-RED) + #:highlight-end (color 'RESET) + #:stemmer (make-stem "en")))) + (unless (string-null? snippet) + (display snippet) + (newline) + (newline)))) (define (read-gemtext-document file) - "Reade gemtext document from FILE. Return a object." - (document file - (or (call-with-input-file file + "Reade gemtext document from FILE. Return a object." + (make + #:title (or (call-with-input-file file (lambda (port) (port-transduce (tfilter-map (lambda (line) ;; The first level one @@ -75,18 +177,5 @@ can be serialized." get-line-dos-or-unix port))) ;; Fallback to filename if document has no title. - file))) - -(define (index-document db document) - "Index DOCUMENT, a object, in writable xapian DB." - (let* ((idterm (string-append "Q" (document-file document))) - (body (call-with-input-file (document-file document) - get-string-all)) - (doc (make-document #:data (call-with-output-string - (cut write (document->alist document) <>)) - #:terms `((,idterm . 0)))) - (term-generator (make-term-generator #:stem (make-stem "en") - #:document doc))) - (index-text! term-generator "document" #:prefix "XT") - (index-text! term-generator body) - (replace-document! db idterm doc))) + file) + #:path file)) -- cgit v1.2.3