diff options
author | Ludovic Court`es | 2007-03-08 18:16:51 +0000 |
---|---|---|
committer | Ludovic Court`es | 2007-03-08 18:16:51 +0000 |
commit | 9c1703c9e6cab30e9f47f41a442083edebbed078 (patch) | |
tree | 41d0f97eab99b681405cca92f4b1a82c3e3cf352 /src | |
parent | 00b478ebf9581b0b36b7d6a6ed67d7c910583a2a (diff) | |
parent | a456e009f1cb0e05d74adac3ad8b3092a280b054 (diff) | |
download | skribilo-9c1703c9e6cab30e9f47f41a442083edebbed078.tar.gz skribilo-9c1703c9e6cab30e9f47f41a442083edebbed078.tar.lz skribilo-9c1703c9e6cab30e9f47f41a442083edebbed078.zip |
Added an experimental `diff' package.
* src/guile/Makefile.am (dist_guilemodule_DATA): Added `differ.scm'.
* src/guile/skribilo/package/Makefile.am (dist_guilemodule_DATA): Added
`diff.scm'.
git-archimport-id: skribilo@sv.gnu.org--2006/skribilo--devo--1.2--patch-63
Diffstat (limited to 'src')
-rw-r--r-- | src/guile/Makefile.am | 2 | ||||
-rw-r--r-- | src/guile/differ.scm | 443 | ||||
-rw-r--r-- | src/guile/skribilo/package/Makefile.am | 2 | ||||
-rw-r--r-- | src/guile/skribilo/package/diff.scm | 304 |
4 files changed, 749 insertions, 2 deletions
diff --git a/src/guile/Makefile.am b/src/guile/Makefile.am index e410a87..6a27133 100644 --- a/src/guile/Makefile.am +++ b/src/guile/Makefile.am @@ -1,5 +1,5 @@ SUBDIRS = skribilo guilemoduledir = $(GUILE_SITE) -dist_guilemodule_DATA = skribilo.scm +dist_guilemodule_DATA = skribilo.scm differ.scm EXTRA_DIST = README diff --git a/src/guile/differ.scm b/src/guile/differ.scm new file mode 100644 index 0000000..f8ff18b --- /dev/null +++ b/src/guile/differ.scm @@ -0,0 +1,443 @@ +;;; "differ.scm" O(NP) Sequence Comparison Algorithm. +;;; Copyright (C) 2001, 2002, 2003, 2004 Aubrey Jaffer + +;;; +;;; Taken from Slib 3a2 and adapated by Ludovic Courtès +;;; <ludovic.courtes@laas.fr> to use Guile's native API (March 2007). +;;; + +(define-module (differ) + :export (diff:longest-common-subsequence diff:edits + diff:edit-length)) + +;Permission to copy this software, to modify it, to redistribute it, +;to distribute modified versions, and to use it for any purpose is +;granted, subject to the following restrictions and understandings. +; +;1. Any copy made of this software must include this copyright notice +;in full. +; +;2. I have made no warranty or representation that the operation of +;this software will be error-free, and I am under no obligation to +;provide any services, by way of maintenance, update, or otherwise. +; +;3. In conjunction with products arising from the use of this +;material, there shall be no use of my name in any advertising, +;promotional, or sales literature without prior written consent in +;each case. + +;;@noindent +;;@code{diff:edit-length} implements the algorithm: +;; +;;@ifinfo +;;@example +;;S. Wu, E. Myers, U. Manber, and W. Miller, +;; "An O(NP) Sequence Comparison Algorithm," +;; Information Processing Letters 35, 6 (1990), 317-323. +;; @url{http://www.cs.arizona.edu/people/gene/PAPERS/np_diff.ps} +;;@end example +;;@end ifinfo +;;@ifset html +;;S. Wu, <A HREF="http://www.cs.arizona.edu/people/gene/vita.html"> +;;E. Myers,</A> U. Manber, and W. Miller, +;;<A HREF="http://www.cs.arizona.edu/people/gene/PAPERS/np_diff.ps"> +;;"An O(NP) Sequence Comparison Algorithm"</A>, +;;Information Processing Letters 35, 6 (1990), 317-323. +;;@end ifset +;; +;;@noindent +;;The values returned by @code{diff:edit-length} can be used to gauge +;;the degree of match between two sequences. +;; +;;@noindent +;;@code{diff:edits} and @code{diff:longest-common-subsequence} combine +;;the algorithm with the divide-and-conquer method outlined in: +;; +;;@ifinfo +;;@example +;;E. Myers and W. Miller, +;; "Optimal alignments in linear space", +;; Computer Application in the Biosciences (CABIOS), 4(1):11-17, 1988. +;; @url{http://www.cs.arizona.edu/people/gene/PAPERS/linear.ps} +;;@end example +;;@end ifinfo +;;@ifset html +;;<A HREF="http://www.cs.arizona.edu/people/gene/vita.html"> +;;E. Myers,</A> and W. Miller, +;;<A HREF="http://www.cs.arizona.edu/people/gene/PAPERS/linear.ps"> +;;"Optimal alignments in linear space"</A>, +;;Computer Application in the Biosciences (CABIOS), 4(1):11-17, 1988. +;;@end ifset +;; +;;@noindent +;;If the items being sequenced are text lines, then the computed +;;edit-list is equivalent to the output of the @dfn{diff} utility +;;program. If the items being sequenced are words, then it is like the +;;lesser known @dfn{spiff} program. + + +;;; p-lim is half the number of gratuitous edits for strings of given +;;; lengths. +;;; When passed #f CC, fp:compare returns edit-distance if successful; +;;; #f otherwise (p > p-lim). When passed CC, fp:compare returns #f. +(define (fp:compare fp fpoff CC A M B N p-lim) + (define Delta (- N M)) + ;;(if (negative? Delta) (slib:error 'fp:compare (fp:subarray A 0 M) '> (fp:subarray B 0 N))) + ;;(set! compares (+ 1 compares)) ;(print 'fp:compare M N p-lim) + (let loop ((p 0)) + (do ((k (- p) (+ 1 k))) + ((>= k Delta)) + (fp:run fp fpoff k A M B N CC p)) + (do ((k (+ Delta p) (+ -1 k))) + ((<= k Delta)) + (fp:run fp fpoff k A M B N CC p)) + (let ((fpval (fp:run fp fpoff Delta A M B N CC p))) + ;; At this point, the cost to (fpval-Delta, fpval) is Delta + 2*p + (cond ((and (not CC) (<= N fpval)) (+ Delta (* 2 p))) + ((and (not (negative? p-lim)) (>= p p-lim)) #f) + (else (loop (+ 1 p))))))) + +;;; Traces runs of matches until they end; then set fp[k]=y. +;;; If CC is supplied, set each CC[y] = min(CC[y], cost) for run. +;;; Returns furthest y reached. +(define (fp:run fp fpoff k A M B N CC p) + (define cost (+ k p p)) + (let snloop ((y (max (+ (array-ref fp (+ -1 k fpoff)) 1) + (array-ref fp (+ 1 k fpoff))))) + (define x (- y k)) + (and CC (<= y N) + (let ((xcst (- M x))) + (cond ((negative? xcst)) + (else (array-set! CC + (min (+ xcst cost) (array-ref CC y)) + y))))) + ;;(set! tick (+ 1 tick)) + (cond ((and (< x M) (< y N) + (eqv? (array-ref A x) (array-ref B y))) + (snloop (+ 1 y))) + (else (array-set! fp y (+ fpoff k)) + y)))) + +;;; Check that only 1 and -1 steps between adjacent CC entries. +;;(define (fp:step-check A M B N CC) +;; (do ((cdx (+ -1 N) (+ -1 cdx))) +;; ((negative? cdx)) +;; (case (- (array-ref CC cdx) (array-ref CC (+ 1 cdx))) +;; ((1 -1) #t) +;; (else (cond ((> 30 (car (array-dimensions CC))) +;; (display "A: ") (print A) +;; (display "B: ") (print B))) +;; (slib:warn +;; "CC" (append (list (max 0 (+ -5 cdx)) ': (min (+ 1 N) (+ 5 cdx)) +;; 'of) +;; (array-dimensions CC)) +;; (fp:subarray CC (max 0 (+ -5 cdx)) (min (+ 1 N) (+ 5 cdx)))))))) + +;;; Correct cost jumps left by fp:compare [which visits only a few (x,y)]. +;;(define (smooth-costs CC N) +;; (do ((cdx (+ -1 N) (+ -1 cdx))) ; smooth from end +;; ((negative? cdx)) +;; (array-set! CC (min (array-ref CC cdx) (+ 1 (array-ref CC (+ 1 cdx)))) +;; cdx)) +;; (do ((cdx 1 (+ 1 cdx))) ; smooth toward end +;; ((> cdx N)) +;; (array-set! CC (min (array-ref CC cdx) (+ 1 (array-ref CC (+ -1 cdx)))) +;; cdx)) +;; CC) + +(define (diff:mid-split N RR CC cost) + ;; RR is not longer than CC. So do for each element of RR. + (let loop ((cdx (+ 1 (quotient N 2))) + (rdx (quotient N 2))) + ;;(if (negative? rdx) (slib:error 'negative? 'rdx)) + (cond ((eqv? cost (+ (array-ref CC rdx) (array-ref RR (- N rdx)))) rdx) + ((eqv? cost (+ (array-ref CC cdx) (array-ref RR (- N cdx)))) cdx) + (else (loop (+ 1 cdx) (+ -1 rdx)))))) + +;;; Return 0-based shared array. +;;; Reverse RA if END < START. +(define (fp:subarray RA start end) + (define n-len (abs (- end start))) + (if (< end start) + (make-shared-array RA (lambda (idx) (list (+ -1 (- start idx)))) n-len) + (make-shared-array RA (lambda (idx) (list (+ start idx))) n-len))) + +(define (fp:init! fp fpoff fill mindx maxdx) + (define mlim (+ fpoff mindx)) + (do ((idx (+ fpoff maxdx) (+ -1 idx))) + ((< idx mlim)) + (array-set! fp fill idx))) + +;;; Split A[start-a..end-a] (shorter array) into smaller and smaller chunks. +;;; EDX is index into EDITS. +;;; EPO is insert/delete polarity (+1 or -1) +(define (diff:divide-and-conquer fp fpoff CCRR A start-a end-a B start-b end-b edits edx epo p-lim) + (define mid-a (quotient (+ start-a end-a) 2)) + (define len-b (- end-b start-b)) + (define len-a (- end-a start-a)) + (let ((tcst (+ p-lim p-lim (- len-b len-a)))) + (define CC (fp:subarray CCRR 0 (+ len-b 1))) + (define RR (fp:subarray CCRR (+ len-b 1) (* 2 (+ len-b 1)))) + (define M2 (- end-a mid-a)) + (define M1 (- mid-a start-a)) + (fp:init! CC 0 (+ len-a len-b) 0 len-b) + (fp:init! fp fpoff -1 (- (+ 1 p-lim)) (+ 1 p-lim (- len-b M1))) + (fp:compare fp fpoff CC + (fp:subarray A start-a mid-a) M1 + (fp:subarray B start-b end-b) len-b + (min p-lim len-a)) + (fp:init! RR 0 (+ len-a len-b) 0 len-b) + (fp:init! fp fpoff -1 (- (+ 1 p-lim)) (+ 1 p-lim (- len-b M2))) + (fp:compare fp fpoff RR + (fp:subarray A end-a mid-a) M2 + (fp:subarray B end-b start-b) len-b + (min p-lim len-a)) + ;;(smooth-costs CC len-b) (smooth-costs RR len-b) + (let ((b-splt (diff:mid-split len-b RR CC tcst))) + (define est-c (array-ref CC b-splt)) + (define est-r (array-ref RR (- len-b b-splt))) + ;;(set! splts (cons (/ b-splt (max .1 len-b)) splts)) + ;;(display "A: ") (array-for-each display (fp:subarray A start-a mid-a)) (display " + ") (array-for-each display (fp:subarray A mid-a end-a)) (newline) + ;;(display "B: ") (array-for-each display (fp:subarray B start-b end-b)) (newline) + ;;(print 'cc cc) (print 'rr (fp:subarray RR (+ 1 len-b) 0)) + ;;(print (make-string (+ 7 (* 2 b-splt)) #\-) '^ (list b-splt)) + (check-cost! 'CC est-c + (diff2et fp fpoff CCRR + A start-a mid-a + B start-b (+ start-b b-splt) + edits edx epo + (quotient (- est-c (- b-splt (- mid-a start-a))) + 2))) + (check-cost! 'RR est-r + (diff2et fp fpoff CCRR + A mid-a end-a + B (+ start-b b-splt) end-b + edits (+ est-c edx) epo + (quotient (- est-r (- (- len-b b-splt) + (- end-a mid-a))) + 2))) + (+ est-c est-r)))) + +;;; Trim; then diff sub-arrays; either one longer. Returns edit-length +(define (diff2et fp fpoff CCRR A start-a end-a B start-b end-b edits edx epo p-lim) + ;; (if (< (- end-a start-a) p-lim) (slib:warn 'diff2et 'len-a (- end-a start-a) 'len-b (- end-b start-b) 'p-lim p-lim)) + (do ((bdx (+ -1 end-b) (+ -1 bdx)) + (adx (+ -1 end-a) (+ -1 adx))) + ((not (and (<= start-b bdx) + (<= start-a adx) + (eqv? (array-ref A adx) (array-ref B bdx)))) + (do ((bsx start-b (+ 1 bsx)) + (asx start-a (+ 1 asx))) + ((not (and (< bsx bdx) + (< asx adx) + (eqv? (array-ref A asx) (array-ref B bsx)))) + ;;(print 'trim-et (- asx start-a) '+ (- end-a adx)) + (let ((delta (- (- bdx bsx) (- adx asx)))) + (if (negative? delta) + (diff2ez fp fpoff CCRR B bsx (+ 1 bdx) A asx (+ 1 adx) + edits edx (- epo) (+ delta p-lim)) + (diff2ez fp fpoff CCRR A asx (+ 1 adx) B bsx (+ 1 bdx) + edits edx epo p-lim)))) + ;;(set! tick (+ 1 tick)) + )) + ;;(set! tick (+ 1 tick)) + )) + +;;; Diff sub-arrays, A not longer than B. Returns edit-length +(define (diff2ez fp fpoff CCRR A start-a end-a B start-b end-b edits edx epo p-lim) + (define len-a (- end-a start-a)) + (define len-b (- end-b start-b)) + ;;(if (> len-a len-b) (slib:error 'diff2ez len-a '> len-b)) + (cond ((zero? p-lim) ; B inserts only + (if (= len-b len-a) + 0 ; A = B; no edits + (let loop ((adx start-a) + (bdx start-b) + (edx edx)) + (cond ((>= bdx end-b) (- len-b len-a)) + ((>= adx end-a) + (do ((idx bdx (+ 1 idx)) + (edx edx (+ 1 edx))) + ((>= idx end-b) (- len-b len-a)) + (array-set! edits (* epo (+ 1 idx)) edx))) + ((eqv? (array-ref A adx) (array-ref B bdx)) + ;;(set! tick (+ 1 tick)) + (loop (+ 1 adx) (+ 1 bdx) edx)) + (else (array-set! edits (* epo (+ 1 bdx)) edx) + ;;(set! tick (+ 1 tick)) + (loop adx (+ 1 bdx) (+ 1 edx))))))) + ((<= len-a p-lim) ; delete all A; insert all B + ;;(if (< len-a p-lim) (slib:error 'diff2ez len-a len-b 'p-lim p-lim)) + (do ((idx start-a (+ 1 idx)) + (jdx start-b (+ 1 jdx))) + ((and (>= idx end-a) (>= jdx end-b)) (+ len-a len-b)) + (cond ((< jdx end-b) + (array-set! edits (* epo (+ 1 jdx)) edx) + (set! edx (+ 1 edx)))) + (cond ((< idx end-a) + (array-set! edits (* epo (- -1 idx)) edx) + (set! edx (+ 1 edx)))))) + (else (diff:divide-and-conquer + fp fpoff CCRR A start-a end-a B start-b end-b + edits edx epo p-lim)))) + +(define (check-cost! name est cost) + (if (not (eqv? est cost)) + (slib:warn name "cost check failed" est '!= cost))) + +;;;; Routines interfacing API layer to algorithms. + +(define (diff:invert-edits! edits) + (define cost (car (array-dimensions edits))) + (do ((idx (+ -1 cost) (+ -1 idx))) + ((negative? idx)) + (array-set! edits (- (array-ref edits idx)) idx))) + +;;; len-a < len-b +(define (edits2lcs! lcs edits A) + (define cost (car (array-dimensions edits))) + (define len-a (car (array-dimensions A))) + (let loop ((edx 0) + (sdx 0) + (adx 0)) + (let ((edit (if (< edx cost) (array-ref edits edx) 0))) + (cond ((>= adx len-a)) + ((positive? edit) + (loop (+ 1 edx) sdx adx)) + ((zero? edit) + (array-set! lcs (array-ref A adx) sdx) + (loop edx (+ 1 sdx) (+ 1 adx))) + ((>= adx (- -1 edit)) + (loop (+ 1 edx) sdx (+ 1 adx))) + (else + (array-set! lcs (array-ref A adx) sdx) + (loop edx (+ 1 sdx) (+ 1 adx))))))) + +;; A not longer than B (M <= N) +(define (diff2edits! edits fp CCRR A B) + (define N (car (array-dimensions B))) + (define M (car (array-dimensions A))) + (define est (car (array-dimensions edits))) + (let ((p-lim (quotient (- est (- N M)) 2))) + (check-cost! 'diff2edits! + est + (diff2et fp (+ 1 p-lim) + CCRR A 0 M B 0 N edits 0 1 p-lim)))) + +;; A not longer than B (M <= N) +(define (diff2editlen fp A B p-lim) + (define N (car (array-dimensions B))) + (define M (car (array-dimensions A))) + (let ((maxdx (if (negative? p-lim) (+ 1 N) (+ 1 p-lim (- N M)))) + (mindx (if (negative? p-lim) (- (+ 1 M)) (- (+ 1 p-lim))))) + (fp:init! fp (- mindx) -1 mindx maxdx) + (fp:compare fp (- mindx) #f A M B N p-lim))) + + +;;; +;;; Public API. +;;; + +;;@args array1 array2 p-lim +;;@args array1 array2 +;;@1 and @2 are one-dimensional arrays. +;; +;;The non-negative integer @3, if provided, is maximum number of +;;deletions of the shorter sequence to allow. @0 will return @code{#f} +;;if more deletions would be necessary. +;; +;;@0 returns a one-dimensional array of length @code{(quotient (- (+ +;;len1 len2) (diff:edit-length @1 @2)) 2)} holding the longest sequence +;;common to both @var{array}s. +(define (diff:longest-common-subsequence A B . p-lim) + (define M (car (array-dimensions A))) + (define N (car (array-dimensions B))) + (set! p-lim (if (null? p-lim) -1 (car p-lim))) + (let ((edits (if (< N M) + (diff:edits B A p-lim) + (diff:edits A B p-lim)))) + (and edits + (let* ((cost (car (array-dimensions edits))) + (lcs (make-typed-array (array-type A) *unspecified* + (/ (- (+ N M) cost) 2)))) + (edits2lcs! lcs edits (if (< N M) B A)) + lcs)))) + +;;@args array1 array2 p-lim +;;@args array1 array2 +;;@1 and @2 are one-dimensional arrays. +;; +;;The non-negative integer @3, if provided, is maximum number of +;;deletions of the shorter sequence to allow. @0 will return @code{#f} +;;if more deletions would be necessary. +;; +;;@0 returns a vector of length @code{(diff:edit-length @1 @2)} composed +;;of a shortest sequence of edits transformaing @1 to @2. +;; +;;Each edit is an integer: +;;@table @asis +;;@item @var{k} > 0 +;;Inserts @code{(array-ref @1 (+ -1 @var{j}))} into the sequence. +;;@item @var{k} < 0 +;;Deletes @code{(array-ref @2 (- -1 @var{k}))} from the sequence. +;;@end table +(define (diff:edits A B . p-lim) + (define M (car (array-dimensions A))) + (define N (car (array-dimensions B))) + (set! p-lim (if (null? p-lim) -1 (car p-lim))) + (let ((fp (make-typed-array 's32 0 + (if (negative? p-lim) + (+ 3 M N) + (+ 3 (abs (- N M)) p-lim p-lim))))) + (define est (if (< N M) + (diff2editlen fp B A p-lim) + (diff2editlen fp A B p-lim))) + (and est + (let ((edits (make-typed-array 's32 0 est)) + (CCRR (make-typed-array 's32 0 (* 2 (+ (max M N) 1))))) + (cond ((< N M) + (diff2edits! edits fp CCRR B A) + (diff:invert-edits! edits)) + (else + (diff2edits! edits fp CCRR A B))) + ;;(diff:order-edits! edits est) + edits)))) + +;;@args array1 array2 p-lim +;;@args array1 array2 +;;@1 and @2 are one-dimensional arrays. +;; +;;The non-negative integer @3, if provided, is maximum number of +;;deletions of the shorter sequence to allow. @0 will return @code{#f} +;;if more deletions would be necessary. +;; +;;@0 returns the length of the shortest sequence of edits transformaing +;;@1 to @2. +(define (diff:edit-length A B . p-lim) + (define M (car (array-dimensions A))) + (define N (car (array-dimensions B))) + (set! p-lim (if (null? p-lim) -1 (car p-lim))) + (let ((fp (make-typed-array 's32 0 + (if (negative? p-lim) + (+ 3 M N) + (+ 3 (abs (- N M)) p-lim p-lim))))) + (if (< N M) + (diff2editlen fp B A p-lim) + (diff2editlen fp A B p-lim)))) + +;;@example +;;(diff:longest-common-subsequence "fghiejcklm" "fgehijkpqrlm") +;;@result{} "fghijklm" +;; +;;(diff:edit-length "fghiejcklm" "fgehijkpqrlm") +;;@result{} 6 +;; +;;(diff:edits "fghiejcklm" "fgehijkpqrlm") +;;@result{} #A:fixZ32b(3 -5 -7 8 9 10) +;; ; e c h p q r +;;@end example + +;;(trace-all "/home/jaffer/slib/differ.scm")(set! *qp-width* 333)(untrace fp:run fp:subarray) + + +;;;arch-tag: 8e80eb1d-fb11-4872-895a-8adcee26580d diff --git a/src/guile/skribilo/package/Makefile.am b/src/guile/skribilo/package/Makefile.am index 693f088..ac7c18e 100644 --- a/src/guile/skribilo/package/Makefile.am +++ b/src/guile/skribilo/package/Makefile.am @@ -2,6 +2,6 @@ guilemoduledir = $(GUILE_SITE)/skribilo/package dist_guilemodule_DATA = acmproc.scm french.scm jfp.scm letter.scm \ lncs.scm scribe.scm sigplan.scm skribe.scm \ slide.scm web-article.scm web-book.scm \ - eq.scm pie.scm base.scm + eq.scm pie.scm base.scm diff.scm SUBDIRS = slide eq pie diff --git a/src/guile/skribilo/package/diff.scm b/src/guile/skribilo/package/diff.scm new file mode 100644 index 0000000..f42f7f4 --- /dev/null +++ b/src/guile/skribilo/package/diff.scm @@ -0,0 +1,304 @@ +;;; diff.scm -- A document difference highlighting package. +;;; +;;; Copyright 2007 Ludovic Courtès <ludovic.courtes@laas.fr> +;;; +;;; +;;; This program is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 2 of the License, or +;;; (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +;;; USA. + +(define-module (skribilo package diff) + :use-module (differ) + :use-module (srfi srfi-1) + :use-module (srfi srfi-39) + :use-module (ice-9 optargs) + + :use-module (skribilo ast) + :use-module (skribilo lib) + :autoload (skribilo reader) (*document-reader*) + :autoload (skribilo engine) (*current-engine*) + :autoload (skribilo module) (make-run-time-module) + :autoload (skribilo resolve) (resolve!) + :autoload (skribilo evaluator) (evaluate-ast-from-port) + :autoload (skribilo biblio) (*bib-table* make-bib-table) + :use-module (skribilo package base) + :use-module (skribilo utils syntax) + + :export (make-diff-document-from-files)) + +(fluid-set! current-reader %skribilo-module-reader) + +;;; Author: Ludovic Courtès +;;; +;;; Commentary: +;;; +;;; This package provides facilities to automatically produce documents where +;;; changes from a previous version of the document are highlighted. +;;; +;;; Warning: This is very experimental at this stage! +;;; +;;; Code: + + + +;;; +;;; Markup. +;;; + +(define-markup (deletion :rest args) + (color :fg "red" "[deletion]")) + +(define-markup (insertion :rest args) + (color :fg "green" args)) + +(define-markup (unchanged :rest args) + args) + + +;;; +;;; Helpers for string diffs. +;;; + +(define (coalesce-edits edits) + ;; Coalesce EDITS (an array of edits as returned by `diff:edits') into a + ;; list of contiguous changes, each change being denoted by `(CHANGE-KIND + ;; START END)' where CHANGE-KIND is one of `deletion', `insertion' or + ;; `replacement'. + (define (do-coalesce edit-kind edit result) + (cond ((null? result) + `((,edit-kind ,edit ,edit))) + ((eq? (caar result) edit-kind) + (let ((start (cadr (car result))) + (end (caddr (car result)))) + (if (= edit (+ end 1)) + (cons `(,edit-kind ,start ,edit) + (cdr result)) + (cons `(,edit-kind ,edit ,edit) + result)))) + (else + (let ((start (cadr (car result))) + (end (caddr (car result)))) + (if (and (= start end edit) + (not (eq? (caar result) 'replacement))) + (do-coalesce 'replacement edit (cdr result)) + (cons `(,edit-kind ,edit ,edit) + result)))))) + + (reverse! (fold (lambda (edit result) + (if (negative? edit) + (let ((edit (- -1 edit))) + (do-coalesce 'deletion edit result)) + (let ((edit (- edit 1))) + (do-coalesce 'insertion edit result)))) + '() + (array->list edits)))) + +(define (add-unchanged edits str-len) + ;; Add information about unchanged regions to EDITS, a list returned by + ;; `coalesce-edits'. STR-LEN should be the length of the _target_ string, + ;; i.e., the second argument of `diff:edits'. + (define %nop '(unchanged 0 0)) + (define (strip-nop result) + (if (equal? (car result) %nop) + (cdr result) + result)) + + (let loop ((edits edits) + (result (list (list 'unchanged 0 0))) + (str-pos 0)) + (if (null? edits) + (strip-nop + (reverse! (if (< str-pos (- str-len 1)) + (cons (list 'unchanged str-pos (- str-len 1)) + result) + result))) + (let* ((change (car edits)) + (kind (car change)) + (start (cadr change)) + (end (caddr change)) + + (prev-unchanged? (eq? (caar result) 'unchanged)) + (prev-start (cadr (car result))) + (prev-end (caddr (car result)))) + + (loop (cdr edits) + (if (memq kind '(insertion replacement)) + (if (> start (+ 1 prev-end)) + (if prev-unchanged? + (cons* change + `(unchanged ,prev-start + ,(- start 1)) + (cdr result)) + (cons* change + `(unchanged ,(+ 1 prev-end) + ,(- start 1)) + result)) + (cons change result)) + (cons change result)) + (+ end 1)))))) + +(define (string-diff-sequences str1 str2) + ;; Return a "diff sequence" between STR1 and STR2. The diff sequence is + ;; alist of 3-element list whose car represent a diff type (a symbol, + ;; either `unchanged', `replacement', `insertion', or `deletion') and two + ;; integers denoting where the change took place. These two integers are + ;; an indices in STR1 in the case of `deletion', indices in STR2 otherwise. + (add-unchanged (coalesce-edits (diff:edits str1 str2)) + (string-length str2))) + + + +;;; +;;; AST diffing. +;;; + +(define %undiffable-markups + ;; List of markups to not diff. + '(ref url-ref bib-ref bib-ref+ line-ref unref + figref ;; non-standard + mark + image symbol lout-illustration + &the-bibliography + toc + index &index-entry &the-index &the-index-header)) + +(define (make-diff-document ast1 ast2) + ;; Return a document based on AST2 that highlights differences between AST1 + ;; and AST2, enclosing unchanged parts in `unchanged' markups, etc. + (let loop ((ast1 ast1) + (ast2 ast2)) + ;;(format (current-error-port) "diff: ~a ~a~%" ast1 ast2) + (cond ((string? ast2) + (if (string? ast1) + (reverse! + (fold (lambda (edit result) + (let ((start (cadr edit)) + (end (+ 1 (caddr edit)))) + (cons (case (car edit) + ((insertion) + (insertion (substring ast2 start end))) + ((deletion) + (deletion (substring ast1 start end))) + ((replacement) + (replacement (substring ast2 start end))) + ((unchanged) + (unchanged (substring ast2 start end)))) + result))) + '() + (string-diff-sequences ast1 ast2))) + (insertion ast2))) + + ((document? ast2) + (let ((ident (or (markup-ident ast2) + (ast->string (markup-option ast2 :title)) + (symbol->string (gensym "document")))) + (opts (markup-options ast2)) + (class (markup-class ast2)) + (body (markup-body ast2))) + (new document + (markup 'document) + (ident ident) + (class class) + (options opts) + (body (loop (if (markup? ast1) + (markup-body ast1) + ast1) + body)) + (env (list (list 'chapter-counter 0) (list 'chapter-env '()) + (list 'section-counter 0) (list 'section-env '()) + (list 'footnote-counter 0) + (list 'footnote-env '()) + (list 'figure-counter 0) + (list 'figure-env '())))))) + + ((container? ast2) + (let ((kind (markup-markup ast2)) + (ident (markup-ident ast2)) + (opts (markup-options ast2)) + (class (markup-class ast2)) + (body (markup-body ast2))) + (new container + (markup kind) + (ident ident) + (class class) + (options opts) + (body (loop (if (markup? ast1) + (markup-body ast1) + ast1) + body))))) + + ((markup? ast2) + (let ((kind (markup-markup ast2)) + (ident (markup-ident ast2)) + (opts (markup-options ast2)) + (class (markup-class ast2)) + (body (markup-body ast2))) + (new markup + (markup kind) + (ident ident) + (class class) + (options opts) + (body (if (memq kind %undiffable-markups) + body + (loop (if (markup? ast1) + (markup-body ast1) + ast1) + body)))))) + + ((list? ast2) + (if (list? ast1) + (map loop ast1 ast2) + (map (lambda (x) + (loop ast1 x)) + ast2))) + + (else + (insertion ast2))))) + + + +;;; +;;; Public API. +;;; + +(define* (make-diff-document-from-files old-file new-file + :key (reader (*document-reader*)) + (env '()) + (engine (*current-engine*))) + ;; Return a document similar to NEW-FILE, where differences from OLD-FILE + ;; are highlighted. + (let ((ast1 + (parameterize ((*bib-table* (make-bib-table 'doc-1))) + (evaluate-ast-from-port (open-input-file old-file) + :reader reader + :module (make-run-time-module)))) + (~~ (skribe-message "diff: first document loaded")) + (ast2 + (parameterize ((*bib-table* (make-bib-table 'doc-2))) + (evaluate-ast-from-port (open-input-file new-file) + :reader reader + :module (make-run-time-module)))) + (%% (skribe-message "diff: second document loaded"))) + + (resolve! ast1 engine env) + (resolve! ast2 engine env) + (let ((diff (make-diff-document ast1 ast2))) + (format (current-error-port) "diff-doc: ~a ~a~%" + diff (document? diff)) + diff))) + + +;;; diff.scm ends here + +;;; arch-tag: 69ad10fa-5688-4835-8956-439e44e26847 |