summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--src/guile/Makefile.am2
-rw-r--r--src/guile/differ.scm443
-rw-r--r--src/guile/skribilo/package/Makefile.am2
-rw-r--r--src/guile/skribilo/package/diff.scm304
4 files changed, 749 insertions, 2 deletions
diff --git a/src/guile/Makefile.am b/src/guile/Makefile.am
index e410a87..6a27133 100644
--- a/src/guile/Makefile.am
+++ b/src/guile/Makefile.am
@@ -1,5 +1,5 @@
 SUBDIRS = skribilo
 
 guilemoduledir = $(GUILE_SITE)
-dist_guilemodule_DATA = skribilo.scm
+dist_guilemodule_DATA = skribilo.scm differ.scm
 EXTRA_DIST = README
diff --git a/src/guile/differ.scm b/src/guile/differ.scm
new file mode 100644
index 0000000..f8ff18b
--- /dev/null
+++ b/src/guile/differ.scm
@@ -0,0 +1,443 @@
+;;; "differ.scm" O(NP) Sequence Comparison Algorithm.
+;;; Copyright (C) 2001, 2002, 2003, 2004 Aubrey Jaffer
+
+;;;
+;;; Taken from Slib 3a2 and adapated by Ludovic Courtès
+;;; <ludovic.courtes@laas.fr> to use Guile's native API (March 2007).
+;;;
+
+(define-module (differ)
+  :export (diff:longest-common-subsequence diff:edits
+           diff:edit-length))
+
+;Permission to copy this software, to modify it, to redistribute it,
+;to distribute modified versions, and to use it for any purpose is
+;granted, subject to the following restrictions and understandings.
+;
+;1.  Any copy made of this software must include this copyright notice
+;in full.
+;
+;2.  I have made no warranty or representation that the operation of
+;this software will be error-free, and I am under no obligation to
+;provide any services, by way of maintenance, update, or otherwise.
+;
+;3.  In conjunction with products arising from the use of this
+;material, there shall be no use of my name in any advertising,
+;promotional, or sales literature without prior written consent in
+;each case.
+
+;;@noindent
+;;@code{diff:edit-length} implements the algorithm:
+;;
+;;@ifinfo
+;;@example
+;;S. Wu, E. Myers, U. Manber, and W. Miller,
+;;   "An O(NP) Sequence Comparison Algorithm,"
+;;   Information Processing Letters 35, 6 (1990), 317-323.
+;;   @url{http://www.cs.arizona.edu/people/gene/PAPERS/np_diff.ps}
+;;@end example
+;;@end ifinfo
+;;@ifset html
+;;S. Wu, <A HREF="http://www.cs.arizona.edu/people/gene/vita.html">
+;;E. Myers,</A> U. Manber, and W. Miller,
+;;<A HREF="http://www.cs.arizona.edu/people/gene/PAPERS/np_diff.ps">
+;;"An O(NP) Sequence Comparison Algorithm"</A>,
+;;Information Processing Letters 35, 6 (1990), 317-323.
+;;@end ifset
+;;
+;;@noindent
+;;The values returned by @code{diff:edit-length} can be used to gauge
+;;the degree of match between two sequences.
+;;
+;;@noindent
+;;@code{diff:edits} and @code{diff:longest-common-subsequence} combine
+;;the algorithm with the divide-and-conquer method outlined in:
+;;
+;;@ifinfo
+;;@example
+;;E. Myers and W. Miller,
+;;   "Optimal alignments in linear space",
+;;   Computer Application in the Biosciences (CABIOS), 4(1):11-17, 1988.
+;;   @url{http://www.cs.arizona.edu/people/gene/PAPERS/linear.ps}
+;;@end example
+;;@end ifinfo
+;;@ifset html
+;;<A HREF="http://www.cs.arizona.edu/people/gene/vita.html">
+;;E. Myers,</A> and W. Miller,
+;;<A HREF="http://www.cs.arizona.edu/people/gene/PAPERS/linear.ps">
+;;"Optimal alignments in linear space"</A>,
+;;Computer Application in the Biosciences (CABIOS), 4(1):11-17, 1988.
+;;@end ifset
+;;
+;;@noindent
+;;If the items being sequenced are text lines, then the computed
+;;edit-list is equivalent to the output of the @dfn{diff} utility
+;;program.  If the items being sequenced are words, then it is like the
+;;lesser known @dfn{spiff} program.
+
+
+;;; p-lim is half the number of gratuitous edits for strings of given
+;;; lengths.
+;;; When passed #f CC, fp:compare returns edit-distance if successful;
+;;; #f otherwise (p > p-lim).  When passed CC, fp:compare returns #f.
+(define (fp:compare fp fpoff CC A M B N p-lim)
+  (define Delta (- N M))
+  ;;(if (negative? Delta) (slib:error 'fp:compare (fp:subarray A 0 M) '> (fp:subarray B 0 N)))
+  ;;(set! compares (+ 1 compares))  ;(print 'fp:compare M N p-lim)
+  (let loop ((p 0))
+    (do ((k (- p) (+ 1 k)))
+	((>= k Delta))
+      (fp:run fp fpoff k A M B N CC p))
+    (do ((k (+ Delta p) (+ -1 k)))
+	((<= k Delta))
+      (fp:run fp fpoff k A M B N CC p))
+    (let ((fpval (fp:run fp fpoff Delta A M B N CC p)))
+      ;; At this point, the cost to (fpval-Delta, fpval) is Delta + 2*p
+      (cond ((and (not CC) (<= N fpval)) (+ Delta (* 2 p)))
+	    ((and (not (negative? p-lim)) (>= p p-lim)) #f)
+	    (else (loop (+ 1 p)))))))
+
+;;; Traces runs of matches until they end; then set fp[k]=y.
+;;; If CC is supplied, set each CC[y] = min(CC[y], cost) for run.
+;;; Returns furthest y reached.
+(define (fp:run fp fpoff k A M B N CC p)
+  (define cost (+ k p p))
+  (let snloop ((y (max (+ (array-ref fp (+ -1 k fpoff)) 1)
+		       (array-ref fp (+ 1 k fpoff)))))
+    (define x (- y k))
+    (and CC (<= y N)
+	 (let ((xcst (- M x)))
+	   (cond ((negative? xcst))
+		 (else (array-set! CC
+				   (min (+ xcst cost) (array-ref CC y))
+				   y)))))
+    ;;(set! tick (+ 1 tick))
+    (cond ((and (< x M) (< y N)
+		(eqv? (array-ref A x) (array-ref B y)))
+	   (snloop (+ 1 y)))
+	  (else (array-set! fp y (+ fpoff k))
+		y))))
+
+;;; Check that only 1 and -1 steps between adjacent CC entries.
+;;(define (fp:step-check A M B N CC)
+;;  (do ((cdx (+ -1 N) (+ -1 cdx)))
+;;      ((negative? cdx))
+;;    (case (- (array-ref CC cdx) (array-ref CC (+ 1 cdx)))
+;;      ((1 -1) #t)
+;;      (else (cond ((> 30 (car (array-dimensions CC)))
+;;		   (display "A: ") (print A)
+;;		   (display "B: ") (print B)))
+;;	    (slib:warn
+;;	     "CC" (append (list (max 0 (+ -5 cdx)) ': (min (+ 1 N) (+ 5 cdx))
+;;				'of)
+;;			  (array-dimensions CC))
+;;	     (fp:subarray CC (max 0 (+ -5 cdx)) (min (+ 1 N) (+ 5 cdx))))))))
+
+;;; Correct cost jumps left by fp:compare [which visits only a few (x,y)].
+;;(define (smooth-costs CC N)
+;;  (do ((cdx (+ -1 N) (+ -1 cdx)))	; smooth from end
+;;      ((negative? cdx))
+;;    (array-set! CC (min (array-ref CC cdx) (+ 1 (array-ref CC (+ 1 cdx))))
+;;		cdx))
+;;  (do ((cdx 1 (+ 1 cdx)))		; smooth toward end
+;;      ((> cdx N))
+;;    (array-set! CC (min (array-ref CC cdx) (+ 1 (array-ref CC (+ -1 cdx))))
+;;		cdx))
+;;  CC)
+
+(define (diff:mid-split N RR CC cost)
+  ;; RR is not longer than CC.  So do for each element of RR.
+  (let loop ((cdx (+ 1 (quotient N 2)))
+	     (rdx (quotient N 2)))
+    ;;(if (negative? rdx) (slib:error 'negative? 'rdx))
+    (cond ((eqv? cost (+ (array-ref CC rdx) (array-ref RR (- N rdx)))) rdx)
+	  ((eqv? cost (+ (array-ref CC cdx) (array-ref RR (- N cdx)))) cdx)
+	  (else (loop (+ 1 cdx) (+ -1 rdx))))))
+
+;;; Return 0-based shared array.
+;;; Reverse RA if END < START.
+(define (fp:subarray RA start end)
+  (define n-len (abs (- end start)))
+  (if (< end start)
+      (make-shared-array RA (lambda (idx) (list (+ -1 (- start idx)))) n-len)
+      (make-shared-array RA (lambda (idx) (list (+ start idx))) n-len)))
+
+(define (fp:init! fp fpoff fill mindx maxdx)
+  (define mlim (+ fpoff mindx))
+  (do ((idx (+ fpoff maxdx) (+ -1 idx)))
+      ((< idx mlim))
+    (array-set! fp fill idx)))
+
+;;; Split A[start-a..end-a] (shorter array) into smaller and smaller chunks.
+;;; EDX is index into EDITS.
+;;; EPO is insert/delete polarity (+1 or -1)
+(define (diff:divide-and-conquer fp fpoff CCRR A start-a end-a B start-b end-b edits edx epo p-lim)
+  (define mid-a (quotient (+ start-a end-a) 2))
+  (define len-b (- end-b start-b))
+  (define len-a (- end-a start-a))
+  (let ((tcst (+ p-lim p-lim (- len-b len-a))))
+    (define CC (fp:subarray CCRR 0 (+ len-b 1)))
+    (define RR (fp:subarray CCRR (+ len-b 1) (* 2 (+ len-b 1))))
+    (define M2 (- end-a mid-a))
+    (define M1 (- mid-a start-a))
+    (fp:init! CC 0 (+ len-a len-b) 0 len-b)
+    (fp:init! fp fpoff -1 (- (+ 1 p-lim)) (+ 1 p-lim (- len-b M1)))
+    (fp:compare fp fpoff CC
+		(fp:subarray A start-a mid-a) M1
+		(fp:subarray B start-b end-b) len-b
+		(min p-lim len-a))
+    (fp:init! RR 0 (+ len-a len-b) 0 len-b)
+    (fp:init! fp fpoff -1 (- (+ 1 p-lim)) (+ 1 p-lim (- len-b M2)))
+    (fp:compare fp fpoff RR
+		(fp:subarray A end-a mid-a)   M2
+		(fp:subarray B end-b start-b) len-b
+		(min p-lim len-a))
+    ;;(smooth-costs CC len-b) (smooth-costs RR len-b)
+    (let ((b-splt (diff:mid-split len-b RR CC tcst)))
+      (define est-c (array-ref CC b-splt))
+      (define est-r (array-ref RR (- len-b b-splt)))
+      ;;(set! splts (cons (/ b-splt (max .1 len-b)) splts))
+      ;;(display "A: ") (array-for-each display (fp:subarray A start-a mid-a)) (display " + ") (array-for-each display (fp:subarray A mid-a end-a)) (newline)
+      ;;(display "B: ") (array-for-each display (fp:subarray B start-b end-b)) (newline)
+      ;;(print 'cc cc) (print 'rr (fp:subarray RR (+ 1 len-b) 0))
+      ;;(print (make-string (+ 7 (* 2 b-splt)) #\-) '^  (list b-splt))
+      (check-cost! 'CC est-c
+		  (diff2et fp fpoff CCRR
+			   A start-a mid-a
+			   B start-b (+ start-b b-splt)
+			   edits edx epo
+			   (quotient (- est-c (- b-splt (- mid-a start-a)))
+				     2)))
+      (check-cost! 'RR est-r
+		  (diff2et fp fpoff CCRR
+			   A mid-a end-a
+			   B (+ start-b b-splt) end-b
+			   edits (+ est-c edx) epo
+			   (quotient (- est-r (- (- len-b b-splt)
+						 (- end-a mid-a)))
+				     2)))
+      (+ est-c est-r))))
+
+;;; Trim; then diff sub-arrays; either one longer.  Returns edit-length
+(define (diff2et fp fpoff CCRR A start-a end-a B start-b end-b edits edx epo p-lim)
+  ;;  (if (< (- end-a start-a) p-lim) (slib:warn 'diff2et 'len-a (- end-a start-a) 'len-b (- end-b start-b) 'p-lim p-lim))
+  (do ((bdx (+ -1 end-b) (+ -1 bdx))
+       (adx (+ -1 end-a) (+ -1 adx)))
+      ((not (and (<= start-b bdx)
+		 (<= start-a adx)
+		 (eqv? (array-ref A adx) (array-ref B bdx))))
+       (do ((bsx start-b (+ 1 bsx))
+	    (asx start-a (+ 1 asx)))
+	   ((not (and (< bsx bdx)
+		      (< asx adx)
+		      (eqv? (array-ref A asx) (array-ref B bsx))))
+	    ;;(print 'trim-et (- asx start-a) '+ (- end-a adx))
+	    (let ((delta (- (- bdx bsx) (- adx asx))))
+	      (if (negative? delta)
+		  (diff2ez fp fpoff CCRR B bsx (+ 1 bdx) A asx (+ 1 adx)
+			   edits edx (- epo) (+ delta p-lim))
+		  (diff2ez fp fpoff CCRR A asx (+ 1 adx) B bsx (+ 1 bdx)
+			   edits edx epo p-lim))))
+	 ;;(set! tick (+ 1 tick))
+	 ))
+    ;;(set! tick (+ 1 tick))
+    ))
+
+;;; Diff sub-arrays, A not longer than B.  Returns edit-length
+(define (diff2ez fp fpoff CCRR A start-a end-a B start-b end-b edits edx epo p-lim)
+  (define len-a (- end-a start-a))
+  (define len-b (- end-b start-b))
+  ;;(if (> len-a len-b) (slib:error 'diff2ez len-a '> len-b))
+  (cond ((zero? p-lim)			; B inserts only
+	 (if (= len-b len-a)
+	     0				; A = B; no edits
+	     (let loop ((adx start-a)
+			(bdx start-b)
+			(edx edx))
+	       (cond ((>= bdx end-b) (- len-b len-a))
+		     ((>= adx end-a)
+		      (do ((idx bdx (+ 1 idx))
+			   (edx edx (+ 1 edx)))
+			  ((>= idx end-b) (- len-b len-a))
+			(array-set! edits (* epo (+ 1 idx)) edx)))
+		     ((eqv? (array-ref A adx) (array-ref B bdx))
+		      ;;(set! tick (+ 1 tick))
+		      (loop (+ 1 adx) (+ 1 bdx) edx))
+		     (else (array-set! edits (* epo (+ 1 bdx)) edx)
+			   ;;(set! tick (+ 1 tick))
+			   (loop adx (+ 1 bdx) (+ 1 edx)))))))
+	((<= len-a p-lim)		; delete all A; insert all B
+	 ;;(if (< len-a p-lim) (slib:error 'diff2ez len-a len-b 'p-lim p-lim))
+	 (do ((idx start-a (+ 1 idx))
+	      (jdx start-b (+ 1 jdx)))
+	     ((and (>= idx end-a) (>= jdx end-b)) (+ len-a len-b))
+	   (cond ((< jdx end-b)
+		  (array-set! edits (* epo (+ 1 jdx)) edx)
+		  (set! edx (+ 1 edx))))
+	   (cond ((< idx end-a)
+		  (array-set! edits (* epo (- -1 idx)) edx)
+		  (set! edx (+ 1 edx))))))
+	(else (diff:divide-and-conquer
+	       fp fpoff CCRR A start-a end-a B start-b end-b
+	       edits edx epo p-lim))))
+
+(define (check-cost! name est cost)
+  (if (not (eqv? est cost))
+      (slib:warn name "cost check failed" est '!= cost)))
+
+;;;; Routines interfacing API layer to algorithms.
+
+(define (diff:invert-edits! edits)
+  (define cost (car (array-dimensions edits)))
+  (do ((idx (+ -1 cost) (+ -1 idx)))
+      ((negative? idx))
+    (array-set! edits (- (array-ref edits idx)) idx)))
+
+;;; len-a < len-b
+(define (edits2lcs! lcs edits A)
+  (define cost (car (array-dimensions edits)))
+  (define len-a (car (array-dimensions A)))
+  (let loop ((edx 0)
+	     (sdx 0)
+	     (adx 0))
+    (let ((edit (if (< edx cost) (array-ref edits edx) 0)))
+      (cond ((>= adx len-a))
+	    ((positive? edit)
+	     (loop (+ 1 edx) sdx adx))
+	    ((zero? edit)
+	     (array-set! lcs (array-ref A adx) sdx)
+	     (loop edx (+ 1 sdx) (+ 1 adx)))
+	    ((>= adx (- -1 edit))
+	     (loop (+ 1 edx) sdx (+ 1 adx)))
+	    (else
+	     (array-set! lcs (array-ref A adx) sdx)
+	     (loop edx (+ 1 sdx) (+ 1 adx)))))))
+
+;; A not longer than B (M <= N)
+(define (diff2edits! edits fp CCRR A B)
+  (define N (car (array-dimensions B)))
+  (define M (car (array-dimensions A)))
+  (define est (car (array-dimensions edits)))
+  (let ((p-lim (quotient (- est (- N M)) 2)))
+    (check-cost! 'diff2edits!
+		 est
+		 (diff2et fp (+ 1 p-lim)
+			  CCRR A 0 M B 0 N edits 0 1 p-lim))))
+
+;; A not longer than B (M <= N)
+(define (diff2editlen fp A B p-lim)
+  (define N (car (array-dimensions B)))
+  (define M (car (array-dimensions A)))
+  (let ((maxdx (if (negative? p-lim) (+ 1 N) (+ 1 p-lim (- N M))))
+	(mindx (if (negative? p-lim) (- (+ 1 M)) (- (+ 1 p-lim)))))
+    (fp:init! fp (- mindx) -1 mindx maxdx)
+    (fp:compare fp (- mindx) #f A M B N p-lim)))
+
+
+;;;
+;;; Public API.
+;;;
+
+;;@args array1 array2 p-lim
+;;@args array1 array2
+;;@1 and @2 are one-dimensional arrays.
+;;
+;;The non-negative integer @3, if provided, is maximum number of
+;;deletions of the shorter sequence to allow.  @0 will return @code{#f}
+;;if more deletions would be necessary.
+;;
+;;@0 returns a one-dimensional array of length @code{(quotient (- (+
+;;len1 len2) (diff:edit-length @1 @2)) 2)} holding the longest sequence
+;;common to both @var{array}s.
+(define (diff:longest-common-subsequence A B . p-lim)
+  (define M (car (array-dimensions A)))
+  (define N (car (array-dimensions B)))
+  (set! p-lim (if (null? p-lim) -1 (car p-lim)))
+  (let ((edits (if (< N M)
+		   (diff:edits B A p-lim)
+		   (diff:edits A B p-lim))))
+    (and edits
+	 (let* ((cost (car (array-dimensions edits)))
+		(lcs (make-typed-array (array-type A) *unspecified*
+                                       (/ (- (+ N M) cost) 2))))
+	   (edits2lcs! lcs edits (if (< N M) B A))
+	   lcs))))
+
+;;@args array1 array2 p-lim
+;;@args array1 array2
+;;@1 and @2 are one-dimensional arrays.
+;;
+;;The non-negative integer @3, if provided, is maximum number of
+;;deletions of the shorter sequence to allow.  @0 will return @code{#f}
+;;if more deletions would be necessary.
+;;
+;;@0 returns a vector of length @code{(diff:edit-length @1 @2)} composed
+;;of a shortest sequence of edits transformaing @1 to @2.
+;;
+;;Each edit is an integer:
+;;@table @asis
+;;@item @var{k} > 0
+;;Inserts @code{(array-ref @1 (+ -1 @var{j}))} into the sequence.
+;;@item @var{k} < 0
+;;Deletes @code{(array-ref @2 (- -1 @var{k}))} from the sequence.
+;;@end table
+(define (diff:edits A B . p-lim)
+  (define M (car (array-dimensions A)))
+  (define N (car (array-dimensions B)))
+  (set! p-lim (if (null? p-lim) -1 (car p-lim)))
+  (let ((fp (make-typed-array 's32 0
+                              (if (negative? p-lim)
+                                  (+ 3 M N)
+                                  (+ 3 (abs (- N M)) p-lim p-lim)))))
+    (define est (if (< N M)
+		    (diff2editlen fp B A p-lim)
+		    (diff2editlen fp A B p-lim)))
+    (and est
+	 (let ((edits (make-typed-array 's32 0 est))
+	       (CCRR (make-typed-array 's32 0 (* 2 (+ (max M N) 1)))))
+	   (cond ((< N M)
+		  (diff2edits! edits fp CCRR B A)
+		  (diff:invert-edits! edits))
+		 (else
+		  (diff2edits! edits fp CCRR A B)))
+	   ;;(diff:order-edits! edits est)
+	   edits))))
+
+;;@args array1 array2 p-lim
+;;@args array1 array2
+;;@1 and @2 are one-dimensional arrays.
+;;
+;;The non-negative integer @3, if provided, is maximum number of
+;;deletions of the shorter sequence to allow.  @0 will return @code{#f}
+;;if more deletions would be necessary.
+;;
+;;@0 returns the length of the shortest sequence of edits transformaing
+;;@1 to @2.
+(define (diff:edit-length A B . p-lim)
+  (define M (car (array-dimensions A)))
+  (define N (car (array-dimensions B)))
+  (set! p-lim (if (null? p-lim) -1 (car p-lim)))
+  (let ((fp (make-typed-array 's32 0
+                              (if (negative? p-lim)
+                                  (+ 3 M N)
+                                  (+ 3 (abs (- N M)) p-lim p-lim)))))
+    (if (< N M)
+	(diff2editlen fp B A p-lim)
+	(diff2editlen fp A B p-lim))))
+
+;;@example
+;;(diff:longest-common-subsequence "fghiejcklm" "fgehijkpqrlm")
+;;@result{} "fghijklm"
+;;
+;;(diff:edit-length "fghiejcklm" "fgehijkpqrlm")
+;;@result{} 6
+;;
+;;(diff:edits "fghiejcklm" "fgehijkpqrlm")
+;;@result{} #A:fixZ32b(3 -5 -7 8 9 10)
+;;       ; e  c  h p q  r
+;;@end example
+
+;;(trace-all "/home/jaffer/slib/differ.scm")(set! *qp-width* 333)(untrace fp:run fp:subarray)
+
+
+;;;arch-tag: 8e80eb1d-fb11-4872-895a-8adcee26580d
diff --git a/src/guile/skribilo/package/Makefile.am b/src/guile/skribilo/package/Makefile.am
index 693f088..ac7c18e 100644
--- a/src/guile/skribilo/package/Makefile.am
+++ b/src/guile/skribilo/package/Makefile.am
@@ -2,6 +2,6 @@ guilemoduledir = $(GUILE_SITE)/skribilo/package
 dist_guilemodule_DATA = acmproc.scm french.scm jfp.scm letter.scm	\
 			lncs.scm scribe.scm sigplan.scm skribe.scm	\
 			slide.scm web-article.scm web-book.scm		\
-			eq.scm pie.scm base.scm
+			eq.scm pie.scm base.scm diff.scm
 
 SUBDIRS = slide eq pie
diff --git a/src/guile/skribilo/package/diff.scm b/src/guile/skribilo/package/diff.scm
new file mode 100644
index 0000000..f42f7f4
--- /dev/null
+++ b/src/guile/skribilo/package/diff.scm
@@ -0,0 +1,304 @@
+;;; diff.scm  --  A document difference highlighting package.
+;;;
+;;; Copyright 2007  Ludovic Courtès <ludovic.courtes@laas.fr>
+;;;
+;;;
+;;; This program is free software; you can redistribute it and/or modify
+;;; it under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 2 of the License, or
+;;; (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with this program; if not, write to the Free Software
+;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+;;; USA.
+
+(define-module (skribilo package diff)
+  :use-module (differ)
+  :use-module (srfi srfi-1)
+  :use-module (srfi srfi-39)
+  :use-module (ice-9 optargs)
+
+  :use-module (skribilo ast)
+  :use-module (skribilo lib)
+  :autoload   (skribilo reader)        (*document-reader*)
+  :autoload   (skribilo engine)        (*current-engine*)
+  :autoload   (skribilo module)        (make-run-time-module)
+  :autoload   (skribilo resolve)       (resolve!)
+  :autoload   (skribilo evaluator)     (evaluate-ast-from-port)
+  :autoload   (skribilo biblio)        (*bib-table* make-bib-table)
+  :use-module (skribilo package base)
+  :use-module (skribilo utils syntax)
+
+  :export (make-diff-document-from-files))
+
+(fluid-set! current-reader %skribilo-module-reader)
+
+;;; Author: Ludovic Courtès
+;;;
+;;; Commentary:
+;;;
+;;; This package provides facilities to automatically produce documents where
+;;; changes from a previous version of the document are highlighted.
+;;;
+;;; Warning: This is very experimental at this stage!
+;;;
+;;; Code:
+
+
+
+;;;
+;;; Markup.
+;;;
+
+(define-markup (deletion :rest args)
+  (color :fg "red" "[deletion]"))
+
+(define-markup (insertion :rest args)
+  (color :fg "green" args))
+
+(define-markup (unchanged :rest args)
+  args)
+
+
+;;;
+;;; Helpers for string diffs.
+;;;
+
+(define (coalesce-edits edits)
+  ;; Coalesce EDITS (an array of edits as returned by `diff:edits') into a
+  ;; list of contiguous changes, each change being denoted by `(CHANGE-KIND
+  ;; START END)' where CHANGE-KIND is one of `deletion', `insertion' or
+  ;; `replacement'.
+  (define (do-coalesce edit-kind edit result)
+    (cond ((null? result)
+           `((,edit-kind ,edit ,edit)))
+          ((eq? (caar result) edit-kind)
+           (let ((start (cadr  (car result)))
+                 (end   (caddr (car result))))
+             (if (= edit (+ end 1))
+                 (cons `(,edit-kind ,start ,edit)
+                       (cdr result))
+                 (cons `(,edit-kind ,edit ,edit)
+                       result))))
+          (else
+           (let ((start (cadr  (car result)))
+                 (end   (caddr (car result))))
+             (if (and (= start end edit)
+                      (not (eq? (caar result) 'replacement)))
+                 (do-coalesce 'replacement edit (cdr result))
+                 (cons `(,edit-kind ,edit ,edit)
+                       result))))))
+
+  (reverse! (fold (lambda (edit result)
+                    (if (negative? edit)
+                        (let ((edit (- -1 edit)))
+                          (do-coalesce 'deletion edit result))
+                        (let ((edit (- edit 1)))
+                          (do-coalesce 'insertion edit result))))
+                  '()
+                  (array->list edits))))
+
+(define (add-unchanged edits str-len)
+  ;; Add information about unchanged regions to EDITS, a list returned by
+  ;; `coalesce-edits'.  STR-LEN should be the length of the _target_ string,
+  ;; i.e., the second argument of `diff:edits'.
+  (define %nop '(unchanged 0 0))
+  (define (strip-nop result)
+    (if (equal? (car result) %nop)
+        (cdr result)
+        result))
+
+  (let loop ((edits   edits)
+             (result  (list (list 'unchanged 0 0)))
+             (str-pos 0))
+    (if (null? edits)
+        (strip-nop
+         (reverse! (if (< str-pos (- str-len 1))
+                       (cons (list 'unchanged str-pos (- str-len 1))
+                             result)
+                       result)))
+        (let* ((change (car edits))
+               (kind  (car change))
+               (start (cadr change))
+               (end   (caddr change))
+
+               (prev-unchanged? (eq? (caar result) 'unchanged))
+               (prev-start      (cadr (car result)))
+               (prev-end        (caddr (car result))))
+
+          (loop (cdr edits)
+                (if (memq kind '(insertion replacement))
+                    (if (> start (+ 1 prev-end))
+                        (if prev-unchanged?
+                            (cons* change
+                                   `(unchanged ,prev-start
+                                               ,(- start 1))
+                                   (cdr result))
+                            (cons* change
+                                   `(unchanged ,(+ 1 prev-end)
+                                               ,(- start 1))
+                                   result))
+                        (cons change result))
+                    (cons change result))
+                (+ end 1))))))
+
+(define (string-diff-sequences str1 str2)
+  ;; Return a "diff sequence" between STR1 and STR2.  The diff sequence is
+  ;; alist of 3-element list whose car represent a diff type (a symbol,
+  ;; either `unchanged', `replacement', `insertion', or `deletion') and two
+  ;; integers denoting where the change took place.  These two integers are
+  ;; an indices in STR1 in the case of `deletion', indices in STR2 otherwise.
+  (add-unchanged (coalesce-edits (diff:edits str1 str2))
+                 (string-length str2)))
+
+
+
+;;;
+;;; AST diffing.
+;;;
+
+(define %undiffable-markups
+  ;; List of markups to not diff.
+  '(ref url-ref bib-ref bib-ref+ line-ref unref
+    figref ;; non-standard
+    mark
+    image symbol lout-illustration
+    &the-bibliography
+    toc
+    index &index-entry &the-index &the-index-header))
+
+(define (make-diff-document ast1 ast2)
+  ;; Return a document based on AST2 that highlights differences between AST1
+  ;; and AST2, enclosing unchanged parts in `unchanged' markups, etc.
+  (let loop ((ast1 ast1)
+             (ast2 ast2))
+    ;;(format (current-error-port) "diff: ~a ~a~%" ast1 ast2)
+    (cond ((string? ast2)
+           (if (string? ast1)
+               (reverse!
+                (fold (lambda (edit result)
+                        (let ((start (cadr edit))
+                              (end   (+ 1 (caddr edit))))
+                          (cons (case (car edit)
+                                  ((insertion)
+                                   (insertion (substring ast2 start end)))
+                                  ((deletion)
+                                   (deletion  (substring ast1 start end)))
+                                  ((replacement)
+                                   (replacement (substring ast2 start end)))
+                                  ((unchanged)
+                                   (unchanged (substring ast2 start end))))
+                                result)))
+                      '()
+                      (string-diff-sequences ast1 ast2)))
+               (insertion ast2)))
+
+          ((document? ast2)
+           (let ((ident (or (markup-ident ast2)
+                            (ast->string (markup-option ast2 :title))
+                            (symbol->string (gensym "document"))))
+                 (opts  (markup-options ast2))
+                 (class (markup-class ast2))
+                 (body  (markup-body ast2)))
+             (new document
+                  (markup 'document)
+                  (ident ident)
+                  (class class)
+                  (options opts)
+                  (body (loop (if (markup? ast1)
+                                  (markup-body ast1)
+                                  ast1)
+                              body))
+                  (env (list (list 'chapter-counter 0) (list 'chapter-env '())
+                             (list 'section-counter 0) (list 'section-env '())
+                             (list 'footnote-counter 0)
+                             (list 'footnote-env '())
+                             (list 'figure-counter 0)
+                             (list 'figure-env '()))))))
+
+          ((container? ast2)
+           (let ((kind  (markup-markup ast2))
+                 (ident (markup-ident ast2))
+                 (opts  (markup-options ast2))
+                 (class (markup-class ast2))
+                 (body  (markup-body ast2)))
+             (new container
+                  (markup  kind)
+                  (ident   ident)
+                  (class   class)
+                  (options opts)
+                  (body (loop (if (markup? ast1)
+                                  (markup-body ast1)
+                                  ast1)
+                              body)))))
+
+          ((markup? ast2)
+           (let ((kind  (markup-markup ast2))
+                 (ident (markup-ident ast2))
+                 (opts  (markup-options ast2))
+                 (class (markup-class ast2))
+                 (body  (markup-body ast2)))
+             (new markup
+                  (markup  kind)
+                  (ident   ident)
+                  (class   class)
+                  (options opts)
+                  (body (if (memq kind %undiffable-markups)
+                            body
+                            (loop (if (markup? ast1)
+                                      (markup-body ast1)
+                                      ast1)
+                                  body))))))
+
+          ((list? ast2)
+           (if (list? ast1)
+               (map loop ast1 ast2)
+               (map (lambda (x)
+                      (loop ast1 x))
+                    ast2)))
+
+          (else
+           (insertion ast2)))))
+
+
+
+;;;
+;;; Public API.
+;;;
+
+(define* (make-diff-document-from-files old-file new-file
+                                        :key (reader (*document-reader*))
+                                             (env '())
+                                             (engine (*current-engine*)))
+  ;; Return a document similar to NEW-FILE, where differences from OLD-FILE
+  ;; are highlighted.
+  (let ((ast1
+         (parameterize ((*bib-table* (make-bib-table 'doc-1)))
+           (evaluate-ast-from-port (open-input-file old-file)
+                                   :reader reader
+                                   :module (make-run-time-module))))
+        (~~ (skribe-message "diff: first document loaded"))
+        (ast2
+         (parameterize ((*bib-table* (make-bib-table 'doc-2)))
+           (evaluate-ast-from-port (open-input-file new-file)
+                                   :reader reader
+                                   :module (make-run-time-module))))
+        (%% (skribe-message "diff: second document loaded")))
+
+    (resolve! ast1 engine env)
+    (resolve! ast2 engine env)
+    (let ((diff (make-diff-document ast1 ast2)))
+      (format (current-error-port) "diff-doc: ~a ~a~%"
+              diff (document? diff))
+      diff)))
+
+
+;;; diff.scm ends here
+
+;;; arch-tag: 69ad10fa-5688-4835-8956-439e44e26847