diff options
Diffstat (limited to 'src/guile/differ.scm')
-rw-r--r-- | src/guile/differ.scm | 443 |
1 files changed, 0 insertions, 443 deletions
diff --git a/src/guile/differ.scm b/src/guile/differ.scm deleted file mode 100644 index f8ff18b..0000000 --- a/src/guile/differ.scm +++ /dev/null @@ -1,443 +0,0 @@ -;;; "differ.scm" O(NP) Sequence Comparison Algorithm. -;;; Copyright (C) 2001, 2002, 2003, 2004 Aubrey Jaffer - -;;; -;;; Taken from Slib 3a2 and adapated by Ludovic Courtès -;;; <ludovic.courtes@laas.fr> to use Guile's native API (March 2007). -;;; - -(define-module (differ) - :export (diff:longest-common-subsequence diff:edits - diff:edit-length)) - -;Permission to copy this software, to modify it, to redistribute it, -;to distribute modified versions, and to use it for any purpose is -;granted, subject to the following restrictions and understandings. -; -;1. Any copy made of this software must include this copyright notice -;in full. -; -;2. I have made no warranty or representation that the operation of -;this software will be error-free, and I am under no obligation to -;provide any services, by way of maintenance, update, or otherwise. -; -;3. In conjunction with products arising from the use of this -;material, there shall be no use of my name in any advertising, -;promotional, or sales literature without prior written consent in -;each case. - -;;@noindent -;;@code{diff:edit-length} implements the algorithm: -;; -;;@ifinfo -;;@example -;;S. Wu, E. Myers, U. Manber, and W. Miller, -;; "An O(NP) Sequence Comparison Algorithm," -;; Information Processing Letters 35, 6 (1990), 317-323. -;; @url{http://www.cs.arizona.edu/people/gene/PAPERS/np_diff.ps} -;;@end example -;;@end ifinfo -;;@ifset html -;;S. Wu, <A HREF="http://www.cs.arizona.edu/people/gene/vita.html"> -;;E. Myers,</A> U. Manber, and W. Miller, -;;<A HREF="http://www.cs.arizona.edu/people/gene/PAPERS/np_diff.ps"> -;;"An O(NP) Sequence Comparison Algorithm"</A>, -;;Information Processing Letters 35, 6 (1990), 317-323. -;;@end ifset -;; -;;@noindent -;;The values returned by @code{diff:edit-length} can be used to gauge -;;the degree of match between two sequences. -;; -;;@noindent -;;@code{diff:edits} and @code{diff:longest-common-subsequence} combine -;;the algorithm with the divide-and-conquer method outlined in: -;; -;;@ifinfo -;;@example -;;E. Myers and W. Miller, -;; "Optimal alignments in linear space", -;; Computer Application in the Biosciences (CABIOS), 4(1):11-17, 1988. -;; @url{http://www.cs.arizona.edu/people/gene/PAPERS/linear.ps} -;;@end example -;;@end ifinfo -;;@ifset html -;;<A HREF="http://www.cs.arizona.edu/people/gene/vita.html"> -;;E. Myers,</A> and W. Miller, -;;<A HREF="http://www.cs.arizona.edu/people/gene/PAPERS/linear.ps"> -;;"Optimal alignments in linear space"</A>, -;;Computer Application in the Biosciences (CABIOS), 4(1):11-17, 1988. -;;@end ifset -;; -;;@noindent -;;If the items being sequenced are text lines, then the computed -;;edit-list is equivalent to the output of the @dfn{diff} utility -;;program. If the items being sequenced are words, then it is like the -;;lesser known @dfn{spiff} program. - - -;;; p-lim is half the number of gratuitous edits for strings of given -;;; lengths. -;;; When passed #f CC, fp:compare returns edit-distance if successful; -;;; #f otherwise (p > p-lim). When passed CC, fp:compare returns #f. -(define (fp:compare fp fpoff CC A M B N p-lim) - (define Delta (- N M)) - ;;(if (negative? Delta) (slib:error 'fp:compare (fp:subarray A 0 M) '> (fp:subarray B 0 N))) - ;;(set! compares (+ 1 compares)) ;(print 'fp:compare M N p-lim) - (let loop ((p 0)) - (do ((k (- p) (+ 1 k))) - ((>= k Delta)) - (fp:run fp fpoff k A M B N CC p)) - (do ((k (+ Delta p) (+ -1 k))) - ((<= k Delta)) - (fp:run fp fpoff k A M B N CC p)) - (let ((fpval (fp:run fp fpoff Delta A M B N CC p))) - ;; At this point, the cost to (fpval-Delta, fpval) is Delta + 2*p - (cond ((and (not CC) (<= N fpval)) (+ Delta (* 2 p))) - ((and (not (negative? p-lim)) (>= p p-lim)) #f) - (else (loop (+ 1 p))))))) - -;;; Traces runs of matches until they end; then set fp[k]=y. -;;; If CC is supplied, set each CC[y] = min(CC[y], cost) for run. -;;; Returns furthest y reached. -(define (fp:run fp fpoff k A M B N CC p) - (define cost (+ k p p)) - (let snloop ((y (max (+ (array-ref fp (+ -1 k fpoff)) 1) - (array-ref fp (+ 1 k fpoff))))) - (define x (- y k)) - (and CC (<= y N) - (let ((xcst (- M x))) - (cond ((negative? xcst)) - (else (array-set! CC - (min (+ xcst cost) (array-ref CC y)) - y))))) - ;;(set! tick (+ 1 tick)) - (cond ((and (< x M) (< y N) - (eqv? (array-ref A x) (array-ref B y))) - (snloop (+ 1 y))) - (else (array-set! fp y (+ fpoff k)) - y)))) - -;;; Check that only 1 and -1 steps between adjacent CC entries. -;;(define (fp:step-check A M B N CC) -;; (do ((cdx (+ -1 N) (+ -1 cdx))) -;; ((negative? cdx)) -;; (case (- (array-ref CC cdx) (array-ref CC (+ 1 cdx))) -;; ((1 -1) #t) -;; (else (cond ((> 30 (car (array-dimensions CC))) -;; (display "A: ") (print A) -;; (display "B: ") (print B))) -;; (slib:warn -;; "CC" (append (list (max 0 (+ -5 cdx)) ': (min (+ 1 N) (+ 5 cdx)) -;; 'of) -;; (array-dimensions CC)) -;; (fp:subarray CC (max 0 (+ -5 cdx)) (min (+ 1 N) (+ 5 cdx)))))))) - -;;; Correct cost jumps left by fp:compare [which visits only a few (x,y)]. -;;(define (smooth-costs CC N) -;; (do ((cdx (+ -1 N) (+ -1 cdx))) ; smooth from end -;; ((negative? cdx)) -;; (array-set! CC (min (array-ref CC cdx) (+ 1 (array-ref CC (+ 1 cdx)))) -;; cdx)) -;; (do ((cdx 1 (+ 1 cdx))) ; smooth toward end -;; ((> cdx N)) -;; (array-set! CC (min (array-ref CC cdx) (+ 1 (array-ref CC (+ -1 cdx)))) -;; cdx)) -;; CC) - -(define (diff:mid-split N RR CC cost) - ;; RR is not longer than CC. So do for each element of RR. - (let loop ((cdx (+ 1 (quotient N 2))) - (rdx (quotient N 2))) - ;;(if (negative? rdx) (slib:error 'negative? 'rdx)) - (cond ((eqv? cost (+ (array-ref CC rdx) (array-ref RR (- N rdx)))) rdx) - ((eqv? cost (+ (array-ref CC cdx) (array-ref RR (- N cdx)))) cdx) - (else (loop (+ 1 cdx) (+ -1 rdx)))))) - -;;; Return 0-based shared array. -;;; Reverse RA if END < START. -(define (fp:subarray RA start end) - (define n-len (abs (- end start))) - (if (< end start) - (make-shared-array RA (lambda (idx) (list (+ -1 (- start idx)))) n-len) - (make-shared-array RA (lambda (idx) (list (+ start idx))) n-len))) - -(define (fp:init! fp fpoff fill mindx maxdx) - (define mlim (+ fpoff mindx)) - (do ((idx (+ fpoff maxdx) (+ -1 idx))) - ((< idx mlim)) - (array-set! fp fill idx))) - -;;; Split A[start-a..end-a] (shorter array) into smaller and smaller chunks. -;;; EDX is index into EDITS. -;;; EPO is insert/delete polarity (+1 or -1) -(define (diff:divide-and-conquer fp fpoff CCRR A start-a end-a B start-b end-b edits edx epo p-lim) - (define mid-a (quotient (+ start-a end-a) 2)) - (define len-b (- end-b start-b)) - (define len-a (- end-a start-a)) - (let ((tcst (+ p-lim p-lim (- len-b len-a)))) - (define CC (fp:subarray CCRR 0 (+ len-b 1))) - (define RR (fp:subarray CCRR (+ len-b 1) (* 2 (+ len-b 1)))) - (define M2 (- end-a mid-a)) - (define M1 (- mid-a start-a)) - (fp:init! CC 0 (+ len-a len-b) 0 len-b) - (fp:init! fp fpoff -1 (- (+ 1 p-lim)) (+ 1 p-lim (- len-b M1))) - (fp:compare fp fpoff CC - (fp:subarray A start-a mid-a) M1 - (fp:subarray B start-b end-b) len-b - (min p-lim len-a)) - (fp:init! RR 0 (+ len-a len-b) 0 len-b) - (fp:init! fp fpoff -1 (- (+ 1 p-lim)) (+ 1 p-lim (- len-b M2))) - (fp:compare fp fpoff RR - (fp:subarray A end-a mid-a) M2 - (fp:subarray B end-b start-b) len-b - (min p-lim len-a)) - ;;(smooth-costs CC len-b) (smooth-costs RR len-b) - (let ((b-splt (diff:mid-split len-b RR CC tcst))) - (define est-c (array-ref CC b-splt)) - (define est-r (array-ref RR (- len-b b-splt))) - ;;(set! splts (cons (/ b-splt (max .1 len-b)) splts)) - ;;(display "A: ") (array-for-each display (fp:subarray A start-a mid-a)) (display " + ") (array-for-each display (fp:subarray A mid-a end-a)) (newline) - ;;(display "B: ") (array-for-each display (fp:subarray B start-b end-b)) (newline) - ;;(print 'cc cc) (print 'rr (fp:subarray RR (+ 1 len-b) 0)) - ;;(print (make-string (+ 7 (* 2 b-splt)) #\-) '^ (list b-splt)) - (check-cost! 'CC est-c - (diff2et fp fpoff CCRR - A start-a mid-a - B start-b (+ start-b b-splt) - edits edx epo - (quotient (- est-c (- b-splt (- mid-a start-a))) - 2))) - (check-cost! 'RR est-r - (diff2et fp fpoff CCRR - A mid-a end-a - B (+ start-b b-splt) end-b - edits (+ est-c edx) epo - (quotient (- est-r (- (- len-b b-splt) - (- end-a mid-a))) - 2))) - (+ est-c est-r)))) - -;;; Trim; then diff sub-arrays; either one longer. Returns edit-length -(define (diff2et fp fpoff CCRR A start-a end-a B start-b end-b edits edx epo p-lim) - ;; (if (< (- end-a start-a) p-lim) (slib:warn 'diff2et 'len-a (- end-a start-a) 'len-b (- end-b start-b) 'p-lim p-lim)) - (do ((bdx (+ -1 end-b) (+ -1 bdx)) - (adx (+ -1 end-a) (+ -1 adx))) - ((not (and (<= start-b bdx) - (<= start-a adx) - (eqv? (array-ref A adx) (array-ref B bdx)))) - (do ((bsx start-b (+ 1 bsx)) - (asx start-a (+ 1 asx))) - ((not (and (< bsx bdx) - (< asx adx) - (eqv? (array-ref A asx) (array-ref B bsx)))) - ;;(print 'trim-et (- asx start-a) '+ (- end-a adx)) - (let ((delta (- (- bdx bsx) (- adx asx)))) - (if (negative? delta) - (diff2ez fp fpoff CCRR B bsx (+ 1 bdx) A asx (+ 1 adx) - edits edx (- epo) (+ delta p-lim)) - (diff2ez fp fpoff CCRR A asx (+ 1 adx) B bsx (+ 1 bdx) - edits edx epo p-lim)))) - ;;(set! tick (+ 1 tick)) - )) - ;;(set! tick (+ 1 tick)) - )) - -;;; Diff sub-arrays, A not longer than B. Returns edit-length -(define (diff2ez fp fpoff CCRR A start-a end-a B start-b end-b edits edx epo p-lim) - (define len-a (- end-a start-a)) - (define len-b (- end-b start-b)) - ;;(if (> len-a len-b) (slib:error 'diff2ez len-a '> len-b)) - (cond ((zero? p-lim) ; B inserts only - (if (= len-b len-a) - 0 ; A = B; no edits - (let loop ((adx start-a) - (bdx start-b) - (edx edx)) - (cond ((>= bdx end-b) (- len-b len-a)) - ((>= adx end-a) - (do ((idx bdx (+ 1 idx)) - (edx edx (+ 1 edx))) - ((>= idx end-b) (- len-b len-a)) - (array-set! edits (* epo (+ 1 idx)) edx))) - ((eqv? (array-ref A adx) (array-ref B bdx)) - ;;(set! tick (+ 1 tick)) - (loop (+ 1 adx) (+ 1 bdx) edx)) - (else (array-set! edits (* epo (+ 1 bdx)) edx) - ;;(set! tick (+ 1 tick)) - (loop adx (+ 1 bdx) (+ 1 edx))))))) - ((<= len-a p-lim) ; delete all A; insert all B - ;;(if (< len-a p-lim) (slib:error 'diff2ez len-a len-b 'p-lim p-lim)) - (do ((idx start-a (+ 1 idx)) - (jdx start-b (+ 1 jdx))) - ((and (>= idx end-a) (>= jdx end-b)) (+ len-a len-b)) - (cond ((< jdx end-b) - (array-set! edits (* epo (+ 1 jdx)) edx) - (set! edx (+ 1 edx)))) - (cond ((< idx end-a) - (array-set! edits (* epo (- -1 idx)) edx) - (set! edx (+ 1 edx)))))) - (else (diff:divide-and-conquer - fp fpoff CCRR A start-a end-a B start-b end-b - edits edx epo p-lim)))) - -(define (check-cost! name est cost) - (if (not (eqv? est cost)) - (slib:warn name "cost check failed" est '!= cost))) - -;;;; Routines interfacing API layer to algorithms. - -(define (diff:invert-edits! edits) - (define cost (car (array-dimensions edits))) - (do ((idx (+ -1 cost) (+ -1 idx))) - ((negative? idx)) - (array-set! edits (- (array-ref edits idx)) idx))) - -;;; len-a < len-b -(define (edits2lcs! lcs edits A) - (define cost (car (array-dimensions edits))) - (define len-a (car (array-dimensions A))) - (let loop ((edx 0) - (sdx 0) - (adx 0)) - (let ((edit (if (< edx cost) (array-ref edits edx) 0))) - (cond ((>= adx len-a)) - ((positive? edit) - (loop (+ 1 edx) sdx adx)) - ((zero? edit) - (array-set! lcs (array-ref A adx) sdx) - (loop edx (+ 1 sdx) (+ 1 adx))) - ((>= adx (- -1 edit)) - (loop (+ 1 edx) sdx (+ 1 adx))) - (else - (array-set! lcs (array-ref A adx) sdx) - (loop edx (+ 1 sdx) (+ 1 adx))))))) - -;; A not longer than B (M <= N) -(define (diff2edits! edits fp CCRR A B) - (define N (car (array-dimensions B))) - (define M (car (array-dimensions A))) - (define est (car (array-dimensions edits))) - (let ((p-lim (quotient (- est (- N M)) 2))) - (check-cost! 'diff2edits! - est - (diff2et fp (+ 1 p-lim) - CCRR A 0 M B 0 N edits 0 1 p-lim)))) - -;; A not longer than B (M <= N) -(define (diff2editlen fp A B p-lim) - (define N (car (array-dimensions B))) - (define M (car (array-dimensions A))) - (let ((maxdx (if (negative? p-lim) (+ 1 N) (+ 1 p-lim (- N M)))) - (mindx (if (negative? p-lim) (- (+ 1 M)) (- (+ 1 p-lim))))) - (fp:init! fp (- mindx) -1 mindx maxdx) - (fp:compare fp (- mindx) #f A M B N p-lim))) - - -;;; -;;; Public API. -;;; - -;;@args array1 array2 p-lim -;;@args array1 array2 -;;@1 and @2 are one-dimensional arrays. -;; -;;The non-negative integer @3, if provided, is maximum number of -;;deletions of the shorter sequence to allow. @0 will return @code{#f} -;;if more deletions would be necessary. -;; -;;@0 returns a one-dimensional array of length @code{(quotient (- (+ -;;len1 len2) (diff:edit-length @1 @2)) 2)} holding the longest sequence -;;common to both @var{array}s. -(define (diff:longest-common-subsequence A B . p-lim) - (define M (car (array-dimensions A))) - (define N (car (array-dimensions B))) - (set! p-lim (if (null? p-lim) -1 (car p-lim))) - (let ((edits (if (< N M) - (diff:edits B A p-lim) - (diff:edits A B p-lim)))) - (and edits - (let* ((cost (car (array-dimensions edits))) - (lcs (make-typed-array (array-type A) *unspecified* - (/ (- (+ N M) cost) 2)))) - (edits2lcs! lcs edits (if (< N M) B A)) - lcs)))) - -;;@args array1 array2 p-lim -;;@args array1 array2 -;;@1 and @2 are one-dimensional arrays. -;; -;;The non-negative integer @3, if provided, is maximum number of -;;deletions of the shorter sequence to allow. @0 will return @code{#f} -;;if more deletions would be necessary. -;; -;;@0 returns a vector of length @code{(diff:edit-length @1 @2)} composed -;;of a shortest sequence of edits transformaing @1 to @2. -;; -;;Each edit is an integer: -;;@table @asis -;;@item @var{k} > 0 -;;Inserts @code{(array-ref @1 (+ -1 @var{j}))} into the sequence. -;;@item @var{k} < 0 -;;Deletes @code{(array-ref @2 (- -1 @var{k}))} from the sequence. -;;@end table -(define (diff:edits A B . p-lim) - (define M (car (array-dimensions A))) - (define N (car (array-dimensions B))) - (set! p-lim (if (null? p-lim) -1 (car p-lim))) - (let ((fp (make-typed-array 's32 0 - (if (negative? p-lim) - (+ 3 M N) - (+ 3 (abs (- N M)) p-lim p-lim))))) - (define est (if (< N M) - (diff2editlen fp B A p-lim) - (diff2editlen fp A B p-lim))) - (and est - (let ((edits (make-typed-array 's32 0 est)) - (CCRR (make-typed-array 's32 0 (* 2 (+ (max M N) 1))))) - (cond ((< N M) - (diff2edits! edits fp CCRR B A) - (diff:invert-edits! edits)) - (else - (diff2edits! edits fp CCRR A B))) - ;;(diff:order-edits! edits est) - edits)))) - -;;@args array1 array2 p-lim -;;@args array1 array2 -;;@1 and @2 are one-dimensional arrays. -;; -;;The non-negative integer @3, if provided, is maximum number of -;;deletions of the shorter sequence to allow. @0 will return @code{#f} -;;if more deletions would be necessary. -;; -;;@0 returns the length of the shortest sequence of edits transformaing -;;@1 to @2. -(define (diff:edit-length A B . p-lim) - (define M (car (array-dimensions A))) - (define N (car (array-dimensions B))) - (set! p-lim (if (null? p-lim) -1 (car p-lim))) - (let ((fp (make-typed-array 's32 0 - (if (negative? p-lim) - (+ 3 M N) - (+ 3 (abs (- N M)) p-lim p-lim))))) - (if (< N M) - (diff2editlen fp B A p-lim) - (diff2editlen fp A B p-lim)))) - -;;@example -;;(diff:longest-common-subsequence "fghiejcklm" "fgehijkpqrlm") -;;@result{} "fghijklm" -;; -;;(diff:edit-length "fghiejcklm" "fgehijkpqrlm") -;;@result{} 6 -;; -;;(diff:edits "fghiejcklm" "fgehijkpqrlm") -;;@result{} #A:fixZ32b(3 -5 -7 8 9 10) -;; ; e c h p q r -;;@end example - -;;(trace-all "/home/jaffer/slib/differ.scm")(set! *qp-width* 333)(untrace fp:run fp:subarray) - - -;;;arch-tag: 8e80eb1d-fb11-4872-895a-8adcee26580d |