aboutsummaryrefslogtreecommitdiff
path: root/src/guile/silex/lexparser.scm
diff options
context:
space:
mode:
authorLudovic Courtès2008-01-18 12:36:59 +0100
committerLudovic Courtès2008-01-18 12:36:59 +0100
commit7efd05778cddec0293e0d48199f3aeee2aad6178 (patch)
tree806be6fc0190c511374f15332c4465e27048b111 /src/guile/silex/lexparser.scm
parenta3b7dfffbda5fe148920c7556244ab35b99109a5 (diff)
downloadskribilo-7efd05778cddec0293e0d48199f3aeee2aad6178.tar.gz
skribilo-7efd05778cddec0293e0d48199f3aeee2aad6178.tar.lz
skribilo-7efd05778cddec0293e0d48199f3aeee2aad6178.zip
Add SILex, for simplicity.
Diffstat (limited to 'src/guile/silex/lexparser.scm')
-rw-r--r--src/guile/silex/lexparser.scm812
1 files changed, 812 insertions, 0 deletions
diff --git a/src/guile/silex/lexparser.scm b/src/guile/silex/lexparser.scm
new file mode 100644
index 0000000..75072ed
--- /dev/null
+++ b/src/guile/silex/lexparser.scm
@@ -0,0 +1,812 @@
+; SILex - Scheme Implementation of Lex
+; Copyright (C) 2001 Danny Dube'
+;
+; This program is free software; you can redistribute it and/or
+; modify it under the terms of the GNU General Public License
+; as published by the Free Software Foundation; either version 2
+; of the License, or (at your option) any later version.
+;
+; This program is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program; if not, write to the Free Software
+; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+;
+; Fonctions auxilliaires du lexer
+;
+
+(define parse-spec-char
+ (lambda (lexeme line column)
+ (make-tok char-tok lexeme line column newline-ch)))
+
+(define parse-digits-char
+ (lambda (lexeme line column)
+ (let* ((num (substring lexeme 1 (string-length lexeme)))
+ (n (string->number num)))
+ (make-tok char-tok lexeme line column n))))
+
+(define parse-quoted-char
+ (lambda (lexeme line column)
+ (let ((c (string-ref lexeme 1)))
+ (make-tok char-tok lexeme line column (char->integer c)))))
+
+(define parse-ordinary-char
+ (lambda (lexeme line column)
+ (let ((c (string-ref lexeme 0)))
+ (make-tok char-tok lexeme line column (char->integer c)))))
+
+(define string-downcase
+ (lambda (s)
+ (let* ((l (string->list s))
+ (ld (map char-downcase l)))
+ (list->string ld))))
+
+(define extract-id
+ (lambda (s)
+ (let ((len (string-length s)))
+ (substring s 1 (- len 1)))))
+
+(define parse-id
+ (lambda (lexeme line column)
+ (make-tok id-tok lexeme line column (string-downcase lexeme) lexeme)))
+
+(define parse-id-ref
+ (lambda (lexeme line column)
+ (let* ((orig-name (extract-id lexeme))
+ (name (string-downcase orig-name)))
+ (make-tok subst-tok lexeme line column name orig-name))))
+
+(define parse-power-m
+ (lambda (lexeme line column)
+ (let* ((len (string-length lexeme))
+ (substr (substring lexeme 1 (- len 1)))
+ (m (string->number substr))
+ (range (cons m m)))
+ (make-tok power-tok lexeme line column range))))
+
+(define parse-power-m-inf
+ (lambda (lexeme line column)
+ (let* ((len (string-length lexeme))
+ (substr (substring lexeme 1 (- len 2)))
+ (m (string->number substr))
+ (range (cons m 'inf)))
+ (make-tok power-tok lexeme line column range))))
+
+(define parse-power-m-n
+ (lambda (lexeme line column)
+ (let ((len (string-length lexeme)))
+ (let loop ((comma 2))
+ (if (char=? (string-ref lexeme comma) #\,)
+ (let* ((sub1 (substring lexeme 1 comma))
+ (sub2 (substring lexeme (+ comma 1) (- len 1)))
+ (m (string->number sub1))
+ (n (string->number sub2))
+ (range (cons m n)))
+ (make-tok power-tok lexeme line column range))
+ (loop (+ comma 1)))))))
+
+
+
+
+;
+; Lexer generique
+;
+
+(define lexer-raw #f)
+(define lexer-stack '())
+
+(define lexer-alist #f)
+
+(define lexer-buffer #f)
+(define lexer-buffer-empty? #t)
+
+(define lexer-history '())
+(define lexer-history-interp #f)
+
+(define init-lexer
+ (lambda (port)
+ (let* ((IS (lexer-make-IS 'port port 'all))
+ (action-lexer (lexer-make-lexer action-tables IS))
+ (class-lexer (lexer-make-lexer class-tables IS))
+ (macro-lexer (lexer-make-lexer macro-tables IS))
+ (regexp-lexer (lexer-make-lexer regexp-tables IS))
+ (string-lexer (lexer-make-lexer string-tables IS)))
+ (set! lexer-raw #f)
+ (set! lexer-stack '())
+ (set! lexer-alist
+ (list (cons 'action action-lexer)
+ (cons 'class class-lexer)
+ (cons 'macro macro-lexer)
+ (cons 'regexp regexp-lexer)
+ (cons 'string string-lexer)))
+ (set! lexer-buffer-empty? #t)
+ (set! lexer-history '()))))
+
+; Lexer brut
+; S'assurer qu'il n'y a pas de risque de changer de
+; lexer quand le buffer est rempli
+(define push-lexer
+ (lambda (name)
+ (set! lexer-stack (cons lexer-raw lexer-stack))
+ (set! lexer-raw (cdr (assq name lexer-alist)))))
+
+(define pop-lexer
+ (lambda ()
+ (set! lexer-raw (car lexer-stack))
+ (set! lexer-stack (cdr lexer-stack))))
+
+; Traite le "unget" (capacite du unget: 1)
+(define lexer2
+ (lambda ()
+ (if lexer-buffer-empty?
+ (lexer-raw)
+ (begin
+ (set! lexer-buffer-empty? #t)
+ lexer-buffer))))
+
+(define lexer2-unget
+ (lambda (tok)
+ (set! lexer-buffer tok)
+ (set! lexer-buffer-empty? #f)))
+
+; Traite l'historique
+(define lexer
+ (lambda ()
+ (let* ((tok (lexer2))
+ (tok-lexeme (get-tok-lexeme tok))
+ (hist-lexeme (if lexer-history-interp
+ (blank-translate tok-lexeme)
+ tok-lexeme)))
+ (set! lexer-history (cons hist-lexeme lexer-history))
+ tok)))
+
+(define lexer-unget
+ (lambda (tok)
+ (set! lexer-history (cdr lexer-history))
+ (lexer2-unget tok)))
+
+(define lexer-set-blank-history
+ (lambda (b)
+ (set! lexer-history-interp b)))
+
+(define blank-translate
+ (lambda (s)
+ (let ((ss (string-copy s)))
+ (let loop ((i (- (string-length ss) 1)))
+ (cond ((< i 0)
+ ss)
+ ((char=? (string-ref ss i) (integer->char tab-ch))
+ (loop (- i 1)))
+ ((char=? (string-ref ss i) #\newline)
+ (loop (- i 1)))
+ (else
+ (string-set! ss i #\space)
+ (loop (- i 1))))))))
+
+(define lexer-get-history
+ (lambda ()
+ (let* ((rightlist (reverse lexer-history))
+ (str (apply string-append rightlist))
+ (strlen (string-length str))
+ (str2 (if (and (> strlen 0)
+ (char=? (string-ref str (- strlen 1)) #\newline))
+ str
+ (string-append str (string #\newline)))))
+ (set! lexer-history '())
+ str2)))
+
+
+
+
+;
+; Traitement des listes de tokens
+;
+
+(define de-anchor-tokens
+ (let ((not-anchor-toks (make-dispatch-table number-of-tokens
+ (list (cons caret-tok #f)
+ (cons dollar-tok #f)
+ (cons <<EOF>>-tok #f)
+ (cons <<ERROR>>-tok #f))
+ #t)))
+ (lambda (tok-list)
+ (if (null? tok-list)
+ '()
+ (let* ((tok (car tok-list))
+ (tok-type (get-tok-type tok))
+ (toks (cdr tok-list))
+ (new-toks (de-anchor-tokens toks)))
+ (cond ((vector-ref not-anchor-toks tok-type)
+ (cons tok new-toks))
+ ((or (= tok-type caret-tok) (= tok-type dollar-tok))
+ (let* ((line (get-tok-line tok))
+ (column (get-tok-column tok))
+ (attr (if (= tok-type caret-tok) caret-ch dollar-ch))
+ (new-tok (make-tok char-tok "" line column attr)))
+ (cons new-tok new-toks)))
+ ((= tok-type <<EOF>>-tok)
+ (lex-error (get-tok-line tok)
+ (get-tok-column tok)
+ "the <<EOF>> anchor must be used alone"
+ " and only after %%."))
+ ((= tok-type <<ERROR>>-tok)
+ (lex-error (get-tok-line tok)
+ (get-tok-column tok)
+ "the <<ERROR>> anchor must be used alone"
+ " and only after %%."))))))))
+
+(define strip-end
+ (lambda (l)
+ (if (null? (cdr l))
+ '()
+ (cons (car l) (strip-end (cdr l))))))
+
+(define extract-anchors
+ (lambda (tok-list)
+ (let* ((tok1 (car tok-list))
+ (line (get-tok-line tok1))
+ (tok1-type (get-tok-type tok1)))
+ (cond ((and (= tok1-type <<EOF>>-tok) (null? (cdr tok-list)))
+ (make-rule line #t #f #f #f '() #f))
+ ((and (= tok1-type <<ERROR>>-tok) (null? (cdr tok-list)))
+ (make-rule line #f #t #f #f '() #f))
+ (else
+ (let* ((bol? (= tok1-type caret-tok))
+ (tok-list2 (if bol? (cdr tok-list) tok-list)))
+ (if (null? tok-list2)
+ (make-rule line #f #f bol? #f tok-list2 #f)
+ (let* ((len (length tok-list2))
+ (tok2 (list-ref tok-list2 (- len 1)))
+ (tok2-type (get-tok-type tok2))
+ (eol? (= tok2-type dollar-tok))
+ (tok-list3 (if eol?
+ (strip-end tok-list2)
+ tok-list2)))
+ (make-rule line #f #f bol? eol? tok-list3 #f)))))))))
+
+(define char-list->conc
+ (lambda (char-list)
+ (if (null? char-list)
+ (make-re epsilon-re)
+ (let loop ((cl char-list))
+ (let* ((c (car cl))
+ (cl2 (cdr cl)))
+ (if (null? cl2)
+ (make-re char-re c)
+ (make-re conc-re (make-re char-re c) (loop cl2))))))))
+
+(define parse-tokens-atom
+ (let ((action-table
+ (make-dispatch-table
+ number-of-tokens
+ (list (cons lpar-tok
+ (lambda (tok tok-list macros)
+ (parse-tokens-sub tok-list macros)))
+ (cons dot-tok
+ (lambda (tok tok-list macros)
+ (cons (make-re class-re dot-class) (cdr tok-list))))
+ (cons subst-tok
+ (lambda (tok tok-list macros)
+ (let* ((name (get-tok-attr tok))
+ (ass (assoc name macros)))
+ (if ass
+ (cons (cdr ass) (cdr tok-list))
+ (lex-error (get-tok-line tok)
+ (get-tok-column tok)
+ "unknown macro \""
+ (get-tok-2nd-attr tok)
+ "\".")))))
+ (cons char-tok
+ (lambda (tok tok-list macros)
+ (let ((c (get-tok-attr tok)))
+ (cons (make-re char-re c) (cdr tok-list)))))
+ (cons class-tok
+ (lambda (tok tok-list macros)
+ (let ((class (get-tok-attr tok)))
+ (cons (make-re class-re class) (cdr tok-list)))))
+ (cons string-tok
+ (lambda (tok tok-list macros)
+ (let* ((char-list (get-tok-attr tok))
+ (re (char-list->conc char-list)))
+ (cons re (cdr tok-list))))))
+ (lambda (tok tok-list macros)
+ (lex-error (get-tok-line tok)
+ (get-tok-column tok)
+ "syntax error in regular expression.")))))
+ (lambda (tok-list macros)
+ (let* ((tok (car tok-list))
+ (tok-type (get-tok-type tok))
+ (action (vector-ref action-table tok-type)))
+ (action tok tok-list macros)))))
+
+(define check-power-tok
+ (lambda (tok)
+ (let* ((range (get-tok-attr tok))
+ (start (car range))
+ (end (cdr range)))
+ (if (or (eq? 'inf end) (<= start end))
+ range
+ (lex-error (get-tok-line tok)
+ (get-tok-column tok)
+ "incorrect power specification.")))))
+
+(define power->star-plus
+ (lambda (re range)
+ (power->star-plus-rec re (car range) (cdr range))))
+
+(define power->star-plus-rec
+ (lambda (re start end)
+ (cond ((eq? end 'inf)
+ (cond ((= start 0)
+ (make-re star-re re))
+ ((= start 1)
+ (make-re plus-re re))
+ (else
+ (make-re conc-re
+ re
+ (power->star-plus-rec re (- start 1) 'inf)))))
+ ((= start 0)
+ (cond ((= end 0)
+ (make-re epsilon-re))
+ ((= end 1)
+ (make-re question-re re))
+ (else
+ (make-re question-re
+ (power->star-plus-rec re 1 end)))))
+ ((= start 1)
+ (if (= end 1)
+ re
+ (make-re conc-re re (power->star-plus-rec re 0 (- end 1)))))
+ (else
+ (make-re conc-re
+ re
+ (power->star-plus-rec re (- start 1) (- end 1)))))))
+
+(define parse-tokens-fact
+ (let ((not-op-toks (make-dispatch-table number-of-tokens
+ (list (cons question-tok #f)
+ (cons plus-tok #f)
+ (cons star-tok #f)
+ (cons power-tok #f))
+ #t)))
+ (lambda (tok-list macros)
+ (let* ((result (parse-tokens-atom tok-list macros))
+ (re (car result))
+ (tok-list2 (cdr result)))
+ (let loop ((re re) (tok-list3 tok-list2))
+ (let* ((tok (car tok-list3))
+ (tok-type (get-tok-type tok)))
+ (cond ((vector-ref not-op-toks tok-type)
+ (cons re tok-list3))
+ ((= tok-type question-tok)
+ (loop (make-re question-re re) (cdr tok-list3)))
+ ((= tok-type plus-tok)
+ (loop (make-re plus-re re) (cdr tok-list3)))
+ ((= tok-type star-tok)
+ (loop (make-re star-re re) (cdr tok-list3)))
+ ((= tok-type power-tok)
+ (loop (power->star-plus re (check-power-tok tok))
+ (cdr tok-list3))))))))))
+
+(define parse-tokens-conc
+ (lambda (tok-list macros)
+ (let* ((result1 (parse-tokens-fact tok-list macros))
+ (re1 (car result1))
+ (tok-list2 (cdr result1))
+ (tok (car tok-list2))
+ (tok-type (get-tok-type tok)))
+ (cond ((or (= tok-type pipe-tok)
+ (= tok-type rpar-tok))
+ result1)
+ (else ; Autres facteurs
+ (let* ((result2 (parse-tokens-conc tok-list2 macros))
+ (re2 (car result2))
+ (tok-list3 (cdr result2)))
+ (cons (make-re conc-re re1 re2) tok-list3)))))))
+
+(define parse-tokens-or
+ (lambda (tok-list macros)
+ (let* ((result1 (parse-tokens-conc tok-list macros))
+ (re1 (car result1))
+ (tok-list2 (cdr result1))
+ (tok (car tok-list2))
+ (tok-type (get-tok-type tok)))
+ (cond ((= tok-type pipe-tok)
+ (let* ((tok-list3 (cdr tok-list2))
+ (result2 (parse-tokens-or tok-list3 macros))
+ (re2 (car result2))
+ (tok-list4 (cdr result2)))
+ (cons (make-re or-re re1 re2) tok-list4)))
+ (else ; rpar-tok
+ result1)))))
+
+(define parse-tokens-sub
+ (lambda (tok-list macros)
+ (let* ((tok-list2 (cdr tok-list)) ; Manger le lpar-tok
+ (result (parse-tokens-or tok-list2 macros))
+ (re (car result))
+ (tok-list3 (cdr result))
+ (tok-list4 (cdr tok-list3))) ; Manger le rpar-tok
+ (cons re tok-list4))))
+
+(define parse-tokens-match
+ (lambda (tok-list line)
+ (let loop ((tl tok-list) (count 0))
+ (if (null? tl)
+ (if (> count 0)
+ (lex-error line
+ #f
+ "mismatched parentheses."))
+ (let* ((tok (car tl))
+ (tok-type (get-tok-type tok)))
+ (cond ((= tok-type lpar-tok)
+ (loop (cdr tl) (+ count 1)))
+ ((= tok-type rpar-tok)
+ (if (zero? count)
+ (lex-error line
+ #f
+ "mismatched parentheses."))
+ (loop (cdr tl) (- count 1)))
+ (else
+ (loop (cdr tl) count))))))))
+
+; Ne traite pas les anchors
+(define parse-tokens
+ (lambda (tok-list macros)
+ (if (null? tok-list)
+ (make-re epsilon-re)
+ (let ((line (get-tok-line (car tok-list))))
+ (parse-tokens-match tok-list line)
+ (let* ((begin-par (make-tok lpar-tok "" line 1))
+ (end-par (make-tok rpar-tok "" line 1)))
+ (let* ((tok-list2 (append (list begin-par)
+ tok-list
+ (list end-par)))
+ (result (parse-tokens-sub tok-list2 macros)))
+ (car result))))))) ; (cdr result) == () obligatoirement
+
+(define tokens->regexp
+ (lambda (tok-list macros)
+ (let ((tok-list2 (de-anchor-tokens tok-list)))
+ (parse-tokens tok-list2 macros))))
+
+(define tokens->rule
+ (lambda (tok-list macros)
+ (let* ((rule (extract-anchors tok-list))
+ (tok-list2 (get-rule-regexp rule))
+ (tok-list3 (de-anchor-tokens tok-list2))
+ (re (parse-tokens tok-list3 macros)))
+ (set-rule-regexp rule re)
+ rule)))
+
+; Retourne une paire: <<EOF>>-action et vecteur des regles ordinaires
+(define adapt-rules
+ (lambda (rules)
+ (let loop ((r rules) (revr '()) (<<EOF>>-action #f) (<<ERROR>>-action #f))
+ (if (null? r)
+ (cons (or <<EOF>>-action default-<<EOF>>-action)
+ (cons (or <<ERROR>>-action default-<<ERROR>>-action)
+ (list->vector (reverse revr))))
+ (let ((r1 (car r)))
+ (cond ((get-rule-eof? r1)
+ (if <<EOF>>-action
+ (lex-error (get-rule-line r1)
+ #f
+ "the <<EOF>> anchor can be "
+ "used at most once.")
+ (loop (cdr r)
+ revr
+ (get-rule-action r1)
+ <<ERROR>>-action)))
+ ((get-rule-error? r1)
+ (if <<ERROR>>-action
+ (lex-error (get-rule-line r1)
+ #f
+ "the <<ERROR>> anchor can be "
+ "used at most once.")
+ (loop (cdr r)
+ revr
+ <<EOF>>-action
+ (get-rule-action r1))))
+ (else
+ (loop (cdr r)
+ (cons r1 revr)
+ <<EOF>>-action
+ <<ERROR>>-action))))))))
+
+
+
+
+;
+; Analyseur de fichier lex
+;
+
+(define parse-hv-blanks
+ (lambda ()
+ (let* ((tok (lexer))
+ (tok-type (get-tok-type tok)))
+ (if (or (= tok-type hblank-tok)
+ (= tok-type vblank-tok))
+ (parse-hv-blanks)
+ (lexer-unget tok)))))
+
+(define parse-class-range
+ (lambda ()
+ (let* ((tok (lexer))
+ (tok-type (get-tok-type tok)))
+ (cond ((= tok-type char-tok)
+ (let* ((c (get-tok-attr tok))
+ (tok2 (lexer))
+ (tok2-type (get-tok-type tok2)))
+ (if (not (= tok2-type minus-tok))
+ (begin
+ (lexer-unget tok2)
+ (cons c c))
+ (let* ((tok3 (lexer))
+ (tok3-type (get-tok-type tok3)))
+ (cond ((= tok3-type char-tok)
+ (let ((c2 (get-tok-attr tok3)))
+ (if (> c c2)
+ (lex-error (get-tok-line tok3)
+ (get-tok-column tok3)
+ "bad range specification in "
+ "character class;"
+ #\newline
+ "the start character is "
+ "higher than the end one.")
+ (cons c c2))))
+ ((or (= tok3-type rbrack-tok)
+ (= tok3-type minus-tok))
+ (lex-error (get-tok-line tok3)
+ (get-tok-column tok3)
+ "bad range specification in "
+ "character class; a specification"
+ #\newline
+ "like \"-x\", \"x--\" or \"x-]\" has "
+ "been used."))
+ ((= tok3-type eof-tok)
+ (lex-error (get-tok-line tok3)
+ #f
+ "eof of file found while parsing "
+ "a character class.")))))))
+ ((= tok-type minus-tok)
+ (lex-error (get-tok-line tok)
+ (get-tok-column tok)
+ "bad range specification in character class; a "
+ "specification"
+ #\newline
+ "like \"-x\", \"x--\" or \"x-]\" has been used."))
+ ((= tok-type rbrack-tok)
+ #f)
+ ((= tok-type eof-tok)
+ (lex-error (get-tok-line tok)
+ #f
+ "eof of file found while parsing "
+ "a character class."))))))
+
+(define parse-class
+ (lambda (initial-class negative-class? line column)
+ (push-lexer 'class)
+ (let loop ((class initial-class))
+ (let ((new-range (parse-class-range)))
+ (if new-range
+ (loop (class-union (list new-range) class))
+ (let ((class (if negative-class?
+ (class-compl class)
+ class)))
+ (pop-lexer)
+ (make-tok class-tok "" line column class)))))))
+
+(define parse-string
+ (lambda (line column)
+ (push-lexer 'string)
+ (let ((char-list (let loop ()
+ (let* ((tok (lexer))
+ (tok-type (get-tok-type tok)))
+ (cond ((= tok-type char-tok)
+ (cons (get-tok-attr tok) (loop)))
+ ((= tok-type doublequote-tok)
+ (pop-lexer)
+ '())
+ (else ; eof-tok
+ (lex-error (get-tok-line tok)
+ #f
+ "end of file found while "
+ "parsing a string.")))))))
+ (make-tok string-tok "" line column char-list))))
+
+(define parse-regexp
+ (let* ((end-action
+ (lambda (tok loop)
+ (lexer-unget tok)
+ (pop-lexer)
+ (lexer-set-blank-history #f)
+ `()))
+ (action-table
+ (make-dispatch-table
+ number-of-tokens
+ (list (cons eof-tok end-action)
+ (cons hblank-tok end-action)
+ (cons vblank-tok end-action)
+ (cons lbrack-tok
+ (lambda (tok loop)
+ (let ((tok1 (parse-class (list)
+ #f
+ (get-tok-line tok)
+ (get-tok-column tok))))
+ (cons tok1 (loop)))))
+ (cons lbrack-rbrack-tok
+ (lambda (tok loop)
+ (let ((tok1 (parse-class
+ (list (cons rbrack-ch rbrack-ch))
+ #f
+ (get-tok-line tok)
+ (get-tok-column tok))))
+ (cons tok1 (loop)))))
+ (cons lbrack-caret-tok
+ (lambda (tok loop)
+ (let ((tok1 (parse-class (list)
+ #t
+ (get-tok-line tok)
+ (get-tok-column tok))))
+ (cons tok1 (loop)))))
+ (cons lbrack-minus-tok
+ (lambda (tok loop)
+ (let ((tok1 (parse-class
+ (list (cons minus-ch minus-ch))
+ #f
+ (get-tok-line tok)
+ (get-tok-column tok))))
+ (cons tok1 (loop)))))
+ (cons doublequote-tok
+ (lambda (tok loop)
+ (let ((tok1 (parse-string (get-tok-line tok)
+ (get-tok-column tok))))
+ (cons tok1 (loop)))))
+ (cons illegal-tok
+ (lambda (tok loop)
+ (lex-error (get-tok-line tok)
+ (get-tok-column tok)
+ "syntax error in macro reference."))))
+ (lambda (tok loop)
+ (cons tok (loop))))))
+ (lambda ()
+ (push-lexer 'regexp)
+ (lexer-set-blank-history #t)
+ (parse-hv-blanks)
+ (let loop ()
+ (let* ((tok (lexer))
+ (tok-type (get-tok-type tok))
+ (action (vector-ref action-table tok-type)))
+ (action tok loop))))))
+
+(define parse-ws1-regexp ; Exige un blanc entre le nom et la RE d'une macro
+ (lambda ()
+ (let* ((tok (lexer))
+ (tok-type (get-tok-type tok)))
+ (cond ((or (= tok-type hblank-tok) (= tok-type vblank-tok))
+ (parse-regexp))
+ (else ; percent-percent-tok, id-tok ou illegal-tok
+ (lex-error (get-tok-line tok)
+ (get-tok-column tok)
+ "white space expected."))))))
+
+(define parse-macro
+ (lambda (macros)
+ (push-lexer 'macro)
+ (parse-hv-blanks)
+ (let* ((tok (lexer))
+ (tok-type (get-tok-type tok)))
+ (cond ((= tok-type id-tok)
+ (let* ((name (get-tok-attr tok))
+ (ass (assoc name macros)))
+ (if ass
+ (lex-error (get-tok-line tok)
+ (get-tok-column tok)
+ "the macro \""
+ (get-tok-2nd-attr tok)
+ "\" has already been defined.")
+ (let* ((tok-list (parse-ws1-regexp))
+ (regexp (tokens->regexp tok-list macros)))
+ (pop-lexer)
+ (cons name regexp)))))
+ ((= tok-type percent-percent-tok)
+ (pop-lexer)
+ #f)
+ ((= tok-type illegal-tok)
+ (lex-error (get-tok-line tok)
+ (get-tok-column tok)
+ "macro name expected."))
+ ((= tok-type eof-tok)
+ (lex-error (get-tok-line tok)
+ #f
+ "end of file found before %%."))))))
+
+(define parse-macros
+ (lambda ()
+ (let loop ((macros '()))
+ (let ((macro (parse-macro macros)))
+ (if macro
+ (loop (cons macro macros))
+ macros)))))
+
+(define parse-action-end
+ (lambda (<<EOF>>-action? <<ERROR>>-action? action?)
+ (let ((act (lexer-get-history)))
+ (cond (action?
+ act)
+ (<<EOF>>-action?
+ (string-append act default-<<EOF>>-action))
+ (<<ERROR>>-action?
+ (string-append act default-<<ERROR>>-action))
+ (else
+ (string-append act default-action))))))
+
+(define parse-action
+ (lambda (<<EOF>>-action? <<ERROR>>-action?)
+ (push-lexer 'action)
+ (let loop ((action? #f))
+ (let* ((tok (lexer))
+ (tok-type (get-tok-type tok)))
+ (cond ((= tok-type char-tok)
+ (loop #t))
+ ((= tok-type hblank-tok)
+ (loop action?))
+ ((= tok-type vblank-tok)
+ (push-lexer 'regexp)
+ (let* ((tok (lexer))
+ (tok-type (get-tok-type tok))
+ (bidon (lexer-unget tok)))
+ (pop-lexer)
+ (if (or (= tok-type hblank-tok)
+ (= tok-type vblank-tok))
+ (loop action?)
+ (begin
+ (pop-lexer)
+ (parse-action-end <<EOF>>-action?
+ <<ERROR>>-action?
+ action?)))))
+ (else ; eof-tok
+ (lexer-unget tok)
+ (pop-lexer)
+ (parse-action-end <<EOF>>-action?
+ <<ERROR>>-action?
+ action?)))))))
+
+(define parse-rule
+ (lambda (macros)
+ (let ((tok-list (parse-regexp)))
+ (if (null? tok-list)
+ #f
+ (let* ((rule (tokens->rule tok-list macros))
+ (action
+ (parse-action (get-rule-eof? rule) (get-rule-error? rule))))
+ (set-rule-action rule action)
+ rule)))))
+
+(define parse-rules
+ (lambda (macros)
+ (parse-action #f #f)
+ (let loop ()
+ (let ((rule (parse-rule macros)))
+ (if rule
+ (cons rule (loop))
+ '())))))
+
+(define parser
+ (lambda (filename)
+ (let* ((port (open-input-file filename))
+ (port-open? #t))
+ (lex-unwind-protect (lambda ()
+ (if port-open?
+ (close-input-port port))))
+ (init-lexer port)
+ (let* ((macros (parse-macros))
+ (rules (parse-rules macros)))
+ (close-input-port port)
+ (set! port-open? #f)
+ (adapt-rules rules)))))