diff options
author | Ludovic Courtès | 2008-01-18 12:36:59 +0100 |
---|---|---|
committer | Ludovic Courtès | 2008-01-18 12:36:59 +0100 |
commit | 7efd05778cddec0293e0d48199f3aeee2aad6178 (patch) | |
tree | 806be6fc0190c511374f15332c4465e27048b111 /src/guile/silex/lexparser.scm | |
parent | a3b7dfffbda5fe148920c7556244ab35b99109a5 (diff) | |
download | skribilo-7efd05778cddec0293e0d48199f3aeee2aad6178.tar.gz skribilo-7efd05778cddec0293e0d48199f3aeee2aad6178.tar.lz skribilo-7efd05778cddec0293e0d48199f3aeee2aad6178.zip |
Add SILex, for simplicity.
Diffstat (limited to 'src/guile/silex/lexparser.scm')
-rw-r--r-- | src/guile/silex/lexparser.scm | 812 |
1 files changed, 812 insertions, 0 deletions
diff --git a/src/guile/silex/lexparser.scm b/src/guile/silex/lexparser.scm new file mode 100644 index 0000000..75072ed --- /dev/null +++ b/src/guile/silex/lexparser.scm @@ -0,0 +1,812 @@ +; SILex - Scheme Implementation of Lex +; Copyright (C) 2001 Danny Dube' +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation; either version 2 +; of the License, or (at your option) any later version. +; +; This program is distributed in the hope that it will be useful, +; but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, write to the Free Software +; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +; +; Fonctions auxilliaires du lexer +; + +(define parse-spec-char + (lambda (lexeme line column) + (make-tok char-tok lexeme line column newline-ch))) + +(define parse-digits-char + (lambda (lexeme line column) + (let* ((num (substring lexeme 1 (string-length lexeme))) + (n (string->number num))) + (make-tok char-tok lexeme line column n)))) + +(define parse-quoted-char + (lambda (lexeme line column) + (let ((c (string-ref lexeme 1))) + (make-tok char-tok lexeme line column (char->integer c))))) + +(define parse-ordinary-char + (lambda (lexeme line column) + (let ((c (string-ref lexeme 0))) + (make-tok char-tok lexeme line column (char->integer c))))) + +(define string-downcase + (lambda (s) + (let* ((l (string->list s)) + (ld (map char-downcase l))) + (list->string ld)))) + +(define extract-id + (lambda (s) + (let ((len (string-length s))) + (substring s 1 (- len 1))))) + +(define parse-id + (lambda (lexeme line column) + (make-tok id-tok lexeme line column (string-downcase lexeme) lexeme))) + +(define parse-id-ref + (lambda (lexeme line column) + (let* ((orig-name (extract-id lexeme)) + (name (string-downcase orig-name))) + (make-tok subst-tok lexeme line column name orig-name)))) + +(define parse-power-m + (lambda (lexeme line column) + (let* ((len (string-length lexeme)) + (substr (substring lexeme 1 (- len 1))) + (m (string->number substr)) + (range (cons m m))) + (make-tok power-tok lexeme line column range)))) + +(define parse-power-m-inf + (lambda (lexeme line column) + (let* ((len (string-length lexeme)) + (substr (substring lexeme 1 (- len 2))) + (m (string->number substr)) + (range (cons m 'inf))) + (make-tok power-tok lexeme line column range)))) + +(define parse-power-m-n + (lambda (lexeme line column) + (let ((len (string-length lexeme))) + (let loop ((comma 2)) + (if (char=? (string-ref lexeme comma) #\,) + (let* ((sub1 (substring lexeme 1 comma)) + (sub2 (substring lexeme (+ comma 1) (- len 1))) + (m (string->number sub1)) + (n (string->number sub2)) + (range (cons m n))) + (make-tok power-tok lexeme line column range)) + (loop (+ comma 1))))))) + + + + +; +; Lexer generique +; + +(define lexer-raw #f) +(define lexer-stack '()) + +(define lexer-alist #f) + +(define lexer-buffer #f) +(define lexer-buffer-empty? #t) + +(define lexer-history '()) +(define lexer-history-interp #f) + +(define init-lexer + (lambda (port) + (let* ((IS (lexer-make-IS 'port port 'all)) + (action-lexer (lexer-make-lexer action-tables IS)) + (class-lexer (lexer-make-lexer class-tables IS)) + (macro-lexer (lexer-make-lexer macro-tables IS)) + (regexp-lexer (lexer-make-lexer regexp-tables IS)) + (string-lexer (lexer-make-lexer string-tables IS))) + (set! lexer-raw #f) + (set! lexer-stack '()) + (set! lexer-alist + (list (cons 'action action-lexer) + (cons 'class class-lexer) + (cons 'macro macro-lexer) + (cons 'regexp regexp-lexer) + (cons 'string string-lexer))) + (set! lexer-buffer-empty? #t) + (set! lexer-history '())))) + +; Lexer brut +; S'assurer qu'il n'y a pas de risque de changer de +; lexer quand le buffer est rempli +(define push-lexer + (lambda (name) + (set! lexer-stack (cons lexer-raw lexer-stack)) + (set! lexer-raw (cdr (assq name lexer-alist))))) + +(define pop-lexer + (lambda () + (set! lexer-raw (car lexer-stack)) + (set! lexer-stack (cdr lexer-stack)))) + +; Traite le "unget" (capacite du unget: 1) +(define lexer2 + (lambda () + (if lexer-buffer-empty? + (lexer-raw) + (begin + (set! lexer-buffer-empty? #t) + lexer-buffer)))) + +(define lexer2-unget + (lambda (tok) + (set! lexer-buffer tok) + (set! lexer-buffer-empty? #f))) + +; Traite l'historique +(define lexer + (lambda () + (let* ((tok (lexer2)) + (tok-lexeme (get-tok-lexeme tok)) + (hist-lexeme (if lexer-history-interp + (blank-translate tok-lexeme) + tok-lexeme))) + (set! lexer-history (cons hist-lexeme lexer-history)) + tok))) + +(define lexer-unget + (lambda (tok) + (set! lexer-history (cdr lexer-history)) + (lexer2-unget tok))) + +(define lexer-set-blank-history + (lambda (b) + (set! lexer-history-interp b))) + +(define blank-translate + (lambda (s) + (let ((ss (string-copy s))) + (let loop ((i (- (string-length ss) 1))) + (cond ((< i 0) + ss) + ((char=? (string-ref ss i) (integer->char tab-ch)) + (loop (- i 1))) + ((char=? (string-ref ss i) #\newline) + (loop (- i 1))) + (else + (string-set! ss i #\space) + (loop (- i 1)))))))) + +(define lexer-get-history + (lambda () + (let* ((rightlist (reverse lexer-history)) + (str (apply string-append rightlist)) + (strlen (string-length str)) + (str2 (if (and (> strlen 0) + (char=? (string-ref str (- strlen 1)) #\newline)) + str + (string-append str (string #\newline))))) + (set! lexer-history '()) + str2))) + + + + +; +; Traitement des listes de tokens +; + +(define de-anchor-tokens + (let ((not-anchor-toks (make-dispatch-table number-of-tokens + (list (cons caret-tok #f) + (cons dollar-tok #f) + (cons <<EOF>>-tok #f) + (cons <<ERROR>>-tok #f)) + #t))) + (lambda (tok-list) + (if (null? tok-list) + '() + (let* ((tok (car tok-list)) + (tok-type (get-tok-type tok)) + (toks (cdr tok-list)) + (new-toks (de-anchor-tokens toks))) + (cond ((vector-ref not-anchor-toks tok-type) + (cons tok new-toks)) + ((or (= tok-type caret-tok) (= tok-type dollar-tok)) + (let* ((line (get-tok-line tok)) + (column (get-tok-column tok)) + (attr (if (= tok-type caret-tok) caret-ch dollar-ch)) + (new-tok (make-tok char-tok "" line column attr))) + (cons new-tok new-toks))) + ((= tok-type <<EOF>>-tok) + (lex-error (get-tok-line tok) + (get-tok-column tok) + "the <<EOF>> anchor must be used alone" + " and only after %%.")) + ((= tok-type <<ERROR>>-tok) + (lex-error (get-tok-line tok) + (get-tok-column tok) + "the <<ERROR>> anchor must be used alone" + " and only after %%.")))))))) + +(define strip-end + (lambda (l) + (if (null? (cdr l)) + '() + (cons (car l) (strip-end (cdr l)))))) + +(define extract-anchors + (lambda (tok-list) + (let* ((tok1 (car tok-list)) + (line (get-tok-line tok1)) + (tok1-type (get-tok-type tok1))) + (cond ((and (= tok1-type <<EOF>>-tok) (null? (cdr tok-list))) + (make-rule line #t #f #f #f '() #f)) + ((and (= tok1-type <<ERROR>>-tok) (null? (cdr tok-list))) + (make-rule line #f #t #f #f '() #f)) + (else + (let* ((bol? (= tok1-type caret-tok)) + (tok-list2 (if bol? (cdr tok-list) tok-list))) + (if (null? tok-list2) + (make-rule line #f #f bol? #f tok-list2 #f) + (let* ((len (length tok-list2)) + (tok2 (list-ref tok-list2 (- len 1))) + (tok2-type (get-tok-type tok2)) + (eol? (= tok2-type dollar-tok)) + (tok-list3 (if eol? + (strip-end tok-list2) + tok-list2))) + (make-rule line #f #f bol? eol? tok-list3 #f))))))))) + +(define char-list->conc + (lambda (char-list) + (if (null? char-list) + (make-re epsilon-re) + (let loop ((cl char-list)) + (let* ((c (car cl)) + (cl2 (cdr cl))) + (if (null? cl2) + (make-re char-re c) + (make-re conc-re (make-re char-re c) (loop cl2)))))))) + +(define parse-tokens-atom + (let ((action-table + (make-dispatch-table + number-of-tokens + (list (cons lpar-tok + (lambda (tok tok-list macros) + (parse-tokens-sub tok-list macros))) + (cons dot-tok + (lambda (tok tok-list macros) + (cons (make-re class-re dot-class) (cdr tok-list)))) + (cons subst-tok + (lambda (tok tok-list macros) + (let* ((name (get-tok-attr tok)) + (ass (assoc name macros))) + (if ass + (cons (cdr ass) (cdr tok-list)) + (lex-error (get-tok-line tok) + (get-tok-column tok) + "unknown macro \"" + (get-tok-2nd-attr tok) + "\"."))))) + (cons char-tok + (lambda (tok tok-list macros) + (let ((c (get-tok-attr tok))) + (cons (make-re char-re c) (cdr tok-list))))) + (cons class-tok + (lambda (tok tok-list macros) + (let ((class (get-tok-attr tok))) + (cons (make-re class-re class) (cdr tok-list))))) + (cons string-tok + (lambda (tok tok-list macros) + (let* ((char-list (get-tok-attr tok)) + (re (char-list->conc char-list))) + (cons re (cdr tok-list)))))) + (lambda (tok tok-list macros) + (lex-error (get-tok-line tok) + (get-tok-column tok) + "syntax error in regular expression."))))) + (lambda (tok-list macros) + (let* ((tok (car tok-list)) + (tok-type (get-tok-type tok)) + (action (vector-ref action-table tok-type))) + (action tok tok-list macros))))) + +(define check-power-tok + (lambda (tok) + (let* ((range (get-tok-attr tok)) + (start (car range)) + (end (cdr range))) + (if (or (eq? 'inf end) (<= start end)) + range + (lex-error (get-tok-line tok) + (get-tok-column tok) + "incorrect power specification."))))) + +(define power->star-plus + (lambda (re range) + (power->star-plus-rec re (car range) (cdr range)))) + +(define power->star-plus-rec + (lambda (re start end) + (cond ((eq? end 'inf) + (cond ((= start 0) + (make-re star-re re)) + ((= start 1) + (make-re plus-re re)) + (else + (make-re conc-re + re + (power->star-plus-rec re (- start 1) 'inf))))) + ((= start 0) + (cond ((= end 0) + (make-re epsilon-re)) + ((= end 1) + (make-re question-re re)) + (else + (make-re question-re + (power->star-plus-rec re 1 end))))) + ((= start 1) + (if (= end 1) + re + (make-re conc-re re (power->star-plus-rec re 0 (- end 1))))) + (else + (make-re conc-re + re + (power->star-plus-rec re (- start 1) (- end 1))))))) + +(define parse-tokens-fact + (let ((not-op-toks (make-dispatch-table number-of-tokens + (list (cons question-tok #f) + (cons plus-tok #f) + (cons star-tok #f) + (cons power-tok #f)) + #t))) + (lambda (tok-list macros) + (let* ((result (parse-tokens-atom tok-list macros)) + (re (car result)) + (tok-list2 (cdr result))) + (let loop ((re re) (tok-list3 tok-list2)) + (let* ((tok (car tok-list3)) + (tok-type (get-tok-type tok))) + (cond ((vector-ref not-op-toks tok-type) + (cons re tok-list3)) + ((= tok-type question-tok) + (loop (make-re question-re re) (cdr tok-list3))) + ((= tok-type plus-tok) + (loop (make-re plus-re re) (cdr tok-list3))) + ((= tok-type star-tok) + (loop (make-re star-re re) (cdr tok-list3))) + ((= tok-type power-tok) + (loop (power->star-plus re (check-power-tok tok)) + (cdr tok-list3)))))))))) + +(define parse-tokens-conc + (lambda (tok-list macros) + (let* ((result1 (parse-tokens-fact tok-list macros)) + (re1 (car result1)) + (tok-list2 (cdr result1)) + (tok (car tok-list2)) + (tok-type (get-tok-type tok))) + (cond ((or (= tok-type pipe-tok) + (= tok-type rpar-tok)) + result1) + (else ; Autres facteurs + (let* ((result2 (parse-tokens-conc tok-list2 macros)) + (re2 (car result2)) + (tok-list3 (cdr result2))) + (cons (make-re conc-re re1 re2) tok-list3))))))) + +(define parse-tokens-or + (lambda (tok-list macros) + (let* ((result1 (parse-tokens-conc tok-list macros)) + (re1 (car result1)) + (tok-list2 (cdr result1)) + (tok (car tok-list2)) + (tok-type (get-tok-type tok))) + (cond ((= tok-type pipe-tok) + (let* ((tok-list3 (cdr tok-list2)) + (result2 (parse-tokens-or tok-list3 macros)) + (re2 (car result2)) + (tok-list4 (cdr result2))) + (cons (make-re or-re re1 re2) tok-list4))) + (else ; rpar-tok + result1))))) + +(define parse-tokens-sub + (lambda (tok-list macros) + (let* ((tok-list2 (cdr tok-list)) ; Manger le lpar-tok + (result (parse-tokens-or tok-list2 macros)) + (re (car result)) + (tok-list3 (cdr result)) + (tok-list4 (cdr tok-list3))) ; Manger le rpar-tok + (cons re tok-list4)))) + +(define parse-tokens-match + (lambda (tok-list line) + (let loop ((tl tok-list) (count 0)) + (if (null? tl) + (if (> count 0) + (lex-error line + #f + "mismatched parentheses.")) + (let* ((tok (car tl)) + (tok-type (get-tok-type tok))) + (cond ((= tok-type lpar-tok) + (loop (cdr tl) (+ count 1))) + ((= tok-type rpar-tok) + (if (zero? count) + (lex-error line + #f + "mismatched parentheses.")) + (loop (cdr tl) (- count 1))) + (else + (loop (cdr tl) count)))))))) + +; Ne traite pas les anchors +(define parse-tokens + (lambda (tok-list macros) + (if (null? tok-list) + (make-re epsilon-re) + (let ((line (get-tok-line (car tok-list)))) + (parse-tokens-match tok-list line) + (let* ((begin-par (make-tok lpar-tok "" line 1)) + (end-par (make-tok rpar-tok "" line 1))) + (let* ((tok-list2 (append (list begin-par) + tok-list + (list end-par))) + (result (parse-tokens-sub tok-list2 macros))) + (car result))))))) ; (cdr result) == () obligatoirement + +(define tokens->regexp + (lambda (tok-list macros) + (let ((tok-list2 (de-anchor-tokens tok-list))) + (parse-tokens tok-list2 macros)))) + +(define tokens->rule + (lambda (tok-list macros) + (let* ((rule (extract-anchors tok-list)) + (tok-list2 (get-rule-regexp rule)) + (tok-list3 (de-anchor-tokens tok-list2)) + (re (parse-tokens tok-list3 macros))) + (set-rule-regexp rule re) + rule))) + +; Retourne une paire: <<EOF>>-action et vecteur des regles ordinaires +(define adapt-rules + (lambda (rules) + (let loop ((r rules) (revr '()) (<<EOF>>-action #f) (<<ERROR>>-action #f)) + (if (null? r) + (cons (or <<EOF>>-action default-<<EOF>>-action) + (cons (or <<ERROR>>-action default-<<ERROR>>-action) + (list->vector (reverse revr)))) + (let ((r1 (car r))) + (cond ((get-rule-eof? r1) + (if <<EOF>>-action + (lex-error (get-rule-line r1) + #f + "the <<EOF>> anchor can be " + "used at most once.") + (loop (cdr r) + revr + (get-rule-action r1) + <<ERROR>>-action))) + ((get-rule-error? r1) + (if <<ERROR>>-action + (lex-error (get-rule-line r1) + #f + "the <<ERROR>> anchor can be " + "used at most once.") + (loop (cdr r) + revr + <<EOF>>-action + (get-rule-action r1)))) + (else + (loop (cdr r) + (cons r1 revr) + <<EOF>>-action + <<ERROR>>-action)))))))) + + + + +; +; Analyseur de fichier lex +; + +(define parse-hv-blanks + (lambda () + (let* ((tok (lexer)) + (tok-type (get-tok-type tok))) + (if (or (= tok-type hblank-tok) + (= tok-type vblank-tok)) + (parse-hv-blanks) + (lexer-unget tok))))) + +(define parse-class-range + (lambda () + (let* ((tok (lexer)) + (tok-type (get-tok-type tok))) + (cond ((= tok-type char-tok) + (let* ((c (get-tok-attr tok)) + (tok2 (lexer)) + (tok2-type (get-tok-type tok2))) + (if (not (= tok2-type minus-tok)) + (begin + (lexer-unget tok2) + (cons c c)) + (let* ((tok3 (lexer)) + (tok3-type (get-tok-type tok3))) + (cond ((= tok3-type char-tok) + (let ((c2 (get-tok-attr tok3))) + (if (> c c2) + (lex-error (get-tok-line tok3) + (get-tok-column tok3) + "bad range specification in " + "character class;" + #\newline + "the start character is " + "higher than the end one.") + (cons c c2)))) + ((or (= tok3-type rbrack-tok) + (= tok3-type minus-tok)) + (lex-error (get-tok-line tok3) + (get-tok-column tok3) + "bad range specification in " + "character class; a specification" + #\newline + "like \"-x\", \"x--\" or \"x-]\" has " + "been used.")) + ((= tok3-type eof-tok) + (lex-error (get-tok-line tok3) + #f + "eof of file found while parsing " + "a character class."))))))) + ((= tok-type minus-tok) + (lex-error (get-tok-line tok) + (get-tok-column tok) + "bad range specification in character class; a " + "specification" + #\newline + "like \"-x\", \"x--\" or \"x-]\" has been used.")) + ((= tok-type rbrack-tok) + #f) + ((= tok-type eof-tok) + (lex-error (get-tok-line tok) + #f + "eof of file found while parsing " + "a character class.")))))) + +(define parse-class + (lambda (initial-class negative-class? line column) + (push-lexer 'class) + (let loop ((class initial-class)) + (let ((new-range (parse-class-range))) + (if new-range + (loop (class-union (list new-range) class)) + (let ((class (if negative-class? + (class-compl class) + class))) + (pop-lexer) + (make-tok class-tok "" line column class))))))) + +(define parse-string + (lambda (line column) + (push-lexer 'string) + (let ((char-list (let loop () + (let* ((tok (lexer)) + (tok-type (get-tok-type tok))) + (cond ((= tok-type char-tok) + (cons (get-tok-attr tok) (loop))) + ((= tok-type doublequote-tok) + (pop-lexer) + '()) + (else ; eof-tok + (lex-error (get-tok-line tok) + #f + "end of file found while " + "parsing a string."))))))) + (make-tok string-tok "" line column char-list)))) + +(define parse-regexp + (let* ((end-action + (lambda (tok loop) + (lexer-unget tok) + (pop-lexer) + (lexer-set-blank-history #f) + `())) + (action-table + (make-dispatch-table + number-of-tokens + (list (cons eof-tok end-action) + (cons hblank-tok end-action) + (cons vblank-tok end-action) + (cons lbrack-tok + (lambda (tok loop) + (let ((tok1 (parse-class (list) + #f + (get-tok-line tok) + (get-tok-column tok)))) + (cons tok1 (loop))))) + (cons lbrack-rbrack-tok + (lambda (tok loop) + (let ((tok1 (parse-class + (list (cons rbrack-ch rbrack-ch)) + #f + (get-tok-line tok) + (get-tok-column tok)))) + (cons tok1 (loop))))) + (cons lbrack-caret-tok + (lambda (tok loop) + (let ((tok1 (parse-class (list) + #t + (get-tok-line tok) + (get-tok-column tok)))) + (cons tok1 (loop))))) + (cons lbrack-minus-tok + (lambda (tok loop) + (let ((tok1 (parse-class + (list (cons minus-ch minus-ch)) + #f + (get-tok-line tok) + (get-tok-column tok)))) + (cons tok1 (loop))))) + (cons doublequote-tok + (lambda (tok loop) + (let ((tok1 (parse-string (get-tok-line tok) + (get-tok-column tok)))) + (cons tok1 (loop))))) + (cons illegal-tok + (lambda (tok loop) + (lex-error (get-tok-line tok) + (get-tok-column tok) + "syntax error in macro reference.")))) + (lambda (tok loop) + (cons tok (loop)))))) + (lambda () + (push-lexer 'regexp) + (lexer-set-blank-history #t) + (parse-hv-blanks) + (let loop () + (let* ((tok (lexer)) + (tok-type (get-tok-type tok)) + (action (vector-ref action-table tok-type))) + (action tok loop)))))) + +(define parse-ws1-regexp ; Exige un blanc entre le nom et la RE d'une macro + (lambda () + (let* ((tok (lexer)) + (tok-type (get-tok-type tok))) + (cond ((or (= tok-type hblank-tok) (= tok-type vblank-tok)) + (parse-regexp)) + (else ; percent-percent-tok, id-tok ou illegal-tok + (lex-error (get-tok-line tok) + (get-tok-column tok) + "white space expected.")))))) + +(define parse-macro + (lambda (macros) + (push-lexer 'macro) + (parse-hv-blanks) + (let* ((tok (lexer)) + (tok-type (get-tok-type tok))) + (cond ((= tok-type id-tok) + (let* ((name (get-tok-attr tok)) + (ass (assoc name macros))) + (if ass + (lex-error (get-tok-line tok) + (get-tok-column tok) + "the macro \"" + (get-tok-2nd-attr tok) + "\" has already been defined.") + (let* ((tok-list (parse-ws1-regexp)) + (regexp (tokens->regexp tok-list macros))) + (pop-lexer) + (cons name regexp))))) + ((= tok-type percent-percent-tok) + (pop-lexer) + #f) + ((= tok-type illegal-tok) + (lex-error (get-tok-line tok) + (get-tok-column tok) + "macro name expected.")) + ((= tok-type eof-tok) + (lex-error (get-tok-line tok) + #f + "end of file found before %%.")))))) + +(define parse-macros + (lambda () + (let loop ((macros '())) + (let ((macro (parse-macro macros))) + (if macro + (loop (cons macro macros)) + macros))))) + +(define parse-action-end + (lambda (<<EOF>>-action? <<ERROR>>-action? action?) + (let ((act (lexer-get-history))) + (cond (action? + act) + (<<EOF>>-action? + (string-append act default-<<EOF>>-action)) + (<<ERROR>>-action? + (string-append act default-<<ERROR>>-action)) + (else + (string-append act default-action)))))) + +(define parse-action + (lambda (<<EOF>>-action? <<ERROR>>-action?) + (push-lexer 'action) + (let loop ((action? #f)) + (let* ((tok (lexer)) + (tok-type (get-tok-type tok))) + (cond ((= tok-type char-tok) + (loop #t)) + ((= tok-type hblank-tok) + (loop action?)) + ((= tok-type vblank-tok) + (push-lexer 'regexp) + (let* ((tok (lexer)) + (tok-type (get-tok-type tok)) + (bidon (lexer-unget tok))) + (pop-lexer) + (if (or (= tok-type hblank-tok) + (= tok-type vblank-tok)) + (loop action?) + (begin + (pop-lexer) + (parse-action-end <<EOF>>-action? + <<ERROR>>-action? + action?))))) + (else ; eof-tok + (lexer-unget tok) + (pop-lexer) + (parse-action-end <<EOF>>-action? + <<ERROR>>-action? + action?))))))) + +(define parse-rule + (lambda (macros) + (let ((tok-list (parse-regexp))) + (if (null? tok-list) + #f + (let* ((rule (tokens->rule tok-list macros)) + (action + (parse-action (get-rule-eof? rule) (get-rule-error? rule)))) + (set-rule-action rule action) + rule))))) + +(define parse-rules + (lambda (macros) + (parse-action #f #f) + (let loop () + (let ((rule (parse-rule macros))) + (if rule + (cons rule (loop)) + '()))))) + +(define parser + (lambda (filename) + (let* ((port (open-input-file filename)) + (port-open? #t)) + (lex-unwind-protect (lambda () + (if port-open? + (close-input-port port)))) + (init-lexer port) + (let* ((macros (parse-macros)) + (rules (parse-rules macros))) + (close-input-port port) + (set! port-open? #f) + (adapt-rules rules))))) |