1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
|
; SILex - Scheme Implementation of Lex
; Copyright (C) 2001 Danny Dube'
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation; either version 2
; of the License, or (at your option) any later version.
;
; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, write to the Free Software
; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
;
; Fonctions auxilliaires du lexer
;
(define parse-spec-char
(lambda (lexeme line column)
(make-tok char-tok lexeme line column newline-ch)))
(define parse-digits-char
(lambda (lexeme line column)
(let* ((num (substring lexeme 1 (string-length lexeme)))
(n (string->number num)))
(make-tok char-tok lexeme line column n))))
(define parse-quoted-char
(lambda (lexeme line column)
(let ((c (string-ref lexeme 1)))
(make-tok char-tok lexeme line column (char->integer c)))))
(define parse-ordinary-char
(lambda (lexeme line column)
(let ((c (string-ref lexeme 0)))
(make-tok char-tok lexeme line column (char->integer c)))))
(define string-downcase
(lambda (s)
(let* ((l (string->list s))
(ld (map char-downcase l)))
(list->string ld))))
(define extract-id
(lambda (s)
(let ((len (string-length s)))
(substring s 1 (- len 1)))))
(define parse-id
(lambda (lexeme line column)
(make-tok id-tok lexeme line column (string-downcase lexeme) lexeme)))
(define parse-id-ref
(lambda (lexeme line column)
(let* ((orig-name (extract-id lexeme))
(name (string-downcase orig-name)))
(make-tok subst-tok lexeme line column name orig-name))))
(define parse-power-m
(lambda (lexeme line column)
(let* ((len (string-length lexeme))
(substr (substring lexeme 1 (- len 1)))
(m (string->number substr))
(range (cons m m)))
(make-tok power-tok lexeme line column range))))
(define parse-power-m-inf
(lambda (lexeme line column)
(let* ((len (string-length lexeme))
(substr (substring lexeme 1 (- len 2)))
(m (string->number substr))
(range (cons m 'inf)))
(make-tok power-tok lexeme line column range))))
(define parse-power-m-n
(lambda (lexeme line column)
(let ((len (string-length lexeme)))
(let loop ((comma 2))
(if (char=? (string-ref lexeme comma) #\,)
(let* ((sub1 (substring lexeme 1 comma))
(sub2 (substring lexeme (+ comma 1) (- len 1)))
(m (string->number sub1))
(n (string->number sub2))
(range (cons m n)))
(make-tok power-tok lexeme line column range))
(loop (+ comma 1)))))))
;
; Lexer generique
;
(define lexer-raw #f)
(define lexer-stack '())
(define lexer-alist #f)
(define lexer-buffer #f)
(define lexer-buffer-empty? #t)
(define lexer-history '())
(define lexer-history-interp #f)
(define init-lexer
(lambda (port)
(let* ((IS (lexer-make-IS 'port port 'all))
(action-lexer (lexer-make-lexer action-tables IS))
(class-lexer (lexer-make-lexer class-tables IS))
(macro-lexer (lexer-make-lexer macro-tables IS))
(regexp-lexer (lexer-make-lexer regexp-tables IS))
(string-lexer (lexer-make-lexer string-tables IS)))
(set! lexer-raw #f)
(set! lexer-stack '())
(set! lexer-alist
(list (cons 'action action-lexer)
(cons 'class class-lexer)
(cons 'macro macro-lexer)
(cons 'regexp regexp-lexer)
(cons 'string string-lexer)))
(set! lexer-buffer-empty? #t)
(set! lexer-history '()))))
; Lexer brut
; S'assurer qu'il n'y a pas de risque de changer de
; lexer quand le buffer est rempli
(define push-lexer
(lambda (name)
(set! lexer-stack (cons lexer-raw lexer-stack))
(set! lexer-raw (cdr (assq name lexer-alist)))))
(define pop-lexer
(lambda ()
(set! lexer-raw (car lexer-stack))
(set! lexer-stack (cdr lexer-stack))))
; Traite le "unget" (capacite du unget: 1)
(define lexer2
(lambda ()
(if lexer-buffer-empty?
(lexer-raw)
(begin
(set! lexer-buffer-empty? #t)
lexer-buffer))))
(define lexer2-unget
(lambda (tok)
(set! lexer-buffer tok)
(set! lexer-buffer-empty? #f)))
; Traite l'historique
(define lexer
(lambda ()
(let* ((tok (lexer2))
(tok-lexeme (get-tok-lexeme tok))
(hist-lexeme (if lexer-history-interp
(blank-translate tok-lexeme)
tok-lexeme)))
(set! lexer-history (cons hist-lexeme lexer-history))
tok)))
(define lexer-unget
(lambda (tok)
(set! lexer-history (cdr lexer-history))
(lexer2-unget tok)))
(define lexer-set-blank-history
(lambda (b)
(set! lexer-history-interp b)))
(define blank-translate
(lambda (s)
(let ((ss (string-copy s)))
(let loop ((i (- (string-length ss) 1)))
(cond ((< i 0)
ss)
((char=? (string-ref ss i) (integer->char tab-ch))
(loop (- i 1)))
((char=? (string-ref ss i) #\newline)
(loop (- i 1)))
(else
(string-set! ss i #\space)
(loop (- i 1))))))))
(define lexer-get-history
(lambda ()
(let* ((rightlist (reverse lexer-history))
(str (apply string-append rightlist))
(strlen (string-length str))
(str2 (if (and (> strlen 0)
(char=? (string-ref str (- strlen 1)) #\newline))
str
(string-append str (string #\newline)))))
(set! lexer-history '())
str2)))
;
; Traitement des listes de tokens
;
(define de-anchor-tokens
(let ((not-anchor-toks (make-dispatch-table number-of-tokens
(list (cons caret-tok #f)
(cons dollar-tok #f)
(cons <<EOF>>-tok #f)
(cons <<ERROR>>-tok #f))
#t)))
(lambda (tok-list)
(if (null? tok-list)
'()
(let* ((tok (car tok-list))
(tok-type (get-tok-type tok))
(toks (cdr tok-list))
(new-toks (de-anchor-tokens toks)))
(cond ((vector-ref not-anchor-toks tok-type)
(cons tok new-toks))
((or (= tok-type caret-tok) (= tok-type dollar-tok))
(let* ((line (get-tok-line tok))
(column (get-tok-column tok))
(attr (if (= tok-type caret-tok) caret-ch dollar-ch))
(new-tok (make-tok char-tok "" line column attr)))
(cons new-tok new-toks)))
((= tok-type <<EOF>>-tok)
(lex-error (get-tok-line tok)
(get-tok-column tok)
"the <<EOF>> anchor must be used alone"
" and only after %%."))
((= tok-type <<ERROR>>-tok)
(lex-error (get-tok-line tok)
(get-tok-column tok)
"the <<ERROR>> anchor must be used alone"
" and only after %%."))))))))
(define strip-end
(lambda (l)
(if (null? (cdr l))
'()
(cons (car l) (strip-end (cdr l))))))
(define extract-anchors
(lambda (tok-list)
(let* ((tok1 (car tok-list))
(line (get-tok-line tok1))
(tok1-type (get-tok-type tok1)))
(cond ((and (= tok1-type <<EOF>>-tok) (null? (cdr tok-list)))
(make-rule line #t #f #f #f '() #f))
((and (= tok1-type <<ERROR>>-tok) (null? (cdr tok-list)))
(make-rule line #f #t #f #f '() #f))
(else
(let* ((bol? (= tok1-type caret-tok))
(tok-list2 (if bol? (cdr tok-list) tok-list)))
(if (null? tok-list2)
(make-rule line #f #f bol? #f tok-list2 #f)
(let* ((len (length tok-list2))
(tok2 (list-ref tok-list2 (- len 1)))
(tok2-type (get-tok-type tok2))
(eol? (= tok2-type dollar-tok))
(tok-list3 (if eol?
(strip-end tok-list2)
tok-list2)))
(make-rule line #f #f bol? eol? tok-list3 #f)))))))))
(define char-list->conc
(lambda (char-list)
(if (null? char-list)
(make-re epsilon-re)
(let loop ((cl char-list))
(let* ((c (car cl))
(cl2 (cdr cl)))
(if (null? cl2)
(make-re char-re c)
(make-re conc-re (make-re char-re c) (loop cl2))))))))
(define parse-tokens-atom
(let ((action-table
(make-dispatch-table
number-of-tokens
(list (cons lpar-tok
(lambda (tok tok-list macros)
(parse-tokens-sub tok-list macros)))
(cons dot-tok
(lambda (tok tok-list macros)
(cons (make-re class-re dot-class) (cdr tok-list))))
(cons subst-tok
(lambda (tok tok-list macros)
(let* ((name (get-tok-attr tok))
(ass (assoc name macros)))
(if ass
(cons (cdr ass) (cdr tok-list))
(lex-error (get-tok-line tok)
(get-tok-column tok)
"unknown macro \""
(get-tok-2nd-attr tok)
"\".")))))
(cons char-tok
(lambda (tok tok-list macros)
(let ((c (get-tok-attr tok)))
(cons (make-re char-re c) (cdr tok-list)))))
(cons class-tok
(lambda (tok tok-list macros)
(let ((class (get-tok-attr tok)))
(cons (make-re class-re class) (cdr tok-list)))))
(cons string-tok
(lambda (tok tok-list macros)
(let* ((char-list (get-tok-attr tok))
(re (char-list->conc char-list)))
(cons re (cdr tok-list))))))
(lambda (tok tok-list macros)
(lex-error (get-tok-line tok)
(get-tok-column tok)
"syntax error in regular expression.")))))
(lambda (tok-list macros)
(let* ((tok (car tok-list))
(tok-type (get-tok-type tok))
(action (vector-ref action-table tok-type)))
(action tok tok-list macros)))))
(define check-power-tok
(lambda (tok)
(let* ((range (get-tok-attr tok))
(start (car range))
(end (cdr range)))
(if (or (eq? 'inf end) (<= start end))
range
(lex-error (get-tok-line tok)
(get-tok-column tok)
"incorrect power specification.")))))
(define power->star-plus
(lambda (re range)
(power->star-plus-rec re (car range) (cdr range))))
(define power->star-plus-rec
(lambda (re start end)
(cond ((eq? end 'inf)
(cond ((= start 0)
(make-re star-re re))
((= start 1)
(make-re plus-re re))
(else
(make-re conc-re
re
(power->star-plus-rec re (- start 1) 'inf)))))
((= start 0)
(cond ((= end 0)
(make-re epsilon-re))
((= end 1)
(make-re question-re re))
(else
(make-re question-re
(power->star-plus-rec re 1 end)))))
((= start 1)
(if (= end 1)
re
(make-re conc-re re (power->star-plus-rec re 0 (- end 1)))))
(else
(make-re conc-re
re
(power->star-plus-rec re (- start 1) (- end 1)))))))
(define parse-tokens-fact
(let ((not-op-toks (make-dispatch-table number-of-tokens
(list (cons question-tok #f)
(cons plus-tok #f)
(cons star-tok #f)
(cons power-tok #f))
#t)))
(lambda (tok-list macros)
(let* ((result (parse-tokens-atom tok-list macros))
(re (car result))
(tok-list2 (cdr result)))
(let loop ((re re) (tok-list3 tok-list2))
(let* ((tok (car tok-list3))
(tok-type (get-tok-type tok)))
(cond ((vector-ref not-op-toks tok-type)
(cons re tok-list3))
((= tok-type question-tok)
(loop (make-re question-re re) (cdr tok-list3)))
((= tok-type plus-tok)
(loop (make-re plus-re re) (cdr tok-list3)))
((= tok-type star-tok)
(loop (make-re star-re re) (cdr tok-list3)))
((= tok-type power-tok)
(loop (power->star-plus re (check-power-tok tok))
(cdr tok-list3))))))))))
(define parse-tokens-conc
(lambda (tok-list macros)
(let* ((result1 (parse-tokens-fact tok-list macros))
(re1 (car result1))
(tok-list2 (cdr result1))
(tok (car tok-list2))
(tok-type (get-tok-type tok)))
(cond ((or (= tok-type pipe-tok)
(= tok-type rpar-tok))
result1)
(else ; Autres facteurs
(let* ((result2 (parse-tokens-conc tok-list2 macros))
(re2 (car result2))
(tok-list3 (cdr result2)))
(cons (make-re conc-re re1 re2) tok-list3)))))))
(define parse-tokens-or
(lambda (tok-list macros)
(let* ((result1 (parse-tokens-conc tok-list macros))
(re1 (car result1))
(tok-list2 (cdr result1))
(tok (car tok-list2))
(tok-type (get-tok-type tok)))
(cond ((= tok-type pipe-tok)
(let* ((tok-list3 (cdr tok-list2))
(result2 (parse-tokens-or tok-list3 macros))
(re2 (car result2))
(tok-list4 (cdr result2)))
(cons (make-re or-re re1 re2) tok-list4)))
(else ; rpar-tok
result1)))))
(define parse-tokens-sub
(lambda (tok-list macros)
(let* ((tok-list2 (cdr tok-list)) ; Manger le lpar-tok
(result (parse-tokens-or tok-list2 macros))
(re (car result))
(tok-list3 (cdr result))
(tok-list4 (cdr tok-list3))) ; Manger le rpar-tok
(cons re tok-list4))))
(define parse-tokens-match
(lambda (tok-list line)
(let loop ((tl tok-list) (count 0))
(if (null? tl)
(if (> count 0)
(lex-error line
#f
"mismatched parentheses."))
(let* ((tok (car tl))
(tok-type (get-tok-type tok)))
(cond ((= tok-type lpar-tok)
(loop (cdr tl) (+ count 1)))
((= tok-type rpar-tok)
(if (zero? count)
(lex-error line
#f
"mismatched parentheses."))
(loop (cdr tl) (- count 1)))
(else
(loop (cdr tl) count))))))))
; Ne traite pas les anchors
(define parse-tokens
(lambda (tok-list macros)
(if (null? tok-list)
(make-re epsilon-re)
(let ((line (get-tok-line (car tok-list))))
(parse-tokens-match tok-list line)
(let* ((begin-par (make-tok lpar-tok "" line 1))
(end-par (make-tok rpar-tok "" line 1)))
(let* ((tok-list2 (append (list begin-par)
tok-list
(list end-par)))
(result (parse-tokens-sub tok-list2 macros)))
(car result))))))) ; (cdr result) == () obligatoirement
(define tokens->regexp
(lambda (tok-list macros)
(let ((tok-list2 (de-anchor-tokens tok-list)))
(parse-tokens tok-list2 macros))))
(define tokens->rule
(lambda (tok-list macros)
(let* ((rule (extract-anchors tok-list))
(tok-list2 (get-rule-regexp rule))
(tok-list3 (de-anchor-tokens tok-list2))
(re (parse-tokens tok-list3 macros)))
(set-rule-regexp rule re)
rule)))
; Retourne une paire: <<EOF>>-action et vecteur des regles ordinaires
(define adapt-rules
(lambda (rules)
(let loop ((r rules) (revr '()) (<<EOF>>-action #f) (<<ERROR>>-action #f))
(if (null? r)
(cons (or <<EOF>>-action default-<<EOF>>-action)
(cons (or <<ERROR>>-action default-<<ERROR>>-action)
(list->vector (reverse revr))))
(let ((r1 (car r)))
(cond ((get-rule-eof? r1)
(if <<EOF>>-action
(lex-error (get-rule-line r1)
#f
"the <<EOF>> anchor can be "
"used at most once.")
(loop (cdr r)
revr
(get-rule-action r1)
<<ERROR>>-action)))
((get-rule-error? r1)
(if <<ERROR>>-action
(lex-error (get-rule-line r1)
#f
"the <<ERROR>> anchor can be "
"used at most once.")
(loop (cdr r)
revr
<<EOF>>-action
(get-rule-action r1))))
(else
(loop (cdr r)
(cons r1 revr)
<<EOF>>-action
<<ERROR>>-action))))))))
;
; Analyseur de fichier lex
;
(define parse-hv-blanks
(lambda ()
(let* ((tok (lexer))
(tok-type (get-tok-type tok)))
(if (or (= tok-type hblank-tok)
(= tok-type vblank-tok))
(parse-hv-blanks)
(lexer-unget tok)))))
(define parse-class-range
(lambda ()
(let* ((tok (lexer))
(tok-type (get-tok-type tok)))
(cond ((= tok-type char-tok)
(let* ((c (get-tok-attr tok))
(tok2 (lexer))
(tok2-type (get-tok-type tok2)))
(if (not (= tok2-type minus-tok))
(begin
(lexer-unget tok2)
(cons c c))
(let* ((tok3 (lexer))
(tok3-type (get-tok-type tok3)))
(cond ((= tok3-type char-tok)
(let ((c2 (get-tok-attr tok3)))
(if (> c c2)
(lex-error (get-tok-line tok3)
(get-tok-column tok3)
"bad range specification in "
"character class;"
#\newline
"the start character is "
"higher than the end one.")
(cons c c2))))
((or (= tok3-type rbrack-tok)
(= tok3-type minus-tok))
(lex-error (get-tok-line tok3)
(get-tok-column tok3)
"bad range specification in "
"character class; a specification"
#\newline
"like \"-x\", \"x--\" or \"x-]\" has "
"been used."))
((= tok3-type eof-tok)
(lex-error (get-tok-line tok3)
#f
"eof of file found while parsing "
"a character class.")))))))
((= tok-type minus-tok)
(lex-error (get-tok-line tok)
(get-tok-column tok)
"bad range specification in character class; a "
"specification"
#\newline
"like \"-x\", \"x--\" or \"x-]\" has been used."))
((= tok-type rbrack-tok)
#f)
((= tok-type eof-tok)
(lex-error (get-tok-line tok)
#f
"eof of file found while parsing "
"a character class."))))))
(define parse-class
(lambda (initial-class negative-class? line column)
(push-lexer 'class)
(let loop ((class initial-class))
(let ((new-range (parse-class-range)))
(if new-range
(loop (class-union (list new-range) class))
(let ((class (if negative-class?
(class-compl class)
class)))
(pop-lexer)
(make-tok class-tok "" line column class)))))))
(define parse-string
(lambda (line column)
(push-lexer 'string)
(let ((char-list (let loop ()
(let* ((tok (lexer))
(tok-type (get-tok-type tok)))
(cond ((= tok-type char-tok)
(cons (get-tok-attr tok) (loop)))
((= tok-type doublequote-tok)
(pop-lexer)
'())
(else ; eof-tok
(lex-error (get-tok-line tok)
#f
"end of file found while "
"parsing a string.")))))))
(make-tok string-tok "" line column char-list))))
(define parse-regexp
(let* ((end-action
(lambda (tok loop)
(lexer-unget tok)
(pop-lexer)
(lexer-set-blank-history #f)
`()))
(action-table
(make-dispatch-table
number-of-tokens
(list (cons eof-tok end-action)
(cons hblank-tok end-action)
(cons vblank-tok end-action)
(cons lbrack-tok
(lambda (tok loop)
(let ((tok1 (parse-class (list)
#f
(get-tok-line tok)
(get-tok-column tok))))
(cons tok1 (loop)))))
(cons lbrack-rbrack-tok
(lambda (tok loop)
(let ((tok1 (parse-class
(list (cons rbrack-ch rbrack-ch))
#f
(get-tok-line tok)
(get-tok-column tok))))
(cons tok1 (loop)))))
(cons lbrack-caret-tok
(lambda (tok loop)
(let ((tok1 (parse-class (list)
#t
(get-tok-line tok)
(get-tok-column tok))))
(cons tok1 (loop)))))
(cons lbrack-minus-tok
(lambda (tok loop)
(let ((tok1 (parse-class
(list (cons minus-ch minus-ch))
#f
(get-tok-line tok)
(get-tok-column tok))))
(cons tok1 (loop)))))
(cons doublequote-tok
(lambda (tok loop)
(let ((tok1 (parse-string (get-tok-line tok)
(get-tok-column tok))))
(cons tok1 (loop)))))
(cons illegal-tok
(lambda (tok loop)
(lex-error (get-tok-line tok)
(get-tok-column tok)
"syntax error in macro reference."))))
(lambda (tok loop)
(cons tok (loop))))))
(lambda ()
(push-lexer 'regexp)
(lexer-set-blank-history #t)
(parse-hv-blanks)
(let loop ()
(let* ((tok (lexer))
(tok-type (get-tok-type tok))
(action (vector-ref action-table tok-type)))
(action tok loop))))))
(define parse-ws1-regexp ; Exige un blanc entre le nom et la RE d'une macro
(lambda ()
(let* ((tok (lexer))
(tok-type (get-tok-type tok)))
(cond ((or (= tok-type hblank-tok) (= tok-type vblank-tok))
(parse-regexp))
(else ; percent-percent-tok, id-tok ou illegal-tok
(lex-error (get-tok-line tok)
(get-tok-column tok)
"white space expected."))))))
(define parse-macro
(lambda (macros)
(push-lexer 'macro)
(parse-hv-blanks)
(let* ((tok (lexer))
(tok-type (get-tok-type tok)))
(cond ((= tok-type id-tok)
(let* ((name (get-tok-attr tok))
(ass (assoc name macros)))
(if ass
(lex-error (get-tok-line tok)
(get-tok-column tok)
"the macro \""
(get-tok-2nd-attr tok)
"\" has already been defined.")
(let* ((tok-list (parse-ws1-regexp))
(regexp (tokens->regexp tok-list macros)))
(pop-lexer)
(cons name regexp)))))
((= tok-type percent-percent-tok)
(pop-lexer)
#f)
((= tok-type illegal-tok)
(lex-error (get-tok-line tok)
(get-tok-column tok)
"macro name expected."))
((= tok-type eof-tok)
(lex-error (get-tok-line tok)
#f
"end of file found before %%."))))))
(define parse-macros
(lambda ()
(let loop ((macros '()))
(let ((macro (parse-macro macros)))
(if macro
(loop (cons macro macros))
macros)))))
(define parse-action-end
(lambda (<<EOF>>-action? <<ERROR>>-action? action?)
(let ((act (lexer-get-history)))
(cond (action?
act)
(<<EOF>>-action?
(string-append act default-<<EOF>>-action))
(<<ERROR>>-action?
(string-append act default-<<ERROR>>-action))
(else
(string-append act default-action))))))
(define parse-action
(lambda (<<EOF>>-action? <<ERROR>>-action?)
(push-lexer 'action)
(let loop ((action? #f))
(let* ((tok (lexer))
(tok-type (get-tok-type tok)))
(cond ((= tok-type char-tok)
(loop #t))
((= tok-type hblank-tok)
(loop action?))
((= tok-type vblank-tok)
(push-lexer 'regexp)
(let* ((tok (lexer))
(tok-type (get-tok-type tok))
(bidon (lexer-unget tok)))
(pop-lexer)
(if (or (= tok-type hblank-tok)
(= tok-type vblank-tok))
(loop action?)
(begin
(pop-lexer)
(parse-action-end <<EOF>>-action?
<<ERROR>>-action?
action?)))))
(else ; eof-tok
(lexer-unget tok)
(pop-lexer)
(parse-action-end <<EOF>>-action?
<<ERROR>>-action?
action?)))))))
(define parse-rule
(lambda (macros)
(let ((tok-list (parse-regexp)))
(if (null? tok-list)
#f
(let* ((rule (tokens->rule tok-list macros))
(action
(parse-action (get-rule-eof? rule) (get-rule-error? rule))))
(set-rule-action rule action)
rule)))))
(define parse-rules
(lambda (macros)
(parse-action #f #f)
(let loop ()
(let ((rule (parse-rule macros)))
(if rule
(cons rule (loop))
'())))))
(define parser
(lambda (filename)
(let* ((port (open-input-file filename))
(port-open? #t))
(lex-unwind-protect (lambda ()
(if port-open?
(close-input-port port))))
(init-lexer port)
(let* ((macros (parse-macros))
(rules (parse-rules macros)))
(close-input-port port)
(set! port-open? #f)
(adapt-rules rules)))))
|