mes/module/nyacc/lalr.scm

2038 lines
72 KiB
Scheme

;;; nyacc/lalr.scm
;;;
;;; Copyright (C) 2014-2016 Matthew R. Wette
;;;
;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public
;;; License as published by the Free Software Foundation; either
;;; version 3 of the License, or (at your option) any later version.
;;;
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Lesser General Public License for more details.
;;;
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free Software
;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
(cond-expand
(guile-2)
(guile
(use-modules (ice-9 syncase))
(use-modules (ice-9 optargs)))
(mes))
;; I need to find way to preserve srconf, rrconf after hashify.
;; compact needs to deal with it ...
(define-module (nyacc lalr)
#:export-syntax (lalr-spec)
#:export (*nyacc-version*
make-lalr-machine compact-machine hashify-machine
lalr-match-table
restart-spec add-recovery-logic!
pp-lalr-notice pp-lalr-grammar pp-lalr-machine
write-lalr-actions write-lalr-tables
pp-rule find-terminal gen-match-table ; used by (nyacc bison)
;; for debugging:
with-spec make-LR0-machine
its-member its-trans
looking-at first-item
terminal? non-terminal?
range-next
process-spec
reserved?
)
#:use-module ((srfi srfi-1) #:select (fold fold-right remove lset-union
lset-intersection lset-difference))
#:use-module ((srfi srfi-9) #:select (define-record-type))
#:use-module ((srfi srfi-43) #:select (vector-map vector-for-each vector-any))
#:use-module (nyacc util)
)
(define *nyacc-version* "0.74.2+fixes")
;; @deffn proxy-? sym rhs
;; @example
;; (LHS (($? RHS))
;; ($P (($$ #f))
;; ($P RHS ($$ (set-cdr! (last-pair $1) (list $2)) $1)))
;; @end example
(define (proxy-? sym rhs)
(list sym
(list '(action #f #f (list)))
rhs))
;; @deffn proxy-+ sym rhs
;; @example
;; (LHS (($* RHS))
;; ($P (($$ '()))
;; ($P RHS ($$ (set-cdr! (last-pair $1) (list $2)) $1)))
;; @end example
(define (proxy-* sym rhs)
(if (pair? (filter (lambda (elt) (eqv? 'action (car elt))) rhs))
(error "no RHS action allowed")) ;; rhs
(list
sym
(list '(action #f #f (list)))
(append (cons (cons 'non-terminal sym) rhs)
(list '(action #f #f
(set-cdr! (last-pair $1) (list $2))
$1)))))
;; @deffn proxy-+ sym rhs
;; @example
;; (LHS (($+ RHS))
;; ($P (RHS ($$ (list $1)))
;; ($P RHS ($$ (set-cdr! (last-pair $1) (list $2)) $1)))
;; @end example
(define (proxy-+ sym rhs)
(if (pair? (filter (lambda (elt) (eq? 'action (car elt))) rhs))
(error "no RHS action allowed")) ;; rhs
(list
sym
(append rhs (list '(action #f #f (list $1))))
(append (cons (cons 'non-terminal sym) rhs)
(list '(action #f #f
(set-cdr! (last-pair $1) (list $2))
$1)))))
;; @deffn reserved? grammar-symbol
;; Determine whether the syntax argument is a reserved symbol, that is.
;; So instead of writing @code{'$fixed} for syntax one can write
;; @code{$fixed}. We may want to change this to
;; @example
;; (reserved-terminal? grammar-symbol)
;; (reserved-non-term? grammar-symbol)
;; @end example
(define (reserved? grammar-symbol)
;; If the first character `$' then it's reserved.
(eqv? #\$ (string-ref (symbol->string (syntax->datum grammar-symbol)) 0)))
;; @deffn lalr-spec grammar => spec
;; This routine reads a grammar in a scheme-like syntax and returns an a-list.
;; This spec' can be an input for @item{make-parser-generator} or
;; @item{pp-spec}.
;;.This will return the specification. Notably the grammar will have rhs
;; arguments decorated with type (e.g., @code{(terminal . #\,)}).
;; Each production rule in the grammar will be of the form
;; @code{(lhs rhs1 rhs2 ...)} where each element of the RHS is one of
;; @itemize
;; @item @code{('terminal . atom)}
;; @item @code{('non-terminal . symbol)}
;; @item @code{('action . (ref narg guts)}
;; @item @code{('proxy . production-rule)}
;; @end itemize
;; Currently, the number of arguments for items is computed in the routine
;; @code{process-grammar}.
(define-syntax lalr-spec
(syntax-rules +++ ()
((_ <expr> +++)
(let* ()
(letrec-syntax
((with-attr-list
(syntax-rules ($prune)
((_ ($prune <symb>) <ex> ...)
(cons '(prune . <symb>) (with-attr-list <ex> ...)))
((_) '())))
(parse-rhs
(lambda (x)
;; The following is syntax-case because we use a fender.
(syntax-case x (quote $$ $$/ref $$-ref $prec $with $empty
$? $* $+)
;; action specifications
((_ ($$ <guts> ...) <e2> ...)
#'(cons '(action #f #f <guts> ...) (parse-rhs <e2> ...)))
((_ ($$-ref <ref>) <e2> ...)
;;#'(cons '(action #f <ref> #f) (parse-rhs <e2> ...)))
#'(cons `(action #f ,<ref> . #f) (parse-rhs <e2> ...)))
((_ ($$/ref <ref> <guts> ...) <e2> ...)
#'(cons `(action #f ,<ref> <guts> ...) (parse-rhs <e2> ...)))
;; other internal $-syntax
((_ ($prec <tok>) <e2> ...)
#'(cons (cons 'prec (tokenize <tok>)) (parse-rhs <e2> ...)))
((_ ($with <lhs-ref> <ex> ...) <e2> ...)
#'(cons `(with <lhs-ref> ,@(with-attr-list <ex> ...))
(parse-rhs <e2> ...)))
((_ $empty <e2> ...) ; TODO: propagate to processor
#'(parse-rhs <e2> ...))
;; (experimental) proxies
((_ ($? <s1> <s2> ...) <e2> ...)
#'(cons (cons* 'proxy proxy-? (parse-rhs <s1> <s2> ...))
(parse-rhs <e2> ...)))
((_ ($+ <s1> <s2> ...) <e2> ...)
#'(cons (cons* 'proxy proxy-+ (parse-rhs <s1> <s2> ...))
(parse-rhs <e2> ...)))
((_ ($* <s1> <s2> ...) <e2> ...)
#'(cons (cons* 'proxy proxy-* (parse-rhs <s1> <s2> ...))
(parse-rhs <e2> ...)))
;; terminals and non-terminals
((_ (quote <e1>) <e2> ...)
#'(cons '(terminal . <e1>) (parse-rhs <e2> ...)))
((_ (<f> ...) <e2> ...)
#'(cons (<f> ...) (parse-rhs <e2> ...)))
((_ <e1> <e2> ...)
(identifier? (syntax <e1>)) ; fender to trap non-term's
(if (reserved? (syntax <e1>))
#'(cons '(terminal . <e1>) (parse-rhs <e2> ...))
#'(cons '(non-terminal . <e1>) (parse-rhs <e2> ...))))
((_ <e1> <e2> ...)
#'(cons '(terminal . <e1>) (parse-rhs <e2> ...)))
((_) #'(list)))))
(parse-rhs-list
(syntax-rules ()
((_ (<ex> ...) <rhs> ...)
(cons (parse-rhs <ex> ...)
(parse-rhs-list <rhs> ...)))
((_) '())))
(parse-grammar
(syntax-rules ()
((_ (<lhs> <rhs> ...) <prod> ...)
(cons (cons '<lhs> (parse-rhs-list <rhs> ...))
(parse-grammar <prod> ...)))
((_) '())))
(tokenize
(lambda (x)
(syntax-case x ()
((_ <tk>) (identifier? (syntax <tk>)) #'(quote <tk>))
((_ <tk>) #'<tk>))))
(tokenize-list
(syntax-rules ()
((_ <tk1> <tk2> ...)
(cons (tokenize <tk1>) (tokenize-list <tk2> ...)))
((_) '())))
(parse-precedence
(syntax-rules (left right nonassoc)
((_ (left <tk> ...) <ex> ...)
(cons (cons 'left (tokenize-list <tk> ...))
(parse-precedence <ex> ...)))
((_ (right <tk> ...) <ex> ...)
(cons (cons 'right (tokenize-list <tk> ...))
(parse-precedence <ex> ...)))
((_ (nonassoc <tk> ...) <ex> ...)
(cons (cons 'nonassoc (tokenize-list <tk> ...))
(parse-precedence <ex> ...)))
((_ <tk> <ex> ...)
(cons (list 'undecl (tokenize <tk>))
(parse-precedence <ex> ...)))
((_) '())))
(lalr-spec-1
(syntax-rules (start expect notice prec< prec> grammar)
((_ (start <symb>) <e> ...)
(cons (cons 'start '<symb>) (lalr-spec-1 <e> ...)))
((_ (expect <n>) <e> ...)
(cons (cons 'expect <n>) (lalr-spec-1 <e> ...)))
((_ (notice <str>) <e> ...)
(cons (cons 'notice <str>) (lalr-spec-1 <e> ...)))
((_ (prec< <ex> ...) <e> ...)
(cons (cons 'precedence (parse-precedence <ex> ...))
(lalr-spec-1 <e> ...)))
((_ (prec> <ex> ...) <e> ...)
(cons (cons 'precedence (reverse (parse-precedence <ex> ...)))
(lalr-spec-1 <e> ...)))
((_ (grammar <prod> ...) <e> ...)
(cons (cons 'grammar (parse-grammar <prod> ...))
(lalr-spec-1 <e> ...)))
((_) '()))))
(process-spec (lalr-spec-1 <expr> +++)))))))
;; @deffn atomize terminal => object
;; Generate an atomic object for a terminal. Expected terminals are strings,
;; characters and symbols. This will convert the strings @code{s} to symbols
;; of the form @code{'$:s}.
(define (atomize terminal)
(if (string? terminal)
(string->symbol (string-append "$:" terminal))
terminal))
;; @deffn normize terminal => char|symbol
;; Normalize a token. This routine will normalize tokens in order to check
;; for similarities. For example, @code{"+"} and @code{#\+} are similar,
;; @code{'foo} and @code{"foo"} are similar.
(define (normize terminal)
(if (not (string? terminal)) terminal
(if (= 1 (string-length terminal))
(string-ref terminal 0)
(string->symbol terminal))))
;; @deffn eqv-terminal? a b
;; This is a predicate to determine if the terminals @code{a} and @code{b}
;; are equivalent.
(define (eqv-terminal? a b)
(eqv? (atomize a) (atomize b)))
;; @deffn find-terminal symb term-l => term-symb
;; Find the terminal in @code{term-l} that is equivalent to @code{symb}.
(define (find-terminal symb term-l)
(let iter ((tl term-l))
(if (null? tl) #f
(if (eqv-terminal? symb (car tl)) (car tl)
(iter (cdr tl))))))
;; @deffn process-spec tree => specification (as a-list)
;; Here we sweep through the production rules. We flatten and order the rules
;; and place all p-rules with like LHSs together. There is a non-trivial
;; amount of extra code to deal with mid-rule actions (MRAs).
(define (process-spec tree)
;; Make a new symbol. This is a helper for proxies and mid-rule-actions.
;; The counter here is the only @code{set!} in @code{process-spec}.
;; Otherwise, I believe @code{process-spec} is referentially transparent.
(define maksy
(let ((cntr 1))
(lambda ()
(let ((c cntr))
(set! cntr (1+ cntr))
(string->symbol (string-append "$P" (number->string c)))))))
;; Canonicalize precedence and associativity. Precedence will appear
;; as sets of equivalent items in increasing order of precedence
;; (e.g., @code{((+ -) (* /)}). The input tree has nodes that look like
;; @example
;; '(precedence (left "+" "-") (left "*" "/"))
;; '(precedence ('then "else")
;; @end example
;; @noindent
;; =>
;; @example
;; (prec ((+ -) (* /)) ((then) (else)))
;; @end example
(define (prec-n-assc tree)
;; prec-l; lt-assc-l rt-assc-l non-assc-l pspec
(let iter ((pll '()) (pl '()) (la '()) (ra '()) (na '())
(spec '()) (tree tree))
(cond
((pair? spec)
;; item ~ ('left "+" "-") => a ~ 'left, tl ~ (#\+ #\-)
(let* ((item (car spec)) (as (car item)) (tl (map atomize (cdr item))))
(case as
((left)
(iter pll (cons tl pl) (append tl la) ra na (cdr spec) tree))
((right)
(iter pll (cons tl pl) la (append tl ra) na (cdr spec) tree))
((nonassoc)
(iter pll (cons tl pl) la ra (append tl na) (cdr spec) tree))
((undecl)
(iter pll (cons tl pl) la ra na (cdr spec) tree)))))
((pair? pl)
(iter (cons (reverse pl) pll) '() la ra na spec tree))
((pair? tree)
(iter pll pl la ra na
(if (eqv? 'precedence (caar tree)) (cdar tree) '()) (cdr tree)))
(else
(list
`(prec . ,(reverse pll))
`(assc (left ,@la) (right ,@ra) (nonassoc ,@na)))))))
;;.@deffn make-mra-proxy sy pel act => ???
;; Generate a mid-rule-action proxy.
(define (make-mra-proxy sy pel act)
(list sy (list (cons* 'action (length pel) (cdr act)))))
;; @deffn gram-check-2 tl nl err-l
;; Check for fatal: symbol used as terminal and non-terminal.
(define (gram-check-2 tl nl err-l)
(let ((cf (lset-intersection eqv? (map atomize tl) nl)))
(if (pair? cf)
(cons (fmtstr "*** symbol is terminal and non-terminal: ~S" cf)
err-l) err-l)))
;; @deffn gram-check-3 ll nl err-l
;; Check for fatal: non-terminal's w/o production rule.
(define (gram-check-3 ll nl err-l)
(fold
(lambda (n l)
(if (not (memq n ll))
(cons (fmtstr "*** non-terminal with no production rule: ~A" n) l)
l))
err-l nl))
;; @deffn gram-check-4 ll nl err-l
;; Check for warning: unused LHS.
;; TODO: which don't appear in OTHER RHS, e.g., (foo (foo))
(define (gram-check-4 ll nl err-l)
(fold
(lambda (s l) (cons (fmtstr "+++ LHS not used in any RHS: ~A" s) l))
err-l
(let iter ((ull '()) (all ll)) ; unused LHSs, all LHS's
(if (null? all) ull
(iter (if (or (memq (car all) nl)
(memq (car all) ull)
(eq? (car all) '$start))
ull (cons (car all) ull))
(cdr all))))))
;; TODO: check for repeated tokens in precedence spec's: prec<, prec>
(let* ((gram (assq-ref tree 'grammar))
(start-symbol (and=> (assq-ref tree 'start) atomize))
(start-rule (lambda () (list start-symbol)))
(add-el (lambda (e l) (if (member e l) l (cons e l))))
(pna (prec-n-assc tree)))
;; We sweep through the grammar to generate a canonical specification.
;; Note: the local rhs is used to hold RHS terms, but a
;; value of @code{'()} is used to signal "add rule", and a value of
;; @code{#f} is used to signal ``done, proceed to next rule.''
;; We use @code{tail} below to go through all remaining rules so that any
;; like LHS get absorbed before proceeding: This keeps LHS in sequence.
;; Note: code-comm and lone-comm are added to terminals so that they end
;; up in the match-table. The parser will skip these if the automoton has
;; no associated transitions for these. This allows users to parse for
;; comments in some rules but skip the rest.
(let iter ((ll '($start)) ; LHS list
(@l (list ; attributes per prod' rule
`((rhs . ,(vector start-symbol))
(ref . all) (act 1 $1))))
(tl '($code-comm $lone-comm $error $end)) ; set of terminals
(nl (list start-symbol)) ; set of non-terminals
;;
(head gram) ; head of unprocessed productions
(prox '()) ; proxy productions for MRA
(lhs #f) ; current LHS (symbol)
(tail '()) ; tail of grammar productions
(rhs-l '()) ; list of RHSs being processed
(attr '()) ; per-rule attributes (action, prec)
(pel '()) ; processed RHS terms: '$:if ...
(rhs #f)) ; elts to process: (terminal . '$:if) ...
(cond
((pair? rhs)
;; Capture info on RHS term.
(case (caar rhs)
((terminal)
(iter ll @l (add-el (cdar rhs) tl) nl head prox lhs tail
rhs-l attr (cons (atomize (cdar rhs)) pel) (cdr rhs)))
((non-terminal)
(iter ll @l tl (add-el (cdar rhs) nl) head prox lhs tail
rhs-l attr (cons (cdar rhs) pel) (cdr rhs)))
((action)
(if (pair? (cdr rhs))
;; mid-rule action: generate a proxy (car act is # args)
(let* ((sy (maksy))
(pr (make-mra-proxy sy pel (cdar rhs))))
(iter ll @l tl (cons sy nl) head (cons pr prox)
lhs tail rhs-l attr (cons sy pel) (cdr rhs)))
;; end-rule action
(iter ll @l tl nl head prox lhs tail
rhs-l (acons 'action (cdar rhs) attr) pel (cdr rhs))))
((proxy)
(let* ((sy (maksy))
(pf (cadar rhs)) ; proxy function
(p1 (pf sy (cddar rhs))))
(iter ll @l tl (cons sy nl) head (cons p1 prox) lhs
tail rhs-l attr (cons sy pel) (cdr rhs))))
((prec)
(iter ll @l (add-el (cdar rhs) tl) nl head prox lhs tail rhs-l
(acons 'prec (atomize (cdar rhs)) attr) pel (cdr rhs)))
((with)
(let* ((psy (maksy)) ; proxy symbol
(rhsx (cadar rhs)) ; symbol to expand
(p-l (map cdr (cddar rhs))) ; prune list
(p1 (list psy `((non-terminal . ,rhsx)
(action #f #f $1)))))
(iter ll @l tl (cons psy nl) head (cons p1 prox) lhs tail rhs-l
(acons 'with (cons psy p-l) attr) (cons psy pel) (cdr rhs))))
(else
(error (fmtstr "bug=~S" (caar rhs))))))
((null? rhs)
;; End of RHS items for current rule.
;; Add the p-rules items to the lists ll, rl, xl, and @@l.
;; @code{act} is now:
;; @itemize
;; @item for mid-rule-action: (narg ref code)
;; @item for end-rule-action: (#f ref code)
;; @end itemize
(let* ((ln (length pel))
(action (assq-ref attr 'action))
(with (assq-ref attr 'with))
(nrg (if action (or (car action) ln) ln)) ; number of args
(ref (if action (cadr action) #f))
(act (cond
((and action (cddr action)) (cddr action))
;; if error rule then default action is print err msg:
((memq '$error pel) '((display "syntax error\n")))
((zero? nrg) '((list)))
(else '($1)))))
(if with (simple-format #t "WITH WHAT?\n"))
(iter (cons lhs ll)
(cons (cons* (cons 'rhs (list->vector (reverse pel)))
(cons* 'act nrg act) (cons 'ref ref) attr) @l)
tl nl head prox lhs tail rhs-l attr pel #f)))
((pair? rhs-l)
;; Work through next RHS.
(iter ll @l tl nl head prox lhs tail
(cdr rhs-l) '() '() (car rhs-l)))
((pair? tail)
;; Check the next CAR of the tail. If it matches
;; the current LHS process it, else skip it.
(iter ll @l tl nl head prox lhs (cdr tail)
(if (eqv? (caar tail) lhs) (cdar tail) '())
attr pel #f))
((pair? prox)
;; If a proxy then we have ((lhs RHS) (lhs RHS))
(iter ll @l tl nl (cons (car prox) head) (cdr prox)
lhs tail rhs-l attr pel rhs))
((pair? head)
;; Check the next rule-set. If the lhs has aready
;; been processed, then skip. Otherwise, copy copy
;; to tail and process.
(let ((lhs (caar head)) (rhs-l (cdar head))
(rest (cdr head)))
(if (memq lhs ll)
(iter ll @l tl nl rest prox #f '() '() attr pel #f)
(iter ll @l tl nl rest prox lhs rest rhs-l attr pel rhs))))
(else
(let* ((al (reverse @l)) ; attribute list
(err-1 '()) ;; not used
;; symbol used as terminal and non-terminal:
(err-2 (gram-check-2 tl nl err-1))
;; non-terminal's w/o production rule:
(err-3 (gram-check-3 ll nl err-2))
;; TODO: which don't appear in OTHER RHS, e.g., (foo (foo))
(err-4 (gram-check-4 ll nl err-3))
;; todo: Check that with withs are not mixed
(err-l err-4))
(for-each (lambda (e) (fmterr "~A\n" e)) err-l)
(if (pair? (filter (lambda (s) (char=? #\* (string-ref s 0))) err-l))
#f
(list
;; most referenced
(cons 'non-terms nl)
(cons 'lhs-v (list->vector (reverse ll)))
(cons 'rhs-v (map-attr->vector al 'rhs))
(cons 'terminals tl)
(cons 'start start-symbol)
(cons 'attr (list
(cons 'expect (or (assq-ref tree 'expect) 0))
(cons 'notice (assq-ref tree 'notice))))
(cons 'prec (assq-ref pna 'prec)) ; lowest-to-highest
(cons 'assc (assq-ref pna 'assc))
(cons 'prp-v (map-attr->vector al 'prec)) ; per-rule precedence
(cons 'act-v (map-attr->vector al 'act))
(cons 'ref-v (map-attr->vector al 'ref))
(cons 'err-l err-l)))))))))
;;; === Code for processing the specification. ================================
;; @subsubheading Note
;; The fluid @code{*lalr-core*} is used during the machine generation
;; cycles to access core parameters of the specification. This includes
;; the list of non-terminals, the vector of left-hand side symbols and the
;; vector of vector of right-hand side symbols.
(define *lalr-core* (make-fluid #f))
;; This record holds the minimum data from the grammar needed to build the
;; machine from the grammar specification.
(define-record-type lalr-core-type
(make-lalr-core non-terms terminals start lhs-v rhs-v eps-l)
lalr-core-type?
(non-terms core-non-terms) ; list of non-terminals
(terminals core-terminals) ; list of non-terminals
(start core-start) ; start non-terminal
(lhs-v core-lhs-v) ; vec of left hand sides
(rhs-v core-rhs-v) ; vec of right hand sides
(eps-l core-eps-l)) ; non-terms w/ eps prod's
;; @deffn make-core spec => lalr-core-type
(define (make-core spec)
(make-lalr-core (assq-ref spec 'non-terms)
(assq-ref spec 'terminals)
(assq-ref spec 'start)
(assq-ref spec 'lhs-v)
(assq-ref spec 'rhs-v)
'()))
;; @deffn make-core/extras spec => lalr-core-type
;; Add list of symbols with epsilon productions.
(define (make-core/extras spec)
(let ((non-terms (assq-ref spec 'non-terms))
(terminals (assq-ref spec 'terminals))
(start (assq-ref spec 'start))
(lhs-v (assq-ref spec 'lhs-v))
(rhs-v (assq-ref spec 'rhs-v)))
(make-lalr-core non-terms terminals start lhs-v rhs-v
(find-eps non-terms lhs-v rhs-v))))
;; @section Routines
;; @deffn <? a b po => #t | #f
;; Given tokens @code{a} and @code{b} and partial ordering @code{po} report
;; if precedence of @code{b} is greater than @code{a}?
(define (<? a b po)
(if (member (cons a b) po) #t
(let iter ((po po))
(if (null? po) #f
(if (and (eqv? (caar po) a)
(<? (cdar po) b po))
#t
(iter (cdr po)))))))
;; @deffn prece a b po
;; Return precedence for @code{a,b} given the partial order @code{po} as
;; @code{#\<}, @code{#\>}, @code{#\=} or @code{#f}.
;; This is not a true partial order as we can have a<b and b<a => a=b.
;; @example
;; @code{(prece a a po)} => @code{#\=}.
;; @end example
(define (prece a b po)
(cond
((eqv? a b) #\=)
((eqv? a '$error) #\<)
((eqv? b '$error) #\>)
((<? a b po) (if (<? b a po) #\= #\<))
(else (if (<? b a po) #\> #f))))
;; @deffn non-terminal? symb
(define (non-terminal? symb)
(cond
((eqv? symb '$epsilon) #t)
((eqv? symb '$end) #f)
((eqv? symb '$@) #f)
((string? symb) #f)
(else
(memq symb (core-non-terms (fluid-ref *lalr-core*))))))
;; @deffn terminal? symb
(define (terminal? symb)
(not (non-terminal? symb)))
;; @deffn prule-range lhs => (start-ix . (1+ end-ix))
;; Find the range of productiion rules for the lhs.
;; If not found raise error.
(define (prule-range lhs)
;; If this needs to be really fast then we move to where lhs is an integer
;; and that used to index into a table that provides the ranges.
(let* ((core (fluid-ref *lalr-core*))
(lhs-v (core-lhs-v core))
(n (vector-length lhs-v))
(match? (lambda (ix symb) (eqv? (vector-ref lhs-v ix) symb))))
(cond
((terminal? lhs) '())
((eq? lhs '$epsilon) '())
(else
(let iter-st ((st 0))
;; Iterate to find the start index.
(if (= st n) '() ; not found
(if (match? st lhs)
;; Start found, now iteratate to find end index.
(let iter-nd ((nd st))
(if (= nd n) (cons st nd)
(if (not (match? nd lhs)) (cons st nd)
(iter-nd (1+ nd)))))
(iter-st (1+ st)))))))))
;; @deffn range-next rng -> rng
;; Given a range in the form of @code{(cons start (1+ end))} return the next
;; value or '() if at end. That is @code{(3 . 4)} => @code{'()}.
(define (range-next rng)
(if (null? rng) '()
(let ((nxt (cons (1+ (car rng)) (cdr rng))))
(if (= (car nxt) (cdr nxt)) '() nxt))))
;; @deffn range-last? rng
;; Predicate to indicate last p-rule in range.
;; If off end (i.e., null rng) then #f.
(define (range-last? rng)
(and (pair? rng) (= (1+ (car rng)) (cdr rng))))
;; @deffn lhs-symb prod-ix
;; Return the LHS symbol for the production at index @code{prod-id}.
(define (lhs-symb gx)
(vector-ref (core-lhs-v (fluid-ref *lalr-core*)) gx))
;; @deffn looking-at (p-rule-ix . rhs-ix)
;; Return symbol we are looking at for this item state.
;; If at the end (position = -1) (or rule is zero-length) then return
;; @code{'$epsilon}.
(define (looking-at item)
(let* ((core (fluid-ref *lalr-core*))
(rhs-v (core-rhs-v core))
(rule (vector-ref rhs-v (car item))))
(if (last-item? item)
'$epsilon
(vector-ref rule (cdr item)))))
;; @deffn first-item gx
;; Given grammar rule index return the first item.
;; This will return @code{(gx . 0)}, or @code{(gx . -1)} if the rule has
;; no RHS elements.
(define (first-item gx)
(let* ((core (fluid-ref *lalr-core*))
(rlen (vector-length (vector-ref (core-rhs-v core) gx))))
(cons gx (if (zero? rlen) -1 0))))
;; @deffn last-item? item
;; Predictate to indicate last item in (or end of) production rule.
(define (last-item? item)
(negative? (cdr item)))
;; @deffn next-item item
;; Return the next item in the production rule.
;; A position of @code{-1} means the end. If at end, then @code{'()}
(define (next-item item)
(let* ((core (fluid-ref *lalr-core*))
(gx (car item)) (rx (cdr item)) (rxp1 (1+ rx))
(rlen (vector-length (vector-ref (core-rhs-v core) gx))))
(cond
((negative? rx) '())
((eqv? rxp1 rlen) (cons gx -1))
(else (cons gx rxp1)))))
;; @deffn prev-item item
;; Return the previous item in the grammar.
;; prev (0 . 0) is currently (0 . 0)
(define (prev-item item)
(let* ((core (fluid-ref *lalr-core*))
(rhs-v (core-rhs-v core))
(p-ix (car item))
(p-ixm1 (1- p-ix))
(r-ix (cdr item))
(r-ixm1 (if (negative? r-ix)
(1- (vector-length (vector-ref rhs-v p-ix)))
(1- r-ix))))
(if (zero? r-ix)
(if (zero? p-ix) item ; start, i.e., (0 . 0)
(cons p-ixm1 -1)) ; prev p-rule
(cons p-ix r-ixm1))))
;; @deffn error-rule? gx => #t|#f
;; Predicate to indicate if gx rule has @code{$error} as rhs member.
(define (error-rule? gx)
(let* ((core (fluid-ref *lalr-core*))
(rhs-v (core-rhs-v core)))
(vector-any (lambda (e) (eqv? e '$error)) (vector-ref rhs-v gx))))
;; @deffn non-kernels symb => list of prule indices
;; Compute the set of non-kernel rules for symbol @code{symb}. If grammar
;; looks like
;; @example
;; 1: A => Bcd
;; ...
;; 5: B => Cde
;; ...
;; 7: B => Abe
;; @end example
;; @noindent
;; then @code{non-kernels 'A} results in @code{(1 5 7)}.
;; Note: To support pruning this routine will need to be rewritten.
(define (non-kernels symb)
(let* ((core (fluid-ref *lalr-core*))
(lhs-v (core-lhs-v core))
(rhs-v (core-rhs-v core))
(glen (vector-length lhs-v))
(lhs-symb (lambda (gx) (vector-ref lhs-v gx))))
(let iter ((rslt '()) ; result is set of p-rule indices
(done '()) ; symbols completed or queued
(next '()) ; next round of symbols to process
(curr (list symb)) ; this round of symbols to process
(gx 0)) ; p-rule index
(cond
((< gx glen)
(cond
((memq (lhs-symb gx) curr)
;; Add rhs to next and rslt if not already done.
(let* ((rhs1 (looking-at (first-item gx))) ; 1st-RHS-sym|$eps
(rslt1 (if (memq gx rslt) rslt (cons gx rslt)))
(done1 (if (memq rhs1 done) done (cons rhs1 done)))
(next1 (cond ((memq rhs1 done) next)
((terminal? rhs1) next)
(else (cons rhs1 next)))))
(iter rslt1 done1 next1 curr (1+ gx))))
(else
;; Nothing to check; process next rule.
(iter rslt done next curr (1+ gx)))))
((pair? next)
;; Start another sweep throught the grammar.
(iter rslt done '() next 0))
(else
;; Done, so return.
(reverse rslt))))))
;; @deffn expand-k-item => item-set
;; Expand a kernel-item into a list with the non-kernels.
(define (expand-k-item k-item)
(reverse
(fold (lambda (gx items) (cons (first-item gx) items))
(list k-item)
(non-kernels (looking-at k-item)))))
;; @deffn its-equal?
;; Helper for step1
(define (its-equal? its-1 its-2)
(let iter ((its1 its-1) (its2 its-2)) ; cdr to strip off the ind
(if (and (null? its1) (null? its2)) #t ; completed run through => #f
(if (or (null? its1) (null? its2)) #f ; lists not equal length => #f
(if (not (member (car its1) its-2)) #f ; mismatch => #f
(iter (cdr its1) (cdr its2)))))))
;; @deffn its-member its its-l
;; Helper for step1
;; If itemset @code{its} is a member of itemset list @code{its-l} return the
;; index, else return #f.
(define (its-member its its-l)
(let iter ((itsl its-l))
(if (null? itsl) #f
(if (its-equal? its (cdar itsl)) (caar itsl)
(iter (cdr itsl))))))
;; @deffn its-trans itemset => alist of (symb . itemset)
;; Compute transitions from an itemset. Thatis, map a list of kernel
;; items to a list of (symbol post-shift items).
;; @example
;; ((0 . 1) (2 . 3) => ((A (0 . 2) (2 . 4)) (B (2 . 4) ...))
;; @end example
(define (its-trans items)
(let iter ((rslt '()) ; result
(k-items items) ; items
(itl '())) ; one k-item w/ added non-kernels
(cond
((pair? itl)
(let* ((it (car itl)) ; item
(sy (looking-at it)) ; symbol
(nx (next-item it))
(sq (assq sy rslt))) ; if we have seen it
(cond
((eq? sy '$epsilon)
;; don't transition end-of-rule items
(iter rslt k-items (cdr itl)))
((not sq)
;; haven't seen this symbol yet
(iter (acons sy (list nx) rslt) k-items (cdr itl)))
((member nx (cdr sq))
;; repeat
(iter rslt k-items (cdr itl)))
(else
;; SY is in RSLT and item not yet in: add it.
(set-cdr! sq (cons nx (cdr sq)))
(iter rslt k-items (cdr itl))))))
((pair? k-items)
(iter rslt (cdr k-items) (expand-k-item (car k-items))))
(else
rslt))))
;; @deffn step1 [input-a-list] => p-mach-1
;; Compute the sets of LR(0) kernel items and the transitions associated with
;; spec. These are returned as vectors in the alist with keys @code{'kis-v}
;; and @code{'kix-v}, repspectively. Each entry in @code{kis-v} is a list of
;; items in the form @code{(px . rx)} where @code{px} is the production rule
;; index and @code{rx} is the index of the RHS symbol. Each entry in the
;; vector @code{kix-v} is an a-list with entries @code{(sy . kx)} where
;; @code{sy} is a (terminal or non-terminal) symbol and @code{kx} is the
;; index of the kernel itemset. The basic algorithm is discussed on
;; pp. 228-229 of the DB except that we compute non-kernel items on the fly
;; using @code{expand-k-item}. See Example 4.46 on p. 241 of the DB.
(define (step1 . rest)
(let* ((al-in (if (pair? rest) (car rest) '()))
(add-kset (lambda (upd kstz) ; give upd a ks-ix and add to kstz
(acons (1+ (caar kstz)) upd kstz)))
(init '(0 (0 . 0))))
(let iter ((ksets (list init)) ; w/ index
(ktrnz '()) ; ((symb src dst) (symb src dst) ...)
(next '()) ; w/ index
(todo (list init)) ; w/ index
(curr #f) ; current state ix
(trans '())) ; ((symb it1 it2 ...) (symb ...))
(cond
((pair? trans)
;; Check next symbol for transitions (symb . (item1 item2 ...)).
(let* ((dst (cdar trans)) ; destination item
(dst-ix (its-member dst ksets)) ; return ix else #f
(upd (if dst-ix '() (cons (1+ (caar ksets)) dst)))
(ksets1 (if dst-ix ksets (cons upd ksets)))
(next1 (if dst-ix next (cons upd next)))
(dsx (if dst-ix dst-ix (car upd))) ; dest state index
(ktrnz1 (cons (list (caar trans) curr dsx) ktrnz)))
(iter ksets1 ktrnz1 next1 todo curr (cdr trans))))
((pair? todo)
;; Process the next state (aka itemset).
(iter ksets ktrnz next (cdr todo) (caar todo) (its-trans (cdar todo))))
((pair? next)
;; Sweep throught the grammar again.
(iter ksets ktrnz '() next curr '()))
(else
(let* ((nkis (length ksets)) ; also (caar ksets)
(kisv (make-vector nkis #f))
(kitv (make-vector nkis '())))
;; Vectorize kernel sets
(for-each
(lambda (kis) (vector-set! kisv (car kis) (cdr kis)))
ksets)
;; Vectorize transitions (by src kx).
(for-each
(lambda (kit)
(vector-set! kitv (cadr kit)
(acons (car kit) (caddr kit)
(vector-ref kitv (cadr kit)))))
ktrnz)
;; Return kis-v, kernel itemsets, and kix-v transitions.
(cons* (cons 'kis-v kisv) (cons 'kix-v kitv) al-in)))))))
;; @deffn find-eps non-terms lhs-v rhs-v => eps-l
;; Generate a list of non-terminals which have epsilon productions.
(define (find-eps nterms lhs-v rhs-v)
(let* ((nprod (vector-length lhs-v))
(find-new
(lambda (e l)
(let iter ((ll l) (gx 0) (lhs #f) (rhs #()) (rx 0))
(cond
((< rx (vector-length rhs))
(if (and (memq (vector-ref rhs rx) nterms) ; non-term
(memq (vector-ref rhs rx) ll)) ; w/ eps prod
(iter ll gx lhs rhs (1+ rx)) ; yes: check next
(iter ll (1+ gx) #f #() 0))) ; no: next p-rule
((and lhs (= rx (vector-length rhs))) ; we have eps-prod
(iter (if (memq lhs ll) ll (cons lhs ll)) (1+ gx) #f #() 0))
((< gx nprod) ; check next p-rule if not on list
(if (memq (vector-ref lhs-v gx) ll)
(iter ll (1+ gx) #f #() 0)
(iter ll gx (vector-ref lhs-v gx) (vector-ref rhs-v gx) 0)))
(else ll))))))
(fixed-point find-new (find-new #f '()))))
;; @deffn merge1 v l
;; add v to l if not in l
(define (merge1 v l)
(if (memq v l) l (cons v l)))
;; @deffn merge2 v l al
;; add v to l if not in l or al
(define (merge2 v l al)
(if (memq v l) l (if (memq v al) l (cons v l))))
;; @deffn first symbol-list end-token-list
;; Return list of terminals starting the string @code{symbol-list}
;; (see DB, p. 188). If the symbol-list can generate epsilon then the
;; result will include @code{end-token-list}.
(define (first symbol-list end-token-list)
(let* ((core (fluid-ref *lalr-core*))
(eps-l (core-eps-l core)))
;; This loop strips off the leading symbol from stng and then adds to
;; todo list, which results in range of p-rules getting checked for
;; terminals.
(let iter ((rslt '()) ; terminals collected
(stng symbol-list) ; what's left of input string
(hzeps #t) ; if eps-prod so far
(done '()) ; non-terminals checked
(todo '()) ; non-terminals to assess
(p-range '()) ; range of p-rules to check
(item '())) ; item in production
(cond
((pair? item)
(let ((sym (looking-at item)))
(cond
((eq? sym '$epsilon) ; at end of rule, go next
(iter rslt stng hzeps done todo p-range '()))
((terminal? sym) ; terminal, log it
(iter (merge1 sym rslt) stng hzeps done todo p-range '()))
((memq sym eps-l) ; symbol has eps prod
(iter rslt stng hzeps (merge1 sym done) (merge2 sym todo done)
p-range (next-item item)))
(else ;; non-terminal, add to todo/done, goto next
(iter rslt stng hzeps
(merge1 sym done) (merge2 sym todo done) p-range '())))))
((pair? p-range) ; next one to do
;; run through next rule
(iter rslt stng hzeps done todo
(range-next p-range) (first-item (car p-range))))
((pair? todo)
(iter rslt stng hzeps done (cdr todo) (prule-range (car todo)) '()))
((and hzeps (pair? stng))
;; Last pass saw an $epsilon so check the next input symbol,
;; with saweps reset to #f.
(let* ((symb (car stng)) (stng1 (cdr stng)) (symbl (list symb)))
(if (terminal? symb)
(iter (cons symb rslt) stng1
(and hzeps (memq symb eps-l))
done todo p-range '())
(iter rslt stng1
(or (eq? symb '$epsilon) (memq symb eps-l))
symbl symbl '() '()))))
(hzeps
;; $epsilon passes all the way through.
;; If end-token-list provided use that.
(if (pair? end-token-list)
(lset-union eqv? rslt end-token-list)
(cons '$epsilon rslt)))
(else
rslt)))))
;; @deffn item->stng item => list-of-symbols
;; Convert item (e.g., @code{(1 . 2)}) to list of symbols to the end of the
;; production(?). If item is at the end of the rule then return
;; @code{'$epsilon}. The term "stng" is used to avoid confusion about the
;; term string.
(define (item->stng item)
(if (eqv? (cdr item) -1)
(list '$epsilon)
(let* ((core (fluid-ref *lalr-core*))
(rhs-v (core-rhs-v core))
(rhs (vector-ref rhs-v (car item))))
(let iter ((res '()) (ix (1- (vector-length rhs))))
(if (< ix (cdr item)) res
(iter (cons (vector-ref rhs ix) res) (1- ix)))))))
;; add (item . toks) to (front of) la-item-l
;; i.e., la-item-l is unmodified
(define (merge-la-item la-item-l item toks)
(let* ((pair (assoc item la-item-l))
(tokl (if (pair? pair) (cdr pair) '()))
(allt ;; union of toks and la-item-l toks
(let iter ((tl tokl) (ts toks))
(if (null? ts) tl
(iter (if (memq (car ts) tl) tl (cons (car ts) tl))
(cdr ts))))))
(if (not pair) (acons item allt la-item-l)
(if (eqv? tokl allt) la-item-l
(acons item allt la-item-l)))))
;; @deffn first-following item toks => token-list
;; For la-item A => x.By,z (where @code{item}, @code{toks}), this
;; procedure computes @code{FIRST(yz)}.
(define (first-following item toks)
(first (item->stng (next-item item)) toks))
;; @deffn closure la-item-l => la-item-l
;; Compute the closure of a list of la-items.
;; Ref: DB, Fig 4.38, Sec. 4.7, p. 232
(define (closure la-item-l)
;; Compute the fixed point of I, aka @code{la-item-l}, with procedure
;; for each item [A => x.By, a] in I
;; each production B => z in G
;; and each terminal b in FIRST(ya)
;; such that [B => .z, b] is not in I do
;; add [B => .z, b] to I
;; The routine @code{fixed-point} operates on one element of the input set.
(prune-assoc
(fixed-point
(lambda (la-item seed)
(let* ((item (car la-item)) (toks (cdr la-item)) (symb (looking-at item)))
(cond
((last-item? (car la-item)) seed)
((terminal? (looking-at (car la-item))) seed)
(else
(let iter ((seed seed) (pr (prule-range symb)))
(cond
((null? pr) seed)
(else
(iter (merge-la-item seed (first-item (car pr))
(first-following item toks))
(range-next pr)))))))))
la-item-l)))
;; @deffn kit-add kit-v tokens sx item
;; Add @code{tokens} to the list of lookaheads for the (kernel) @code{item}
;; in state @code{sx}. This is a helper for @code{step2}.
(define (kit-add kit-v tokens kx item)
(let* ((al (vector-ref kit-v kx)) ; a-list for k-set kx
(ar (assoc item al)) ; tokens for item
(sd (if (pair? ar) ; set difference
(lset-difference eqv? tokens (cdr ar))
tokens)))
(cond ; no entry, update entry, no update
((null? tokens) #f)
((not ar) (vector-set! kit-v kx (acons item tokens al)) #t)
((pair? sd) (set-cdr! ar (append sd (cdr ar))) #t)
(else #f))))
;; @deffn kip-add kip-v sx0 it0 sx1 it1
;; This is a helper for step2. It updates kip-v with a propagation from
;; state @code{sx0}, item @code{it0} to state @code{sx1}, item @code{it1}.
;; [kip-v sx0] -> (it0 . ((sx1 . it1)
(define (kip-add kip-v sx0 it0 sx1 it1)
(let* ((al (vector-ref kip-v sx0)) (ar (assoc it0 al)))
(cond
((not ar)
(vector-set! kip-v sx0 (acons it0 (list (cons sx1 it1)) al)) #t)
((member it1 (cdr ar)) #f)
(else
(set-cdr! ar (acons sx1 it1 (cdr ar))) #t))))
;; @deffn step2 p-mach-1 => p-mach-2
;; This implements steps 2 and 3 of Algorithm 4.13 on p. 242 of the DB.
;; The a-list @code{p-mach-1} includes the kernel itemsets and transitions
;; from @code{step1}. This routine adds two entries to the a-list:
;; the initial set of lookahead tokens in a vector associated with key
;; @code{'kit-v} and a vector of spontaneous propagations associated with
;; key @code{'kip-v}.
;; @example
;; for-each item I in some itemset
;; for-each la-item J in closure(I,#)
;; for-each token T in lookaheads(J)
;; if LA is #, then add to J propagate-to list
;; otherwise add T to spontaneously-generated list
;; @end example
(define (step2 p-mach)
(let* ((kis-v (assq-ref p-mach 'kis-v))
(kix-v (assq-ref p-mach 'kix-v)) ; transitions?
(nkset (vector-length kis-v)) ; number of k-item-sets
;; kernel-itemset tokens
(kit-v (make-vector nkset '())) ; sx => alist: (item latoks)
;; kernel-itemset propagations
(kip-v (make-vector nkset '()))) ; sx0 => ((ita (sx1a . it1a) (sx2a
(vector-set! kit-v 0 (closure (list (list '(0 . 0) '$end))))
(let iter ((kx -1) (kset '()))
(cond
((pair? kset)
(for-each
(lambda (la-item)
(let* ((item (car la-item)) ; closure item
(tokl (cdr la-item)) ; tokens
(sym (looking-at item)) ; transition symbol
(item1 (next-item item)) ; next item after sym
(sx1 (assq-ref (vector-ref kix-v kx) sym)) ; goto(I,sym)
(item0 (car kset))) ; kernel item
(kit-add kit-v (delq '$@ tokl) sx1 item1) ; spontaneous
(if (memq '$@ tokl) ; propagates
(kip-add kip-v kx item0 sx1 item1))))
(remove ;; todo: check this remove
(lambda (li) (last-item? (car li)))
(closure (list (cons (car kset) '($@))))))
(iter kx (cdr kset)))
((< (1+ kx) nkset)
(iter (1+ kx)
;; End-items don't shift, so don't propagate.
(remove last-item? (vector-ref kis-v (1+ kx)))))))
;;(when #f (pp-kip-v kip-v) (pp-kit-v kit-v)) ; for debugging
(cons* (cons 'kit-v kit-v) (cons 'kip-v kip-v) p-mach)))
;; debug for step2
(define (pp-kit ix kset)
(fmtout "~S:\n" ix)
(for-each
(lambda (item) (fmtout " ~A, ~S\n" (pp-item (car item)) (cdr item)))
kset))
(define (pp-kit-v kit-v)
(fmtout "spontaneous:\n")
(vector-for-each pp-kit kit-v))
(define (pp-kip ix kset)
(for-each
(lambda (x)
(fmtout "~S: ~A\n" ix (pp-item (car x)))
(for-each
(lambda (y) (fmtout " => ~S: ~A\n" (car y) (pp-item (cdr y))))
(cdr x)))
kset))
(define (pp-kip-v kip-v)
(fmtout "propagate:\n")
(vector-for-each pp-kip kip-v))
;; @deffn step3 p-mach-2 => p-mach-3
;; Execute nyacc step 3, where p-mach means ``partial machine''.
;; This implements step 4 of Algorithm 4.13 from the DB.
(define (step3 p-mach)
(let* ((kit-v (assq-ref p-mach 'kit-v))
(kip-v (assq-ref p-mach 'kip-v))
(nkset (vector-length kit-v)))
(let iter ((upd #t) ; token propagated?
(kx -1) ; current index
(ktal '()) ; (item . LA) list for kx
(toks '()) ; LA tokens being propagated
(item '()) ; from item
(prop '())) ; to items
(cond
((pair? prop)
;; Propagate lookaheads.
(let* ((sx1 (caar prop)) (it1 (cdar prop)))
(iter (or (kit-add kit-v toks sx1 it1) upd)
kx ktal toks item (cdr prop))))
((pair? ktal)
;; Process the next (item . tokl) in the alist ktal.
(iter upd kx (cdr ktal) (cdar ktal) (caar ktal)
(assoc-ref (vector-ref kip-v kx) (caar ktal))))
((< (1+ kx) nkset)
;; Process the next itemset.
(iter upd (1+ kx) (vector-ref kit-v (1+ kx)) '() '() '()))
(upd
;; Have updates, rerun.
(iter #f 0 '() '() '() '()))))
p-mach))
;; @deffn reductions kit-v sx => ((tokA gxA1 ...) (tokB gxB1 ...) ...)
;; This is a helper for @code{step4}.
;; Return an a-list of reductions for state @code{sx}.
;; The a-list pairs are make of a token and a list of prule indicies.
;; CHECK the following. We are brute-force using @code{closure} here.
;; It works, but there should be a better algorithm.
;; Note on reductions: We reduce if the kernel-item is an end-item or a
;; non-kernel item with an epsilon-production. That is, if we have a
;; kernel item of the form
;; @example
;; A => abc.
;; @end example
;; or if we have the non-kernel item of the form
;; @example
;; B => .de
;; @end example
;; where FIRST(de,#) includes #. See the second paragraph under ``Efficient
;; Construction of LALR Parsing Tables'' in DB Sec 4.7.
(define (old-reductions kit-v sx)
(let iter ((ral '()) ; result: reduction a-list
(lais (vector-ref kit-v sx)) ; la-item list
(toks '()) ; kernel la-item LA tokens
(itms '()) ; all items
(gx #f) ; rule reduced by tl
(tl '())) ; LA-token list
(cond
((pair? tl) ;; add (token . p-rule) to reduction list
(let* ((tk (car tl)) (rp (assq tk ral)))
(cond
;; already have this, skip to next token
((and rp (memq gx (cdr rp)))
(iter ral lais toks itms gx (cdr tl)))
(rp
;; have token, add prule
(set-cdr! rp (cons gx (cdr rp)))
(iter ral lais toks itms gx (cdr tl)))
(else
;; add token w/ prule
(iter (cons (list tk gx) ral) lais toks itms gx (cdr tl))))))
((pair? itms)
(if (last-item? (car itms))
;; last item, add it
(iter ral lais toks (cdr itms) (caar itms) toks)
;; skip to next
(iter ral lais toks (cdr itms) 0 '())))
((pair? lais) ;; process next la-item
(iter ral (cdr lais) (cdar lais) (expand-k-item (caar lais)) 0 '()))
(else ral))))
;; I think the above is broken because I'm not including the proper tail
;; string. The following just uses closure to do the job. It works but
;; may not be very efficient: seems a bit brute force.
(define (new-reductions kit-v sx)
(let iter ((ral '()) ; result: reduction a-list
(klais (vector-ref kit-v sx)) ; kernel la-item list
(laits '()) ; all la-items
(gx #f) ; rule reduced by tl
(tl '())) ; LA-token list
(cond
((pair? tl) ;; add (token . p-rule) to reduction list
(let* ((tk (car tl)) (rp (assq tk ral)))
(cond
((and rp (memq gx (cdr rp)))
;; already have this, skip to next token
(iter ral klais laits gx (cdr tl)))
(rp
;; have token, add prule
(set-cdr! rp (cons gx (cdr rp)))
(iter ral klais laits gx (cdr tl)))
(else
;; add token w/ prule
(iter (cons (list tk gx) ral) klais laits gx (cdr tl))))))
((pair? laits) ;; process a la-itemset
(if (last-item? (caar laits))
;; last item, add it
(iter ral klais (cdr laits) (caaar laits) (cdar laits))
;; else skip to next
(iter ral klais (cdr laits) 0 '())))
((pair? klais) ;; expand next kernel la-item
;; There is a cheaper way than closure to do this but for now ...
(iter ral (cdr klais) (closure (list (car klais))) 0 '()))
(else
ral))))
(define reductions new-reductions)
;; Generate parse-action-table from the shift a-list and reduce a-list.
;; This is a helper for @code{step4}. It converts a list of state transitions
;; and a list of reductions into a parse-action table of shift, reduce,
;; accept, shift-reduce conflict or reduce-reduce conflict.
;; The actions take the form:
;; @example
;; (shift . <dst-state>)
;; (reduce . <rule-index>)
;; (accept . 0)
;; (srconf . (<dst-state> . <p-rule>))
;; (rrconf . <list of p-rules indices>)
;; @end example
;; If a shift has multiple reduce conflicts we report only one reduction.
(define (gen-pat sft-al red-al)
(let iter ((res '()) (sal sft-al) (ral red-al))
(cond
((pair? sal)
(let* ((term (caar sal)) ; terminal
(goto (cdar sal)) ; target state
(redp (assq term ral)) ; a-list entry, may be removed
;;(redl (if redp (cdr redp) #f))) ; reductions on terminal
(redl (and=> redp cdr))) ; reductions on terminal
(cond
((and redl (pair? (cdr redl)))
;; This means we have a shift-reduce and reduce-reduce conflicts.
;; We record only one shift-reduce and keep the reduce-reduce.
(iter (cons (cons* term 'srconf goto (car redl)) res)
(cdr sal) ral))
(redl
;; The terminal (aka token) signals a single reduction. This means
;; we have one shift-reduce conflict. We have a chance to repair
;; the parser using precedence/associativity rules so we remove the
;; reduction from the reduction-list.
(iter (cons (cons* term 'srconf goto (car redl)) res)
(cdr sal) (delete redp ral)))
(else
;; The terminal (aka token) signals a shift only.
(iter (cons (cons* term 'shift goto) res)
(cdr sal) ral)))))
((pair? ral)
(let ((term (caar ral)) (rest (cdar ral)))
;; We keep 'accept as explict action. Another option is to reduce and
;; have 0-th p-rule action generate return from parser (via prompt?).
(iter
(cons (cons term
(cond ;; => action and arg(s)
((zero? (car rest)) (cons 'accept 0))
((zero? (car rest)) (cons 'reduce (car rest)))
((> (length rest) 1) (cons 'rrconf rest))
(else (cons 'reduce (car rest)))))
res) sal (cdr ral))))
(else res))))
;; @deffn step4 p-mach-0 => p-mach-1
;; This generates the parse action table from the itemsets and then applies
;; precedence and associativity rules to eliminate shift-reduce conflicts
;; where possible. The output includes the parse action table (entry
;; @code{'pat-v} and TBD (list of errors in @code{'err-l}).
;;.per-state: alist by symbol:
;; (symb <id>) if <id> > 0 SHIFT to <id>, else REDUCE by <id> else
;; so ('$end . 0) means ACCEPT!
;; but 0 for SHIFT and REDUCE, but reduce 0 is really ACCEPT
;; if reduce by zero we are done. so never hit state zero accept on ACCEPT?
;; For each state, the element of pat-v looks like
;; ((tokenA . (reduce . 79)) (tokenB . (reduce . 91)) ... )
(define (step4 p-mach)
(define (setup-assc assc)
(fold (lambda (al seed)
(append (x-flip al) seed)) '() assc))
(define (setup-prec prec)
(let iter ((res '()) (rl '()) (hd '()) (pl '()) (pll prec))
(cond
((pair? pl)
(let* ((p (car pl)) (hdp (x-comb hd p))
(pp (remove (lambda (p) (eqv? (car p) (cdr p))) (x-comb p p))))
(iter res (append rl hdp pp) (car pl) (cdr pl) pll)))
((pair? rl) (iter (append res rl) '() hd pl pll))
((pair? pll) (iter res rl '() (car pll) (cdr pll)))
(else res))))
(define (prev-sym act its)
(let* ((a act)
(tok (car a)) (sft (caddr a)) (red (cdddr a))
;; @code{pit} is the end-item in the p-rule to be reduced.
(pit (prev-item (prev-item (cons red -1))))
;; @code{psy} is the last symbol in the p-rule to be reduced.
(psy (looking-at pit)))
psy))
(let* ((kis-v (assq-ref p-mach 'kis-v)) ; states
(kit-v (assq-ref p-mach 'kit-v)) ; la-toks
(kix-v (assq-ref p-mach 'kix-v)) ; transitions
(assc (assq-ref p-mach 'assc)) ; associativity rules
(assc (setup-assc assc)) ; trying it
(prec (assq-ref p-mach 'prec)) ; precedence rules
(prec (setup-prec prec)) ; trying it
(nst (vector-length kis-v)) ; number of states
(pat-v (make-vector nst '())) ; parse-act tab /state
(rat-v (make-vector nst '())) ; removed-act tab /state
(gen-pat-ix (lambda (ix) ; pat from shifts & reduc's
(gen-pat (vector-ref kix-v ix) (reductions kit-v ix))))
(prp-v (assq-ref p-mach 'prp-v)) ; per-rule precedence
(tl (assq-ref p-mach 'terminals)) ; for error msgs
)
;; We run through each itemset.
;; @enumerate
;; @item We have a-list of symbols to shift state (i.e., @code{kix-v}).
;; @item We generate a list of tokens to reduction from @code{kit-v}.
;; @end enumerate
;; Q: should '$end be edited out of shifts?
;; kit-v is vec of a-lists of form ((item tok1 tok2 ...) ...)
;; turn to (tok1 item1 item2 ...)
(let iter ((ix 0) ; state index
(pat '()) ; parse-action table
(rat '()) ; removed-action table
(wrn '()) ; warnings on unsolicited removals
(ftl '()) ; fatal conflicts
(actl (gen-pat-ix 0))) ; action list
(cond
((pair? actl)
(case (cadar actl)
((shift reduce accept)
(iter ix (cons (car actl) pat) rat wrn ftl (cdr actl)))
((srconf)
(let* ((act (car actl))
(tok (car act)) (sft (caddr act)) (red (cdddr act))
(prp (vector-ref prp-v red))
(psy (prev-sym act (vector-ref kis-v ix)))
(preced (or (and prp (prece prp tok prec)) ; rule-based
(prece psy tok prec))) ; oper-based
(sft-a (cons* tok 'shift sft))
(red-a (cons* tok 'reduce red)))
(call-with-values
(lambda ()
;; Use precedence or, if =, associativity.
(case preced
((#\>)
(values red-a (cons sft-a 'pre) #f #f))
((#\<)
(values sft-a (cons red-a 'pre) #f #f))
((#\=) ;; Now use associativity
(case (assq-ref assc tok)
((left)
(values red-a (cons sft-a 'ass) #f #f))
((right)
(values sft-a (cons red-a 'ass) #f #f))
((nonassoc)
(values (cons* tok 'error red) #f #f (cons ix act)))
(else
(values sft-a (cons red-a 'def) (cons ix act) #f))))
(else ;; Or default, which is shift.
(values sft-a (cons red-a 'def) (cons ix act) #f))))
(lambda (a r w f)
(iter ix
(if a (cons a pat) pat)
(if r (cons r rat) rat)
(if w (cons w wrn) wrn)
(if f (cons f ftl) ftl)
(cdr actl))))))
((rrconf)
#;(fmterr "*** reduce-reduce conflict: in state ~A on ~A: ~A\n"
ix (obj->str (find-terminal (caar actl) tl)) (cddar actl))
(iter ix (cons (car actl) pat) rat wrn
(cons (cons ix (car actl)) ftl) (cdr actl)))
(else
(error "PROBLEM"))))
((null? actl)
(vector-set! pat-v ix pat)
(vector-set! rat-v ix rat)
(iter ix pat rat wrn ftl #f))
((< (1+ ix) nst)
(iter (1+ ix) '() '() wrn ftl (gen-pat-ix (1+ ix))))
(else
(let* ((attr (assq-ref p-mach 'attr))
(expect (assq-ref attr 'expect))) ; expected # srconf
(if (not (= (length wrn) expect))
(for-each (lambda (m) (fmterr "+++ warning: ~A\n" (conf->str m)))
(reverse wrn)))
(for-each
(lambda (m) (fmterr "*** fatal: ~A\n" (conf->str m)))
(reverse ftl))
))))
;; Return mach with parse-action and removed-action tables.
(cons* (cons 'pat-v pat-v) (cons 'rat-v rat-v) p-mach)))
;; @deffn conf->str cfl => string
;; map conflict (e.g., @code{('rrconf 1 . 2}) to string.
(define (conf->str cfl)
(let* ((st (list-ref cfl 0)) (tok (list-ref cfl 1)) (typ (list-ref cfl 2))
(core (fluid-ref *lalr-core*)) (terms (core-terminals core)))
(fmtstr "in state ~A, ~A conflict on ~A"
st
(case typ
((srconf) "shift-reduce")
((rrconf) "reduce-reduce")
(else "unknown"))
(obj->str (find-terminal tok terms)))))
;; @deffn gen-match-table mach => mach
;; Generate the match-table for a machine. The match table is a list of
;; pairs: the car is the token used in the grammar specification, the cdr
;; is the symbol that should be returned by the lexical analyzer.
;;
;; The match-table may be passed to
;; the lexical analyzer builder to identify strings or string-types as tokens.
;; The associated key in the machine is @code{mtab}.
;; @enumerate
;; @item
;; @sc{nyacc}-reserved symbols are provided as symbols
;; @example
;; $ident -> ($ident . $ident)
;; @end example
;; @item
;; Terminals used as symbols (@code{'comment} versus @code{"comment"}) are
;; provided as symbols. The spec parser will provide a warning if symbols
;; are used in both ways.
;; @item
;; Others are provided as strings.
;; @end enumerate
;; The procedure @code{hashify-machine} will convert the cdrs to integers.
;; Test: "$abc" => ("$abc" '$abc) '$abc => ('$abc . '$abc)
(define (gen-match-table mach)
(cons
(cons 'mtab (map (lambda (term) (cons term (atomize term)))
(assq-ref mach 'terminals)))
mach))
;; @deffn add-recovery-logic! mach => mach
;; Target of transition from @code{'$error} should have a default rule that
;; loops back.
(define (add-recovery-logic-1 mach)
(let* ((kis-v (assq-ref mach 'kis-v))
(rhs-v (assq-ref mach 'rhs-v))
(pat-v (assq-ref mach 'pat-v))
(n (vector-length pat-v))
)
(vector-for-each
(lambda (kx kis)
;;(fmtout "kis=~S\n " kis)
(for-each
(lambda (ki)
(let* ((pi (prev-item ki))
(rhs (vector-ref rhs-v (car pi))))
(when (and (not (negative? (cdr pi)))
(eqv? '$error (looking-at pi)))
(vector-set! pat-v kx
(append
(vector-ref pat-v kx)
`(($default shift . ,kx))))
#;(fmtout " => ~S\n" (vector-ref pat-v kx)))))
kis)
;;(fmtout "\n")
#f)
kis-v)
mach))
(define (add-recovery-logic! mach)
(let ((prev-core (fluid-ref *lalr-core*)))
(dynamic-wind
(lambda () (fluid-set! *lalr-core* (make-core/extras mach)))
(lambda () (add-recovery-logic-1 mach))
(lambda () (fluid-set! *lalr-core* prev-core)))))
;; to build parser, need:
;; pat-v - parse action table
;; ref-v - references
;; len-v - rule lengths
;; rto-v - hashed lhs symbols
;; to print itemsets need:
;; kis-v - itemsets
;; lhs-v - left hand sides
;; rhs-v - right hand sides
;; pat-v - action table
;; @deffn restart-spec spec start => spec
;; This generates a new spec with a different start.
;; @example
;; (restart-spec clang-spec 'expression) => cexpr-spec
;; @end example
(define (restart-spec spec start)
(let* ((rhs-v (vector-copy (assq-ref spec 'rhs-v))))
(vector-set! rhs-v 0 (vector start))
(cons* (cons 'start start)
(cons 'rhs-v rhs-v)
spec)))
;; @deffn make-lalr-machine spec => pgen
;; Generate a-list of items used for building/debugging parsers.
;; It might be useful to add hashify and compact with keyword arguments.
(define (make-lalr-machine spec)
(if (not spec) (error "make-lalr-machine: expecting valid specification"))
(let ((prev-core (fluid-ref *lalr-core*)))
(dynamic-wind
(lambda () (fluid-set! *lalr-core* (make-core/extras spec)))
(lambda ()
(let* ((sm1 (step1 spec))
(sm2 (step2 sm1))
(sm3 (step3 sm2))
(sm4 (step4 sm3))
(sm5 (gen-match-table sm4)))
(cons*
(cons 'len-v (vector-map (lambda (i v) (vector-length v))
(assq-ref sm5 'rhs-v)))
(cons 'rto-v (vector-copy (assq-ref sm5 'lhs-v)))
sm5)))
(lambda () (fluid-set! *lalr-core* prev-core)))))
;; for debugging
(define (make-LR0-machine spec)
(if (not spec) (error "make-LR0-machine: expecting valid specification"))
(let ((prev-core (fluid-ref *lalr-core*)))
(dynamic-wind
(lambda () (fluid-set! *lalr-core* (make-core/extras spec)))
(lambda () (step1 spec))
(lambda () (fluid-set! *lalr-core* prev-core)))))
;; @deffn with-spec spec proc arg ...
;; Execute with spec or mach.
(define (with-spec spec proc . args)
(if (not spec) (error "with-spec: expecting valid specification"))
(let ((prev-core (fluid-ref *lalr-core*)))
(dynamic-wind
(lambda () (fluid-set! *lalr-core* (make-core/extras spec)))
(lambda () (apply proc args))
(lambda () (fluid-set! *lalr-core* prev-core)))))
;; @deffn lalr-match-table mach => match-table
;; Get the match-table
(define (lalr-match-table mach)
(assq-ref mach 'mtab))
;; @deffn machine-compacted? mach => #t|#f
;; Indicate if the machine has been compacted.
;; TODO: needs update to deal with error recovery hooks.
(define (machine-compacted? mach)
;; Works by searching for $default phony-token.
(call-with-prompt 'got-it
;; Search for '$default. If not found return #f.
(lambda ()
(vector-for-each
(lambda (ix pat)
(for-each
(lambda (a) (if (or (eqv? (car a) '$default) (eqv? (car a) -1))
(abort-to-prompt 'got-it)))
pat))
(assq-ref mach 'pat-v))
#f)
;; otherwise, return #t.
(lambda () #t)))
;; @deffn compact-machine mach [#:keep 3] => mach
;; A "filter" to compact the parse table. For each state this will replace
;; the most populus set of reductions of the same production rule with a
;; default production. However, reductions triggered by keepers like
;; @code{'$error}, @code{'$lone-comm} or @code{'$lone-comm} are not counted.
;; The parser will want to treat errors and comments separately so that they
;; can be trapped (e.g., unaccounted comments are skipped).
(define* (compact-machine mach #:key (keep 3))
(let* ((pat-v (assq-ref mach 'pat-v))
(nst (vector-length pat-v))
(hashed (number? (caar (vector-ref pat-v 0)))) ; been hashified?
(reduce? (if hashed
(lambda (a) (and (number? a) (negative? a)))
(lambda (a) (eq? 'reduce (car a)))))
(reduce-pr (if hashed abs cdr))
(reduce-to? (if hashed
(lambda (a r) (eqv? (- r) a))
(lambda (a r) (and (eq? 'reduce (car a))
(eqv? r (cdr a))))))
(mk-default (if hashed
(lambda (r) (cons -1 (- r)))
(lambda (r) `($default reduce . ,r))))
(mtab (assq-ref mach 'mtab))
(keepers (list (assq-ref mtab '$lone-comm)
(assq-ref mtab '$code-comm)
(assq-ref mtab '$error))))
;; Keep an a-list mapping reduction prod-rule => count.
(let iter ((sx nst) (trn-l #f) (cnt-al '()) (p-max '(0 . 0)))
(cond
((pair? trn-l)
(cond
((not (reduce? (cdar trn-l)))
;; A shift, so not a candidate for default reduction.
(iter sx (cdr trn-l) cnt-al p-max))
((memq (caar trn-l) keepers)
;; Don't consider keepers because these will not be included.
(iter sx (cdr trn-l) cnt-al p-max))
(else
;; A reduction, so update the count for reducing this prod-rule.
(let* ((ix (reduce-pr (cdar trn-l)))
(cnt (1+ (or (assq-ref cnt-al ix) 0)))
(cnt-p (cons ix cnt)))
(iter sx (cdr trn-l) (cons cnt-p cnt-al)
(if (> cnt (cdr p-max)) cnt-p p-max))))))
((null? trn-l)
;; We have processed all transitions. If more than @code{keep} common
;; reductions then generate default rule to replace those.
(if (> (cdr p-max) keep)
(vector-set!
pat-v sx
(fold-right
(lambda (trn pat) ;; transition action
;; If not a comment and reduces to the most-popular prod-rule
;; then transfer to the default transition.
(if (and (not (memq (car trn) keepers))
(reduce-to? (cdr trn) (car p-max)))
pat
(cons trn pat)))
(list (mk-default (car p-max))) ;; default is last
(vector-ref pat-v sx))))
(iter sx #f #f #f))
((positive? sx) ;; next state
(iter (1- sx) (vector-ref pat-v (1- sx)) '() '(0 . 0)))))
mach))
;;.@section Using hash tables
;; The lexical analyzer will generate tokens. The parser generates state
;; transitions based on these tokens. When we build a lexical analyzer
;; (via @code{make-lexer}) we provide a list of strings to detect along with
;; associated tokens to return to the parser. By default the tokens returned
;; are symbols or characters. But these could as well be integers. Also,
;; the parser uses symbols to represent non-terminals, which are also used
;; to trigger state transitions. We could use integers instead of symbols
;; and characters by mapping via a hash table. We will bla bla bla.
;; There are also standard tokens we need to worry about. These are
;; @enumerate
;; @item the @code{$end} marker
;; @item identifiers (using the symbolic token @code{$ident}
;; @item non-negative integers (using the symbolic token @code{$fixed})
;; @item non-negative floats (using the symbolic token @code{$float})
;; @item @code{$default} => 0
;; @end enumerate
;; And action
;; @enumerate
;; @item positive => shift
;; @item negative => reduce
;; @item zero => accept
;; @end enumerate
;; However, if these are used they should appear in the spec's terminal list.
;; For the hash table we use positive integers for terminals and negative
;; integers for non-terminals. To apply such a hash table we need to:
;; @enumerate
;; @item from the spec's list of terminals (aka tokens), generate a list of
;; terminal to integer pairs (and vice versa)
;; @item from the spec's list of non-terminals generate a list of symbols
;; to integers and vice versa.
;; @item Go through the parser-action table and convert symbols and characters
;; to integers
;; @item Go through the XXX list passed to the lexical analyizer and replace
;; symbols and characters with integers.
;; @end enumerate
;; One issue we need to deal with is separating out the identifier-like
;; terminals (aka keywords) from those that are not identifier-like. I guess
;; this should be done as part of @code{make-lexer}, by filtering the token
;; list through the ident-reader.
;; NOTE: The parser is hardcoded to assume that the phony token for the
;; default (reduce) action is @code{'$default} for unhashed machine or
;; @code{-1} for a hashed machine.
;; NEW: need to add reduction of ERROR
;; @deffn machine-hashed? mach => #t|#f
;; Indicate if the machine has been hashed.
(define (machine-hashed? mach)
;; If hashed, the parse action for rule 0 will always be a number.
(number? (caar (vector-ref (assq-ref mach 'pat-v) 0))))
;; @deffn hashify-machine mach => mach
(define (hashify-machine mach)
(if (machine-hashed? mach) mach
(let* ((terminals (assq-ref mach 'terminals))
(non-terms (assq-ref mach 'non-terms))
(lhs-v (assq-ref mach 'lhs-v))
(sm ;; = (cons sym->int int->sym)
(let iter ((si (list (cons '$default -1)))
(is (list (cons -1 '$default)))
(ix 1) (tl terminals) (nl non-terms))
(if (null? nl) (cons (reverse si) (reverse is))
(let* ((s (atomize (if (pair? tl) (car tl) (car nl))))
(tl1 (if (pair? tl) (cdr tl) tl))
(nl1 (if (pair? tl) nl (cdr nl))))
(iter (acons s ix si) (acons ix s is) (1+ ix) tl1 nl1)))))
(sym->int (lambda (s) (assq-ref (car sm) s)))
;;
(pat-v0 (assq-ref mach 'pat-v))
(npat (vector-length pat-v0))
(pat-v1 (make-vector npat '())))
;; replace symbol/chars with integers
(let iter1 ((ix 0))
(unless (= ix npat)
(let iter2 ((al1 '()) (al0 (vector-ref pat-v0 ix)))
(if (null? al0) (vector-set! pat-v1 ix (reverse al1))
(let* ((a0 (car al0))
;; tk: token; ac: action; ds: destination
(tk (car a0)) (ac (cadr a0)) (ds (cddr a0))
;; t: encoded token; d: encoded destination
(t (sym->int tk))
(d (case ac
((shift) ds) ((reduce) (- ds))
((accept) 0) (else #f))))
(unless t
(fmterr "~S ~S ~S\n" tk ac ds)
(error "expect something"))
(iter2 (acons t d al1) (cdr al0)))))
(iter1 (1+ ix))))
;;
(cons*
(cons 'pat-v pat-v1)
(cons 'siis sm) ;; sm = (cons sym->int int->sym)
(cons 'mtab
(let iter ((mt1 '()) (mt0 (assq-ref mach 'mtab)))
(if (null? mt0) (reverse mt1)
(iter (cons (cons (caar mt0) (sym->int (cdar mt0))) mt1)
(cdr mt0)))))
;; reduction symbols = lhs:
(cons 'rto-v (vector-map (lambda (i v) (sym->int v)) lhs-v))
mach))))
;; === grammar/machine printing ======
;; @deffn elt->str elt terms => string
(define (elt->str elt terms)
(or (and=> (find-terminal elt terms) obj->str)
(symbol->string elt)))
;; @deffn pp-rule indent gx [port]
;; Pretty-print a production rule.
(define (pp-rule il gx . rest)
(let* ((port (if (pair? rest) (car rest) (current-output-port)))
(core (fluid-ref *lalr-core*))
(lhs (vector-ref (core-lhs-v core) gx))
(rhs (vector-ref (core-rhs-v core) gx))
(tl (core-terminals core)))
(display (substring " " 0 (min il 20)) port)
(fmt port "~A =>" lhs)
(vector-for-each (lambda (ix e) (fmt port " ~A" (elt->str e tl))) rhs)
(newline port)))
;; @deffn pp-item item => string
;; This could be called item->string.
;; This needs terminals to work correctly, like pp-lalr-grammar.
(define (pp-item item)
(let* ((core (fluid-ref *lalr-core*))
(tl (core-terminals core))
(gx (car item))
(lhs (vector-ref (core-lhs-v core) gx))
(rhs (vector-ref (core-rhs-v core) gx))
(rhs-len (vector-length rhs)))
(apply
string-append
(let iter ((rx 0) (sl (list (fmtstr "~S =>" lhs))))
(if (= rx rhs-len)
(append sl (if (= -1 (cdr item)) '(" .") '()))
(iter (1+ rx)
(append
sl (if (= rx (cdr item)) '(" .") '())
(let ((e (vector-ref rhs rx)))
(list (string-append " " (elt->str e tl)))))))))))
;; @deffn pp-lalr-notice spec [port]
(define (pp-lalr-notice spec . rest)
(let* ((port (if (pair? rest) (car rest) (current-output-port)))
(notice (assq-ref (assq-ref spec 'attr) 'notice))
(lines (if notice (string-split notice #\newline) '())))
(for-each (lambda (l) (simple-format port " ~A\n" l)) lines)
(newline)))
;; @deffn pp-lalr-grammar spec [port]
;; Pretty-print the grammar to the specified port, or current output.
(define (pp-lalr-grammar spec . rest)
(let* ((port (if (pair? rest) (car rest) (current-output-port)))
(lhs-v (assq-ref spec 'lhs-v))
(rhs-v (assq-ref spec 'rhs-v))
(nrule (vector-length lhs-v))
(act-v (assq-ref spec 'act-v))
;;(prp-v (assq-ref mach 'prp-v)) ; per-rule precedence
(terms (assq-ref spec 'terminals))
(prev-core (fluid-ref *lalr-core*)))
(fluid-set! *lalr-core* (make-core spec)) ; OR dynamic-wind ???
;; Print out the grammar.
(do ((i 0 (1+ i))) ((= i nrule))
(let* ((lhs (vector-ref lhs-v i)) (rhs (vector-ref rhs-v i)))
(if #f
(pp-rule 0 i)
(begin
(fmt port "~A ~A =>" i lhs)
(vector-for-each
(lambda (ix e) (fmt port " ~A" (elt->str e terms)))
rhs)
;;(fmt port "\t~S" (vector-ref act-v i))
(newline port)))))
(newline port)
(fluid-set! *lalr-core* prev-core)))
;; @deffn pp-lalr-machine mach [port]
;; Print the states of the parser with items and shift/reduce actions.
(define (pp-lalr-machine mach . rest)
(let* ((port (if (pair? rest) (car rest) (current-output-port)))
(lhs-v (assq-ref mach 'lhs-v))
(rhs-v (assq-ref mach 'rhs-v))
(nrule (vector-length lhs-v))
(pat-v (assq-ref mach 'pat-v))
(rat-v (assq-ref mach 'rat-v))
(kis-v (assq-ref mach 'kis-v))
(kit-v (assq-ref mach 'kit-v))
(nst (vector-length kis-v)) ; number of states
(i->s (or (and=> (assq-ref mach 'siis) cdr) '()))
(terms (assq-ref mach 'terminals))
(prev-core (fluid-ref *lalr-core*)))
(fluid-set! *lalr-core* (make-core mach))
;; Print out the itemsets and shift reduce actions.
(do ((i 0 (1+ i))) ((= i nst))
(let* ((state (vector-ref kis-v i))
(pat (vector-ref pat-v i))
(rat (if rat-v (vector-ref rat-v i) '())))
(fmt port "~A:" i) ; itemset index (aka state index)
(for-each
(lambda (k-item)
(for-each ; item, print it
(lambda (item)
(fmt port "\t~A" (pp-item item))
;; show lookaheads:
(if (and #f (negative? (cdr item)) kit-v (equal? item k-item))
(fmt port " ~A"
(map (lambda (tok) (elt->str tok terms))
(assoc-ref (vector-ref kit-v i) k-item))))
(fmt port "\n"))
(expand-k-item k-item)))
state)
(for-each ; action, print it
(lambda (act)
(if (pair? (cdr act))
(let ((sy (car act)) (pa (cadr act)) (gt (cddr act)))
(case pa
((srconf)
(fmt port "\t\t~A => CONFLICT: shift ~A, reduce ~A\n"
(elt->str sy terms) (car gt) (cdr gt)))
((rrconf)
(fmt port "\t\t~A => CONFLICT: reduce ~A\n"
(elt->str sy terms)
(string-join (map number->string gt) ", reduce ")))
(else
(fmt port "\t\t~A => ~A ~A\n" (elt->str sy terms) pa gt))))
(let* ((sy (car act)) (p (cdr act))
(pa (cond ((eq? #f p) 'CONFLICT)
((positive? p) 'shift)
((negative? p) 'reduce)
(else 'accept)))
(gt (and=> p abs)))
(fmt port "\t\t~A => ~A ~A\n"
(elt->str (assq-ref i->s sy) terms)
pa gt))))
pat)
(for-each ; action, print it
(lambda (ra)
;; FIX: should indicate if precedence removed user rule or default
(fmt port "\t\t[~A => ~A ~A] REMOVED by ~A\n"
(elt->str (caar ra) terms) (cadar ra) (cddar ra)
(case (cdr ra)
((pre) "precedence")
((ass) "associativity")
((def) "default shift")
(else (cdr ra)))))
rat)
(newline)))
(fluid-set! *lalr-core* prev-core)
(values)))
;; === output routines ===============
(use-modules (ice-9 pretty-print))
(use-modules (ice-9 regex))
(define (write-notice mach port)
(let* ((comm-leader ";; ")
(notice (assq-ref (assq-ref mach 'attr) 'notice))
(lines (if notice (string-split notice #\newline) '())))
(for-each
(lambda (l) (fmt port "~A~A\n" comm-leader l))
lines)
(if (pair? lines) (newline port))))
(define (string-sub str pat repl)
(let ((m (string-match pat str)))
(if m
(regexp-substitute #f m 'pre repl 'post)
str)))
;; @deffn write-lalr-tables mach filename [#:lang output-lang]
;; For example,
;; @example
;; write-lalr-tables mach "tables.scm"
;; write-lalr-tables mach "tables.tcl" #:lang 'tcl
;; @end example
(define* (write-lalr-tables mach filename #:key (lang 'scheme))
(define (write-table mach name port)
(fmt port "(define ~A\n " name)
(ugly-print (assq-ref mach name) port)
(fmt port ")\n\n"))
(call-with-output-file filename
(lambda (port)
(fmt port ";; ~A\n\n" (string-sub filename ".new$" ""))
(write-notice mach port)
(write-table mach 'len-v port)
(write-table mach 'pat-v port)
(write-table mach 'rto-v port)
(write-table mach 'mtab port)
(display ";;; end tables" port)
(newline port))))
;; @deffn write-lalr-actions mach filename [#:lang output-lang]
;; For example,
;; @example
;; write-lalr-actions mach "actions.scm"
;; write-lalr-actions mach "actions.tcl" #:lang 'tcl
;; @end example
(define* (write-lalr-actions mach filename #:key (lang 'scheme))
(define (pp-rule/ts gx)
(let* ((core (fluid-ref *lalr-core*))
(lhs (vector-ref (core-lhs-v core) gx))
(rhs (vector-ref (core-rhs-v core) gx))
(tl (core-terminals core))
(line (string-append
(symbol->string lhs) " => "
(string-join
(map (lambda (elt) (elt->str elt tl))
(vector->list rhs))
" "))))
(if (> (string-length line) 72)
(string-append (substring/shared line 0 69) "...")
line)))
(define (NEW-pp-rule/ts gx)
;; TBD: use start for zeroth rule
(let* ((core (fluid-ref *lalr-core*))
(lhs (vector-ref (core-lhs-v core) gx))
(rhs (vector-ref (core-rhs-v core) gx))
(tl (core-terminals core))
(line (string-append
(symbol->string lhs) " => "
(string-join
(map (lambda (elt) (elt->str elt tl))
(vector->list rhs))
" "))))
(if (> (string-length line) 72)
(string-append (substring/shared line 0 69) "...")
line)))
(define (write-actions mach port)
(with-fluid*
*lalr-core* (make-core mach)
(lambda ()
(fmt port "(define act-v\n (vector\n")
(vector-for-each
(lambda (gx actn)
(fmt port " ;; ~A\n" (pp-rule/ts gx))
(pretty-print (wrap-action actn) port #:per-line-prefix " "))
(assq-ref mach 'act-v))
(fmt port " ))\n\n"))))
(call-with-output-file filename
(lambda (port)
(fmt port ";; ~A\n\n" (string-sub filename ".new$" ""))
(write-notice mach port)
(write-actions mach port)
(display ";;; end tables" port)
(newline port))))
;; @end itemize
;;; --- last line ---