nyacc: C99 CPP for code is better

This commit is contained in:
Matt Wette 2017-02-05 07:52:44 -08:00 committed by Jan Nieuwenhuizen
parent e534225328
commit d03ea06c84
11 changed files with 641 additions and 526 deletions

View file

@ -53,10 +53,10 @@
(parse-c99
#:inc-dirs (string-split (getenv "C_INCLUDE_PATH") #\:)
#:cpp-defs '(
("__GNUC__" . "0")
("__NYACC__" . "1")
("VERSION" . "0.4")
("PREFIX" . "\"\"")
"__GNUC__=0"
"__NYACC__=1"
"VERSION=\"0.4\""
"PREFIX=\"\""
)
#:xdef? gnuc-xdef?
#:mode 'code

View file

@ -15,97 +15,21 @@
;;; You should have received a copy of the GNU General Public License
;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
;; Notes on the code design may be found in doc/nyacc/lang/c99-hg.info
;; @section The C99 Parser Body
;; This code provides the front end to the C99 parser, including the lexical
;; analyzer and optional CPP processing. In @code{'file} mode the lex'er
;; passes CPP statements to the parser; in @code{'code} mode the lex'er
;; parses and evaluates the CPP statements. In the case of included files
;; (e.g., via @code{#include <file.h>}) the include files are parsed if
;; not in the @code{td-dict}. The @code{td-dict} is a dictionary that maps
;; include file names to typedefs (e.g., @code{stdio.h} to @code{FILE}).
;; not in @code{inc-help}. The a-list @code{inc-help} maps
;; include file names to typenames (e.g., @code{stdio.h} to @code{FILE}) and
;; CPP defines (e.g., "INT_MAX=12344").
(use-modules ((srfi srfi-9) #:select (define-record-type)))
(use-modules ((sxml xpath) #:select (sxpath)))
(define c99-std-dict
'(("alloca.h")
("complex.h" "complex" "imaginary")
("ctype.h")
("fenv.h" "fenv_t" "fexcept_t")
("float.h" "float_t")
("inttypes.h"
"int8_t" "uint8_t" "int16_t" "uint16_t" "int32_t" "uint32_t"
"int64_t" "uint64_t" "uintptr_t" "intptr_t" "intmax_t" "uintmax_t"
"int_least8_t" "uint_least8_t" "int_least16_t" "uint_least16_t"
"int_least32_t" "uint_least32_t" "int_least64_t" "uint_least64_t"
"imaxdiv_t")
("limits.h")
("math.h")
("regex.h" "regex_t" "regmatch_t")
("setjmp.h" "jmp_buf")
("signal.h" "sig_atomic_t")
("stdarg.h" "va_list")
("stddef.h" "ptrdiff_t" "size_t" "wchar_t")
("stdint.h"
"int8_t" "uint8_t" "int16_t" "uint16_t" "int32_t" "uint32_t"
"int64_t" "uint64_t" "uintptr_t" "intptr_t" "intmax_t" "uintmax_t"
"int_least8_t" "uint_least8_t" "int_least16_t" "uint_least16_t"
"int_least32_t" "uint_least32_t" "int_least64_t" "uint_least64_t")
("stdio.h" "FILE" "size_t")
("stdlib.h" "div_t" "ldiv_t" "lldiv_t" "wchar_t")
("string.h" "size_t")
("strings.h" "size_t")
("time.h" "time_t" "clock_t" "size_t")
("unistd.h" "size_t" "ssize_t" "div_t" "ldiv_t")
("wchar.h" "wchar_t" "wint_t" "mbstate_t" "size_t")
("wctype.h" "wctrans_t" "wctype_t" "wint_t")
))
;; @subsubsection CPP if-then-else Logic Block (ITLB) Processing
;; The parser needs to have a "CPI" (CPP processing info) stack to deal with
;; types (re)defined in multiple branches of a #if...#endif statement chain.
;; If we are in "code" mode then we may be skipping code so need to track
;; when to shift and when not to.
;;
;; The state is contained in a stack @code{ppxs}
;; States are
;; @table code
;; @item skip-done
;; skip code
;; @item skip-look
;; skipping code, but still looking for true at this level
;; @item keep
;; keep code
;; @item skip1-pop
;; skip one token and pop skip-stack
;; @end table
;; Also, if we want to pass on all the sections of an ITLB to the parser
;; we need to remove typedef names because a typedef may appear multiple
;; times, as in
;; @example
;; #ifdef SIXTYFOURBIT
;; typedef short int32_t;
;; #else
;; typedef long int32_t;
;; #endif
;; @end example
;; @noindent
;; To achieve this we keep a stack of valid typedefs. On @code{#if} we push,
;; on @code{#elif} we shift (i.e., pop, then push) and on @code{#endif} we pop.
;;
;; The grammar looks like
;; @example
;; (code
;; ("if" cond code "endif")
;; ("if" cond code "else" code "endif")
;; ("if" cond code elif-list "endif")
;; ("if" cond code elif-list "else" code "endif")
;; (other))
;; (elif-list
;; ("elif" cond code)
;; (elif-list "elif" cond code))
;; @end example
;; @noindent
(use-modules (ice-9 regex))
(define-record-type cpi
(make-cpi-1)
@ -113,28 +37,73 @@
(debug cpi-debug set-cpi-debug!) ; debug #t #f
(defines cpi-defs set-cpi-defs!) ; #defines
(incdirs cpi-incs set-cpi-incs!) ; #includes
(tn-dict cpi-tynd set-cpi-tynd!) ; typename dict (("<x>" foo_t ..
(inc-tynd cpi-itynd set-cpi-itynd!) ; a-l of incfile => typenames
(inc-defd cpi-idefd set-cpi-idefd!) ; a-l of incfile => defines
(ptl cpi-ptl set-cpi-ptl!) ; parent typename list
(ctl cpi-ctl set-cpi-ctl!) ; current typename list
)
(define (make-cpi debug defines incdirs tn-dict)
;;.@deffn split-cppdef defstr => (<name> . <repl>)|((<name> <args> . <repl>)|#f
;; Convert define string to a dict item. Examples:
;; @example
;; "ABC=123" => '("ABC" . "123")
;; "MAX(X,Y)=((X)>(Y)?(X):(Y))" => ("MAX" ("X" "Y") . "((X)>(Y)?(X):(Y))")
;; @end example
(define split-cppdef
(let ((rx1 (make-regexp "^([A-Za-z0-9_]+)\\([^)]*\\)=(.*)$"))
(rx2 (make-regexp "^([A-Za-z0-9_]+)=(.*)$")))
(lambda (defstr)
(let* ((m1 (regexp-exec rx1 defstr))
(m2 (or m1 (regexp-exec rx2 defstr))))
(cond
((regexp-exec rx1 defstr) =>
(lambda (m)
(let* ((s1 (match:substring m1 1))
(s2 (match:substring m1 2))
(s3 (match:substring m1 3)))
(cons s1 (cons s2 s3)))))
((regexp-exec rx2 defstr) =>
(lambda (m)
(let* ((s1 (match:substring m2 1))
(s2 (match:substring m2 2)))
(cons s1 s2))))
(else #f))))))
;; @deffn make-cpi debug defines incdirs inchelp
(define (make-cpi debug defines incdirs inchelp)
;; convert inchelp into inc-file->typenames and inc-file->defines
;; Any entry for an include file which contains `=' is considered
;; a define; otherwise, the entry is a typename.
(define (split-helper helper)
(let ((file (car helper)))
(let iter ((tyns '()) (defs '()) (ents (cdr helper)))
(cond
((null? ents) (values (cons file tyns) (cons file defs)))
((split-cppdef (car ents)) =>
(lambda (def) (iter tyns (cons def defs) (cdr ents))))
(else (iter (cons (car ents) tyns) defs (cdr ents)))))))
(let* ((cpi (make-cpi-1)))
(set-cpi-debug! cpi debug) ; print states debug
(set-cpi-defs! cpi defines) ; list of define strings??
(set-cpi-incs! cpi incdirs) ; list of include dir's
(set-cpi-tynd! cpi tn-dict) ; typename dict by include-file name
(set-cpi-ptl! cpi '()) ; list of lists of typedef strings
(set-cpi-ctl! cpi '()) ; list of typedef strings
(set-cpi-ptl! cpi '()) ; list of lists of typenames
(set-cpi-ctl! cpi '()) ; list of typenames
;; itynd idefd:
(let iter ((itynd '()) (idefd '()) (helpers inchelp))
(cond ((null? helpers)
(set-cpi-itynd! cpi itynd)
(set-cpi-idefd! cpi idefd))
(else
(call-with-values
(lambda () (split-helper (car helpers)))
(lambda (ityns idefs)
(iter (cons ityns itynd) (cons idefs idefd) (cdr helpers)))))))
cpi))
(define *info* (make-fluid #f))
;; given tyns
;; cadr is next level
;; caar is list of sibs
;; search (caar car tyns), then (caar cadr tyns), then ...
;; @deffn typename? name
;; Called by lexer to determine if symbol is a typename.
;; Check current sibling for each generation.
@ -297,8 +266,11 @@
;; Return the first (tval . lval) pair not excluded by the CPP.
(lambda ()
(define (exec-cpp-stmts?) ; exec (vs pass to parser) CPP stmts?
(define (exec-cpp?) ; exec (vs pass to parser) CPP stmts?
(eqv? mode 'code))
(define (cpp-flow? keyw)
(memq keyw '(if elif else)))
(define (add-define tree)
(let* ((tail (cdr tree))
@ -323,43 +295,69 @@
(lambda (key fmt . args)
(report-error fmt args)
(throw 'c99-error "CPP error"))))
(define (eval-cpp-stmt stmt)
(define (inc-stmt->file stmt)
(let* ((arg (cadr stmt)) (len (string-length arg)))
(substring arg 1 (1- len))))
(define (inc-file->path file)
(find-file-in-dirl file (cpi-incs info)))
(define (eval-cpp-stmt-1 stmt)
(case (car stmt)
;; includes
((include)
(let* ((parg (cadr stmt)) (leng (string-length parg))
(file (substring parg 1 (1- leng)))
(path (find-file-in-dirl file (cpi-incs info)))
(tynd (assoc-ref (cpi-tynd info) file)))
(tyns (assoc-ref (cpi-itynd info) file))
(defs (assoc-ref (cpi-idefd info) file))
)
(cond
(tynd (for-each add-typename tynd)) ; in dot-h dict
((not path) (p-err "not found: ~S" file))
((exec-cpp-stmts?) (push-input (open-input-file path)))
(else ; include as tree
(tyns ; use include helper
(for-each add-typename tyns)
(set-cpi-defs! info (append defs (cpi-defs info))))
((not path) ; file not found
(p-err "not found: ~S" file))
((exec-cpp?) ; include in-place
(push-input (open-input-file path)))
(else ; include as tree
(let* ((tree (with-input-from-file path run-parse)))
(if (not tree) (p-err "included from ~S" path))
(for-each add-define (xp1 tree)) ; add def's
(set! stmt (append stmt (list tree)))))))
(if (exec-cpp-stmts?) (set! ppxs (cons 'skip1-pop ppxs))))
(if (exec-cpp?) (set! ppxs (cons 'skip1-pop ppxs))))
((define)
(add-define stmt)
(if (exec-cpp-stmts?) (set! ppxs (cons 'skip1-pop ppxs))))
(if (exec-cpp?) (set! ppxs (cons 'skip1-pop ppxs))))
((undef)
(rem-define (cadr stmt))
(if (exec-cpp-stmts?) (set! ppxs (cons 'skip1-pop ppxs))))
(if (exec-cpp?) (set! ppxs (cons 'skip1-pop ppxs))))
((error)
(if (exec-cpp-stmts?)
(if (exec-cpp?)
(report-error "error: #error ~A" (cdr stmt))))
((pragma) ;; std: implementation-defined if expanded
#t)
;;((pragma) #t) need to work
(else
(error "bad cpp flow stmt")))
(case (car stmt)
((pragma) (cons 'cpp-pragma (cdr stmt)))
(else (cons 'cpp-stmt stmt))))
(define (eval-cpp-flow-1 stmt)
(case mode
((file)
(case (car ppxs)
((keep) #t)
(else #t)))
((code)
#t))
(case (car stmt)
;; control flow
((if) ;; covers (if ifdef ifndef)
(cond
((exec-cpp-stmts?)
((exec-cpp?)
(let ((val (eval-cpp-cond-text (cadr stmt))))
(simple-format #t "if ~S=> ~S\n" (cadr stmt) val)
;;(simple-format #t "if ~S=> ~S\n" (cadr stmt) val)
(cond
((not val) (p-err "unresolved: ~S" (cadr stmt)))
((zero? val) (set! ppxs (cons* 'skip1-pop 'skip-look ppxs)))
@ -367,7 +365,7 @@
(else (cpi-push))))
((elif)
(cond
((exec-cpp-stmts?)
((exec-cpp?)
(let ((val (eval-cpp-cond-text (cadr stmt))))
(cond
((not val)
@ -382,7 +380,7 @@
(else (cpi-shift))))
((else)
(cond
((exec-cpp-stmts?)
((exec-cpp?)
(cond
((eq? 'skip-look (car ppxs))
(set! ppxs (cons* 'skip1-pop 'keep (cdr ppxs))))
@ -391,30 +389,74 @@
(else (cpi-shift))))
((endif)
(cond
((exec-cpp-stmts?)
((exec-cpp?)
(set! ppxs (cons 'skip1-pop (cdr ppxs))))
(else (cpi-pop))))
(else
(error "unhandled cpp stmt")))
(error "bad cpp flow stmt")))
(case (car stmt)
((pragma) (cons 'cpp-pragma (cdr stmt)))
(else (cons 'cpp-stmt stmt))))
(define (eval-cpp-line line)
;;(simple-format #t "eval-cpp-line: ~S\n" line)
(define (eval-cpp-stmt-1/code stmt)
(case (car stmt)
;; actions
((include)
(let* ((file (inc-stmt->file stmt))
(path (inc-file->path file)))
(if (not path) (p-err "not found: ~S" file))
(push-input (open-input-file path))))
((define) (add-define stmt))
((undef) (rem-define (cadr stmt)))
((error) (report-error "error: #error ~A" (cdr stmt)))
((pragma) #t) ;; ignore for now
;; control flow: states are {skip-look, keep, skip-done}
((if) ;; and ifdef ifndef
(let ((val (eval-cpp-cond-text (cadr stmt))))
;;(simple-format #t "if ~S=> ~S\n" (cadr stmt) val)
(if (not val) (p-err "unresolved: ~S" (cadr stmt)))
(if (eq? 'keep (car ppxs))
(if (zero? val)
(set! ppxs (cons 'skip-look ppxs))
;; keep if keeping, skip if skipping, ??? if skip-look
(set! ppxs (cons (car ppxs) ppxs)))
(set! ppxs (cons 'skip-done ppxs)))))
((elif)
(let ((val (eval-cpp-cond-text (cadr stmt))))
;;(simple-format #t "elif ~S=> ~S\n" (cadr stmt) val)
(if (not val) (p-err "unresolved: ~S" (cadr stmt)))
(if (eq? 'keep (car ppxs))
(if (zero? val)
(set! ppxs (cons 'skip-look ppxs))
;; keep if keeping, skip if skipping, ??? if skip-look
(set! ppxs (cons* (car ppxs) ppxs)))
(set! ppxs (cons 'skip-done ppxs)))))
((else)
;;(simple-format #t "else\n")
(if (eqv? 'skip-look (car ppxs))
(set! ppxs (cons 'keep (cdr ppxs)))))
((endif)
(set! ppxs (cdr ppxs)))
(else
(error "bad cpp flow stmt"))))
(define (eval-cpp-stmt/code stmt)
;;(simple-format #t "eval-cpp-stmt: ~S\n" stmt)
(with-throw-handler
'cpp-error
(lambda () (eval-cpp-stmt (read-cpp-stmt line)))
(lambda () (eval-cpp-stmt-1/code stmt))
(lambda (key fmt . rest)
(display "body.399\n")
(report-error fmt rest)
(throw 'c99-error "CPP error"))))
(define (eval-cpp-stmt/file stmt)
(throw 'c99-error "not implemented"))
;; Composition of @code{read-cpp-line} and @code{eval-cpp-line}.
;; We should not be doing this!
(define (read-cpp ch)
(and=> (read-cpp-line ch) eval-cpp-line))
(define (read-cpp-stmt ch)
(and=> (read-cpp-line ch) cpp-line->stmt))
(define (read-token)
(let iter ((ch (read-char)))
@ -424,20 +466,27 @@
((eq? ch #\newline) (set! bol #t) (iter (read-char)))
((char-set-contains? c:ws ch) (iter (read-char)))
(bol
(cond
((read-comm ch bol) => assc-$)
((read-cpp ch) =>
(lambda (res) ;; if '() stmt expanded so re-read
;;(simple-format #t "read-cpp => ~S\n" res)
(if (pair? res) (assc-$ res) (iter (read-char)))))
(else (set! bol #f) (iter ch))))
(set! bol #f)
(cond ;; things that depend on bol only
((read-comm ch #t) => assc-$)
((read-cpp-stmt ch) =>
(lambda (stmt)
;;(simple-format #t "read-cpp-stmt => ~S\n" stmt)
(case mode
((code) ;; but what about #pragma - ignore for now
(eval-cpp-stmt/code stmt)
(iter (read-char)))
((file)
(eval-cpp-stmt/file stmt)
(assc-$ stmt)))))
(else (iter ch))))
((read-ident ch) =>
(lambda (name)
;;(simple-format #t "read-ident=>~S\n" name)
(let ((symb (string->symbol name)))
(cond
((and (x-def? name mode)
(expand-cpp-mref name (cpi-defs info)))
(expand-cpp-macro-ref name (cpi-defs info)))
=> (lambda (st)
;;(simple-format #t "body: st=~S\n" st)
(push-input (open-input-string st))
@ -451,7 +500,7 @@
((read-c-num ch) => assc-$)
((read-c-string ch) => assc-$)
((read-c-chlit ch) => assc-$)
((read-comm ch bol) => assc-$)
((read-comm ch #f) => assc-$)
((read-chseq ch) => identity)
((assq-ref chrtab ch) => (lambda (t) (cons t (string ch))))
((eqv? ch #\\) ;; C allows \ at end of line to continue
@ -462,6 +511,7 @@
;; Loop between reading tokens and skipping tokens via CPP logic.
(let iter ((pair (read-token)))
;;(simple-format #t "iter ~S\n" (car ppxs)) (sleep 1)
(case (car ppxs)
((keep)
;;(simple-format #t "lx=>~S\n" pair)
@ -472,5 +522,5 @@
(set! ppxs (cdr ppxs))
(iter (read-token)))))
)))))
;; --- last line ---

View file

@ -19,11 +19,11 @@
(define-module (nyacc lang c99 cpp)
#:export (parse-cpp-stmt
read-cpp-stmt
cpp-line->stmt
parse-cpp-expr
eval-cpp-expr
cpp-expand-text
expand-cpp-mref
expand-cpp-macro-ref
)
#:use-module (nyacc parse)
#:use-module (nyacc lex)
@ -37,16 +37,6 @@
(use-modules (ice-9 syncase)))
(mes))
#|
#define #undef #include #if #ifdef #ifndef #else #endif #elif
#line defined #-operator ##-operator #pragma #error
todo:
pragma
#-op ##-op
provide dict of #defines
provide util to expand defines
|#
;; @deffn read-ellipsis ch
;; read ellipsis
(define (read-ellipsis ch)
@ -55,8 +45,42 @@ todo:
((char=? ch #\.) (read-char) (read-char) "...") ; assumes correct syntax
(else #f)))
;; @deffn cpp-define => #f|???
;; @deffn cpp-define => (define (name "ADD") (args "X" "Y") (repl "X+Y"))
(define (cpp-define)
(define (p-args la) ;; parse args
(if (eq? la #\()
(let iter ((args '()) (la (skip-il-ws (read-char))))
(cond
((eq? la #\)) (reverse args))
((read-c-ident la) =>
(lambda (arg)
(iter (cons arg args) (skip-il-ws (read-char)))))
((read-ellipsis la) =>
(lambda (arg)
(iter (cons arg args) (skip-il-ws (read-char)))))
((eq? la #\,)
(iter args (skip-il-ws (read-char))))))
(begin (if (char? la) (unread-char la)) #f))) ;; CLEANUP
(define (p-rest la) ;; parse rest
(cond ((eof-object? la) "")
(else
(if (not (char=? #\=)) (unread-char ch)) ; handle ABC=DEF
(drain-input (current-input-port)))))
(let* ((name (read-c-ident (skip-il-ws (read-char))))
(args (or (p-args (read-char)) '()))
(repl (p-rest (skip-il-ws (read-char)))))
(if (pair? args)
`(define (name ,name) (args ,args) (repl ,repl))
`(define (name ,name) (repl ,repl)))))
;; where @code{...} is
;; @code{(name "ABC") (repl "123")} or
;; @code{(name "ABC") (args "X" "Y") (repl "X+Y")}
(define (x-cpp-define)
;; The (weak?) parse architecture is "unread la argument if no match"
(letrec
((p-cppd ;; parse all
@ -64,7 +88,7 @@ todo:
(let* ((iden (read-c-ident (skip-il-ws (read-char))))
;; "define ABC(ARG)" not the same as "define ABC (ARG)"
(args (or (p-args (read-char)) '()))
(rest (or (p-rest (skip-il-ws (read-char))) " ")))
(rest (p-rest (skip-il-ws (read-char)))))
(if (pair? args)
`(define (name ,iden) ,(cons 'args args) (repl ,rest))
`(define (name ,iden) (repl ,rest))))))
@ -85,8 +109,10 @@ todo:
(begin (if (char? la) (unread-char la)) #f)))) ;; CLEANUP
(p-rest ;; parse rest
(lambda (la)
(cond ((char? la) (unread-char la) (drain-input (current-input-port)))
(else #f)))))
(cond ((eof-object? la) "")
(else
(if (not (char=? #\=)) (unread-char ch)) ; handle ABC=DEF
(drain-input (current-input-port)))))))
(p-cppd)))
;; @deffn cpp-include
@ -99,7 +125,7 @@ todo:
(iter (cons ch cl) (read-char))))))
`(include ,path)))
;; @deffn read-cpp-stmt line defs => (stmt-type text)
;; @deffn cpp-line->stmt line defs => (stmt-type text)
;; Parse a line from a CPP statement and return a parse tree.
;; @example
;; (parse-cpp-stmt "define X 123") => (define "X" "123")
@ -108,7 +134,7 @@ todo:
;; @end example
;; To evaluate the @code{if} statements use @code{parse-cpp-expr} and
;; @code{eval-cpp-expr}.
(define (read-cpp-stmt line)
(define (cpp-line->stmt line)
(define (rd-ident) (read-c-ident (skip-il-ws (read-char))))
(define (rd-num) (and=> (read-c-num (skip-il-ws (read-char))) cdr))
(define (rd-rest) (let ((ch (skip-il-ws (read-char))))
@ -137,6 +163,340 @@ todo:
(cons 'mtab mtab) (cons 'act-v act-v))))
;; Provide gen-cpp-lexer parse-cpp-expr eval-cpp-expr:
(include-from-path "nyacc/lang/c99/cppbody.scm")
;;(include-from-path "nyacc/lang/c99/cppbody.scm")
;; --- last line ---
;;; nyacc/lang/c99/cppbody.scm
;;;
;;; Copyright (C) 2016-2017 Matthew R. Wette
;;;
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation, either version 3 of the License, or
;;; (at your option) any later version.
;;;
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
(use-modules (ice-9 match))
(define c99-std-defs
'("__DATE__" "__FILE__" "__LINE__" "__STDC__" "__STDC_HOSTED__"
"__STDC_VERSION__" "__TIME__"))
(define (c99-std-def? str)
(let iter ((defs c99-std-defs))
(cond
((null? defs) #f)
((string=? (car defs) str) #t)
(else (iter (cdr defs))))))
(define (c99-std-val str)
(cond
((string=? str "__DATE__") "M01 01 2001")
((string=? str "__FILE__") "(unknown)")
((string=? str "__LINE__") 0)
((string=? str "__STDC__") 1)
((string=? str "__STDC_HOSTED__") 0)
((string=? "__STDC_VERSION__") 201701)
((string=? "__TIME__") "00:00:00")
(else #f)))
(define (cpp-err fmt . args)
(apply throw 'cpp-error fmt args))
;;.@deffn skip-il-ws ch
;; Skip in-line whitespace
(define skip-il-ws
(let ((il-ws (list->char-set '(#\space #\tab))))
(lambda (ch)
(cond
((eof-object? ch) ch)
((char-set-contains? il-ws ch) (skip-il-ws (read-char)))
(else ch)))))
;; Since we want to be able to get CPP statements with comment in tact
;; (e.g., for passing to @code{pretty-print-c99}) we need to remove
;; comments when parsing CPP expressions. We convert a comm-reader
;; into a comm-skipper here. And from that generate a lexer generator.
(define cpp-comm-skipper
(let ((reader (make-comm-reader '(("/*" . "*/")))))
(lambda (ch)
(reader ch #f))))
;; generate a lexical analyzer per string
(define gen-cpp-lexer
(make-lexer-generator mtab #:comm-skipper cpp-comm-skipper))
;; @deffn parse-cpp-expr text => tree
;; Given a string returns a cpp parse tree. This is called by
;; @code{eval-cpp-expr}. The text will have had all CPP defined symbols
;; expanded already so no identifiers should appear in the text.
;; A @code{cpp-error} will be thrown if a parse error occurs.
(define (parse-cpp-expr text)
(with-throw-handler
'nyacc-error
(lambda ()
(with-input-from-string text
(lambda () (raw-parser (gen-cpp-lexer)))))
(lambda (key fmt . args)
(apply throw 'cpp-error fmt args))))
;; @deffn eval-cpp-expr tree dict => datum
;; Evaluate a tree produced from @code{parse-cpp-expr}.
;; The tree passed to this routine is
(define (eval-cpp-expr tree dict)
(letrec
((tx (lambda (tr ix) (list-ref tr ix)))
(tx1 (lambda (tr) (tx tr 1)))
(ev (lambda (ex ix) (eval-expr (list-ref ex ix))))
(ev1 (lambda (ex) (ev ex 1))) ; eval expr in arg 1
(ev2 (lambda (ex) (ev ex 2))) ; eval expr in arg 2
(ev3 (lambda (ex) (ev ex 3))) ; eval expr in arg 3
(eval-expr
(lambda (tree)
(case (car tree)
((fixed) (string->number (tx1 tree)))
((char) (char->integer (tx1 tree)))
((defined) (if (assoc-ref dict (tx1 tree)) 1 0))
((pre-inc post-inc) (1+ (ev1 tree)))
((pre-dec post-dec) (1- (ev1 tree)))
((pos) (ev1 tree))
((neg) (- (ev1 tree)))
((bw-not) (bitwise-not (ev1 tree)))
((not) (if (zero? (ev1 tree)) 1 0))
((mul) (* (ev1 tree) (ev2 tree)))
((div) (/ (ev1 tree) (ev2 tree)))
((mod) (modulo (ev1 tree) (ev2 tree)))
((add) (+ (ev1 tree) (ev2 tree)))
((sub) (- (ev1 tree) (ev2 tree)))
((lshift) (bitwise-arithmetic-shift-left (ev1 tree) (ev2 tree)))
((rshift) (bitwise-arithmetic-shift-right (ev1 tree) (ev2 tree)))
((lt) (if (< (ev1 tree) (ev2 tree)) 1 0))
((le) (if (<= (ev1 tree) (ev2 tree)) 1 0))
((gt) (if (> (ev1 tree) (ev2 tree)) 1 0))
((ge) (if (>= (ev1 tree) (ev2 tree)) 1 0))
((equal) (if (= (ev1 tree) (ev2 tree)) 1 0))
((noteq) (if (= (ev1 tree) (ev2 tree)) 0 1))
((bw-or) (bitwise-ior (ev1 tree) (ev2 tree)))
((bw-xor) (bitwise-xor (ev1 tree) (ev2 tree)))
((bw-and) (bitwise-and (ev1 tree) (ev2 tree)))
((or) (if (and (zero? (ev1 tree)) (zero? (ev2 tree))) 0 1))
((and) (if (or (zero? (ev1 tree)) (zero? (ev2 tree))) 0 1))
((cond-expr) (if (zero? (ev1 tree)) (ev3 tree) (ev2 tree)))
((ident) (cpp-err "undefined identifier: ~S" (cadr tree)))
(else (error "incomplete implementation"))))))
(eval-expr tree)))
;; Note: scan-cpp-input scans replacement text. When identifiers are found
;; they are tested for expansion as follows:
;; @enumerate
;; @item If already expanded, then ignore.
;; @item If followed by @code{(}, then use @code{collect-args} to get the
;; arguments and ...
;; @item Otherwise insert the replacement text and continue scanning (at
;; first character of new replacement text.
;; @end enumerate
;; @deffn rtokl->string tokl => string
;; Convert reverse token-list to string.
(define (rtokl->string tokl)
;; need to cover: comm ident string arg
;;(let iter ((stl '()) (chl '()) (nxt #f) (tkl tokl)) ;; more efficient
(let iter ((stl '()) (tkl tokl))
(match tkl
('()
(apply string-append stl))
((('arg . arg) 'dhash (key . val) . rest)
(iter (cons (string-append val arg) stl) (list-tail tkl 3)))
(((key . val) 'dhash ('arg . arg) . rest)
(iter (cons (string-append arg val) stl) (list-tail tkl 3)))
((('arg . arg) 'hash . rest)
(iter (cons (string-append "\"" arg "\"") stl) (list-tail tkl 2)))
((('comm . val) . rest)
(iter (cons (string-append "/*" val " */") stl) (cdr tkl)))
((('ident . rval) ('ident . lval) . rest)
(iter (cons* " " rval stl) (cdr tkl)))
(((key . val) . rest)
(iter (cons val stl) (cdr tkl)))
(('space . rest)
(iter (cons " " stl) rest))
(((? char? ch) . rest)
(iter (cons (string ch) stl) rest))
(otherwise
(error "no match" tkl)))))
;; @deffn scan-cpp-input argd used dict end-tok => string
;; Process replacement text from the input port and generate a (reversed)
;; token-list. If end-tok, stop at, and push back, @code{,} or @code{)}.
;; If end-tok is @code{,} then read until @code{,} or @code{(}.
;; The argument @var{argd} is a dictionary (argument name, argument
;; value) pairs which will be expanded as needed. This routine is called
;; by collect-args, expand-cpp-repl and cpp-expand-text.
(define (scan-cpp-input argd dict used end-tok)
;; Works like this: scan for tokens (comments, parens, strings, char's, etc).
;; Tokens are collected in a (reverse ordered) list (tkl) and merged together
;; to a string on return using @code{rtokl->string}.
;; Turn reverse chl into a string and insert it into the string list stl.
(define (add-chl chl stl)
(if (null? chl) stl (cons (list->string (reverse chl)) stl)))
(define conjoin string-append)
;; We just scanned "defined", now need to scan the arg to inhibit expansion.
;; For example, we have scanned "defined"; we now scan "(FOO)" or "FOO", and
;; return "defined(FOO)". We use ec (end-char) as terminal char:
;; #\) if starts with #( or #\nul if other.
(define (scan-defined-arg)
(let* ((ch (skip-il-ws (read-char)))
(ec (if (char=? ch #\() #\) #\null)))
(let iter ((chl '(#\()) (ec ec) (ch (if (char=? ec #\)) (read-char) ch)))
(cond
((eof-object? ch)
(if (char=? ec #\null)
(string-append "defined" (list->string (reverse (cons #\) chl))))
(cpp-err "illegal argument to `defined'")))
((char-set-contains? c:ir ch)
(iter (cons ch chl) ec (read-char)))
((char=? ec #\))
(if (char=? #\) (skip-il-ws ch))
(string-append "defined" (list->string (reverse (cons #\) chl))))
(cpp-err "garbage in argument to `defined'")))
((char=? ec #\null) ;; past identifier
(string-append "defined" (list->string (reverse (cons #\) chl)))))
(else
(cpp-err "illegal argument to `defined'"))))))
(let iter ((tkl '()) ; token list (as list of strings)
(lvl 0) ; level
(ch (read-char))) ; next character
(cond
;; have item to add, but first add in char's
;;(nxt (iter (cons nxt (add-chl chl tkl)) '() #f lvl ch))
;; If end of string or see end-ch at level 0, then return.
((eof-object? ch) (rtokl->string tkl))
((and (eqv? end-tok ch) (zero? lvl))
(unread-char ch) (rtokl->string tkl))
((and end-tok (char=? #\) ch) (zero? lvl))
(unread-char ch) (rtokl->string tkl))
((read-c-comm ch #f) =>
(lambda (cp) (iter (acons `comm (cdr cp) tkl) lvl (read-char))))
((char-set-contains? c:ws ch)
(if (and (pair? tkl) (char? (car tkl)))
(iter (cons 'space tkl) lvl (read-char))
(iter tkl lvl (read-char))))
((char=? #\( ch) (iter (cons ch tkl) (1+ lvl) (read-char)))
((char=? #\) ch) (iter (cons ch tkl) (1- lvl) (read-char)))
((char=? #\# ch)
(let ((ch (read-char)))
(if (eqv? ch #\#)
(iter (cons 'dhash tkl) lvl (read-char))
(iter (cons 'hash tkl) lvl ch))))
((read-c-string ch) =>
(lambda (st) (iter (acons 'string st tkl) lvl (read-char))))
((read-c-ident ch) =>
(lambda (iden)
(if (equal? iden "defined")
;; "defined" is a special case
(let ((arg (scan-defined-arg)))
(iter (acons 'defined arg tkl) lvl (read-char)))
;; otherwise ...
(let* ((aval (assoc-ref argd iden)) ; lookup argument
(rval (assoc-ref dict iden))) ; lookup macro def
(cond
((member iden used) ; name used
(iter (cons iden tkl) lvl (read-char)))
(aval ; arg ref
(iter (acons 'arg aval tkl) lvl (read-char)))
((string? rval) ; cpp repl
(iter (acons 'string rval tkl) lvl (read-char)))
((pair? rval) ; cpp macro
(let* ((argl (car rval)) (text (cdr rval))
(argd (collect-args argl argd dict used))
(newl (expand-cpp-repl text argd dict (cons iden used))))
(iter (acons 'string newl tkl) lvl (read-char))))
(else ; normal identifier
(iter (acons 'ident iden tkl) lvl (read-char))))))))
(else
(iter (cons ch tkl) lvl (read-char))))))
;; @deffn collect-args argl argd dict used => argd
;; to be documented
;; I think argd is a passthrough for scan-cpp-input
;; argl: list of formal arguments in #define
;; argd: used? (maybe just a pass-through for scan-cpp-input
;; dict: dict of macro defs
;; used: list of already expanded macros
;; TODO clean this up
;; should be looking at #\( and eat up to matching #\)
(define (collect-args argl argd dict used)
(let iter ((argl argl) (argv '()) (ch (skip-il-ws (read-char))))
;; ch should always be #\(, #\, or #\)
(cond
((eqv? ch #\)) (reverse argv))
((null? argl) (cpp-err "arg count"))
((and (null? (cdr argl)) (string=? (car argl) "..."))
(let ((val (scan-cpp-input argd dict used #\))))
(iter (cdr argl) (acons "__VA_ARGS__" val argv) (read-char))))
((or (eqv? ch #\() (eqv? ch #\,))
(let ((val (scan-cpp-input argd dict used #\,)))
(iter (cdr argl) (acons (car argl) val argv) (read-char))))
(else (error "coding error, ch=" ch)))))
;; @deffn expand-cpp-repl
;; to be documented
(define (expand-cpp-repl repl argd dict used)
(with-input-from-string repl
(lambda () (scan-cpp-input argd dict used #f))))
;; @deffn cpp-expand-text text dict => string
(define (cpp-expand-text text dict)
(with-input-from-string text
(lambda () (scan-cpp-input '() dict '() #f))))
;; @deffn expand-cpp-macro-ref ident dict => repl|#f
;; Given an identifier seen in C99 input, this checks for associated
;; definition in @var{dict} (generated from CPP defines). If found,
;; the expansion is returned as a string. If @var{ident} refers
;; to a macro with arguments, then the arguments will be read from the
;; current input. The format of the @code{dict} entries are
;; @example
;; ("ABC" . "123")
;; ("MAX" ("X" "Y") . "((X)>(Y)?(X):(Y))")
;; @end example
(define (expand-cpp-macro-ref ident dict . rest)
(let ((used (if (pair? rest) (car rest) '()))
(rval (assoc-ref dict ident)))
(cond
((not rval) #f)
((string=? rval "C99_ANY") #f) ; don't expand: could be anything
((member ident used) ident)
((string? rval)
(let ((expd (expand-cpp-repl rval '() dict (cons ident used))))
expd))
((pair? rval)
(let* ((argl (car rval)) (repl (cdr rval))
(argd (collect-args argl '() dict '()))
(expd (expand-cpp-repl repl argd dict (cons ident used))))
expd)))))
;;; --- last line ---

View file

@ -1,329 +0,0 @@
;;; nyacc/lang/c99/cppbody.scm
;;;
;;; Copyright (C) 2016-2017 Matthew R. Wette
;;;
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation, either version 3 of the License, or
;;; (at your option) any later version.
;;;
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
(use-modules (ice-9 match))
(define c99-std-defs
'("__DATE__" "__FILE__" "__LINE__" "__STDC__" "__STDC_HOSTED__"
"__STDC_VERSION__" "__TIME__"))
(define (c99-std-def? str)
(let iter ((defs c99-std-defs))
(cond
((null? defs) #f)
((string=? (car defs) str) #t)
(else (iter (cdr defs))))))
(define (c99-std-val str)
(cond
((string=? str "__DATE__") "M01 01 2001")
((string=? str "__FILE__") "(unknown)")
((string=? str "__LINE__") 0)
((string=? str "__STDC__") 1)
((string=? str "__STDC_HOSTED__") 0)
((string=? "__STDC_VERSION__") 201701)
((string=? "__TIME__") "00:00:00")
(else #f)))
(define (cpp-err fmt . args)
(apply throw 'cpp-error fmt args))
;;.@deffn skip-il-ws ch
;; Skip in-line whitespace
(define skip-il-ws
(let ((il-ws (list->char-set '(#\space #\tab))))
(lambda (ch)
(cond
((eof-object? ch) ch)
((char-set-contains? il-ws ch) (skip-il-ws (read-char)))
(else ch)))))
;; Since we want to be able to get CPP statements with comment in tact
;; (e.g., for passing to @code{pretty-print-c99}) we need to remove
;; comments when parsing CPP expressions. We convert a comm-reader
;; into a comm-skipper here. And from that generate a lexer generator.
(define cpp-comm-skipper
(let ((reader (make-comm-reader '(("/*" . "*/")))))
(lambda (ch)
(reader ch #f))))
;; generate a lexical analyzer per string
(define gen-cpp-lexer
(make-lexer-generator mtab #:comm-skipper cpp-comm-skipper))
;; @deffn parse-cpp-expr text => tree
;; Given a string returns a cpp parse tree. This is called by
;; @code{eval-cpp-expr}. The text will have had all CPP defined symbols
;; expanded already so no identifiers should appear in the text.
;; A @code{cpp-error} will be thrown if a parse error occurs.
(define (parse-cpp-expr text)
(with-throw-handler
'nyacc-error
(lambda ()
(with-input-from-string text
(lambda () (raw-parser (gen-cpp-lexer)))))
(lambda (key fmt . args)
(apply throw 'cpp-error fmt args))))
;; @deffn eval-cpp-expr tree dict => datum
;; Evaluate a tree produced from @code{parse-cpp-expr}.
;; The tree passed to this routine is
(define (eval-cpp-expr tree dict)
(letrec
((tx (lambda (tr ix) (list-ref tr ix)))
(tx1 (lambda (tr) (tx tr 1)))
(ev (lambda (ex ix) (eval-expr (list-ref ex ix))))
(ev1 (lambda (ex) (ev ex 1))) ; eval expr in arg 1
(ev2 (lambda (ex) (ev ex 2))) ; eval expr in arg 2
(ev3 (lambda (ex) (ev ex 3))) ; eval expr in arg 3
(eval-expr
(lambda (tree)
(case (car tree)
((fixed) (string->number (tx1 tree)))
((char) (char->integer (tx1 tree)))
((defined) (if (assoc-ref dict (tx1 tree)) 1 0))
((pre-inc post-inc) (1+ (ev1 tree)))
((pre-dec post-dec) (1- (ev1 tree)))
((pos) (ev1 tree))
((neg) (- (ev1 tree)))
((bw-not) (bitwise-not (ev1 tree)))
((not) (if (zero? (ev1 tree)) 1 0))
((mul) (* (ev1 tree) (ev2 tree)))
((div) (/ (ev1 tree) (ev2 tree)))
((mod) (modulo (ev1 tree) (ev2 tree)))
((add) (+ (ev1 tree) (ev2 tree)))
((sub) (- (ev1 tree) (ev2 tree)))
((lshift) (bitwise-arithmetic-shift-left (ev1 tree) (ev2 tree)))
((rshift) (bitwise-arithmetic-shift-right (ev1 tree) (ev2 tree)))
((lt) (if (< (ev1 tree) (ev2 tree)) 1 0))
((le) (if (<= (ev1 tree) (ev2 tree)) 1 0))
((gt) (if (> (ev1 tree) (ev2 tree)) 1 0))
((ge) (if (>= (ev1 tree) (ev2 tree)) 1 0))
((equal) (if (= (ev1 tree) (ev2 tree)) 1 0))
((noteq) (if (= (ev1 tree) (ev2 tree)) 0 1))
((bw-or) (bitwise-ior (ev1 tree) (ev2 tree)))
((bw-xor) (bitwise-xor (ev1 tree) (ev2 tree)))
((bw-and) (bitwise-and (ev1 tree) (ev2 tree)))
((or) (if (and (zero? (ev1 tree)) (zero? (ev2 tree))) 0 1))
((and) (if (or (zero? (ev1 tree)) (zero? (ev2 tree))) 0 1))
((cond-expr) (if (zero? (ev1 tree)) (ev3 tree) (ev2 tree)))
((ident) (cpp-err "undefined identifier: ~S" (cadr tree)))
(else (error "incomplete implementation"))))))
(eval-expr tree)))
;; Note: scan-cpp-input scans replacement text. When identifiers are found
;; they are tested for expansion as follows:
;; @enumerate
;; @item If already expanded, then ignore.
;; @item If followed by @code{(}, then use @code{collect-args} to get the
;; arguments and ...
;; @item Otherwise insert the replacement text and continue scanning (at
;; first character of new replacement text.
;; @end enumerate
;; @deffn rtokl->string tokl => string
;; Convert reverse token-list to string.
(define (rtokl->string tokl)
;; need to cover: comm ident string arg
;;(let iter ((stl '()) (chl '()) (nxt #f) (tkl tokl)) ;; more efficient
(let iter ((stl '()) (tkl tokl))
(match tkl
('()
(apply string-append stl))
((('arg . arg) 'dhash (key . val) . rest)
(iter (cons (string-append val arg) stl) (list-tail tkl 3)))
(((key . val) 'dhash ('arg . arg) . rest)
(iter (cons (string-append arg val) stl) (list-tail tkl 3)))
((('arg . arg) 'hash . rest)
(iter (cons (string-append "\"" arg "\"") stl) (list-tail tkl 2)))
((('comm . val) . rest)
(iter (cons (string-append "/*" val " */") stl) (cdr tkl)))
((('ident . rval) ('ident . lval) . rest)
(iter (cons* " " rval stl) (cdr tkl)))
(((key . val) . rest)
(iter (cons val stl) (cdr tkl)))
(('space . rest)
(iter (cons " " stl) rest))
(((? char? ch) . rest)
(iter (cons (string ch) stl) rest))
(otherwise
(error "no match" tkl)))))
;; @deffn scan-cpp-input argd used dict end-tok => string
;; Process replacement text from the input port and generate a (reversed)
;; token-list. If end-tok, stop at, and push back, @code{,} or @code{)}.
;; If end-tok is @code{,} then read until @code{,} or @code{(}.
;; The argument @var{argd} is a dictionary (argument name, argument
;; value) pairs which will be expanded as needed. This routine is called
;; by collect-args, expand-cpp-repl and cpp-expand-text.
(define (scan-cpp-input argd dict used end-tok)
;; Works like this: scan for tokens (comments, parens, strings, char's, etc).
;; Tokens are collected in a (reverse ordered) list (tkl) and merged together
;; to a string on return using @code{rtokl->string}.
;; Turn reverse chl into a string and insert it into the string list stl.
(define (add-chl chl stl)
(if (null? chl) stl (cons (list->string (reverse chl)) stl)))
(define conjoin string-append)
;; We just scanned "defined", now need to scan the arg to inhibit expansion.
;; For example, we have scanned "defined"; we now scan "(FOO)" or "FOO", and
;; return "defined(FOO)". We use ec (end-char) as terminal char:
;; #\) if starts with #( or #\nul if other.
(define (scan-defined-arg)
(let* ((ch (skip-il-ws (read-char)))
(ec (if (char=? ch #\() #\) #\null)))
(let iter ((chl '(#\()) (ec ec) (ch (if (char=? ec #\)) (read-char) ch)))
(cond
((eof-object? ch)
(if (char=? ec #\null)
(string-append "defined" (list->string (reverse (cons #\) chl))))
(cpp-err "illegal argument to `defined'")))
((char-set-contains? c:ir ch)
(iter (cons ch chl) ec (read-char)))
((char=? ec #\))
(if (char=? #\) (skip-il-ws ch))
(string-append "defined" (list->string (reverse (cons #\) chl))))
(cpp-err "garbage in argument to `defined'")))
((char=? ec #\null) ;; past identifier
(string-append "defined" (list->string (reverse (cons #\) chl)))))
(else
(cpp-err "illegal argument to `defined'"))))))
(let iter ((tkl '()) ; token list (as list of strings)
(lvl 0) ; level
(ch (read-char))) ; next character
(cond
;; have item to add, but first add in char's
;;(nxt (iter (cons nxt (add-chl chl tkl)) '() #f lvl ch))
;; If end of string or see end-ch at level 0, then return.
((eof-object? ch) (rtokl->string tkl))
((and (eqv? end-tok ch) (zero? lvl))
(unread-char ch) (rtokl->string tkl))
((and end-tok (char=? #\) ch) (zero? lvl))
(unread-char ch) (rtokl->string tkl))
((read-c-comm ch #f) =>
(lambda (cp) (iter (acons `comm (cdr cp) tkl) lvl (read-char))))
((char-set-contains? c:ws ch)
(if (and (pair? tkl) (char? (car tkl)))
(iter (cons 'space tkl) lvl (read-char))
(iter tkl lvl (read-char))))
((char=? #\( ch) (iter (cons ch tkl) (1+ lvl) (read-char)))
((char=? #\) ch) (iter (cons ch tkl) (1- lvl) (read-char)))
((char=? #\# ch)
(let ((ch (read-char)))
(if (eqv? ch #\#)
(iter (cons 'dhash tkl) lvl (read-char))
(iter (cons 'hash tkl) lvl ch))))
((read-c-string ch) =>
(lambda (st) (iter (acons 'string st tkl) lvl (read-char))))
((read-c-ident ch) =>
(lambda (iden)
(if (equal? iden "defined")
;; "defined" is a special case
(let ((arg (scan-defined-arg)))
(iter (acons 'defined arg tkl) lvl (read-char)))
;; otherwise ...
(let* ((aval (assoc-ref argd iden)) ; lookup argument
(rval (assoc-ref dict iden))) ; lookup macro def
(cond
((member iden used) ; name used
(iter (cons iden tkl) lvl (read-char)))
(aval ; arg ref
(iter (acons 'arg aval tkl) lvl (read-char)))
((string? rval) ; cpp repl
(iter (acons 'string rval tkl) lvl (read-char)))
((pair? rval) ; cpp macro
(let* ((argl (car rval)) (text (cdr rval))
(argd (collect-args argl argd dict used))
(newl (expand-cpp-repl text argd dict (cons iden used))))
(iter (acons 'string newl tkl) lvl (read-char))))
(else ; normal identifier
(iter (acons 'ident iden tkl) lvl (read-char))))))))
(else
(iter (cons ch tkl) lvl (read-char))))))
;; @deffn collect-args argl argd dict used => argd
;; to be documented
;; I think argd is a passthrough for scan-cpp-input
;; argl: list of formal arguments in #define
;; argd: used? (maybe just a pass-through for scan-cpp-input
;; dict: dict of macro defs
;; used: list of already expanded macros
;; TODO clean this up
;; should be looking at #\( and eat up to matching #\)
(define (collect-args argl argd dict used)
(let iter ((argl argl) (argv '()) (ch (skip-il-ws (read-char))))
;; ch should always be #\(, #\, or #\)
(cond
((eqv? ch #\)) (reverse argv))
((null? argl) (cpp-err "arg count"))
((and (null? (cdr argl)) (string=? (car argl) "..."))
(let ((val (scan-cpp-input argd dict used #\))))
(iter (cdr argl) (acons "__VA_ARGS__" val argv) (read-char))))
((or (eqv? ch #\() (eqv? ch #\,))
(let ((val (scan-cpp-input argd dict used #\,)))
(iter (cdr argl) (acons (car argl) val argv) (read-char))))
(else (error "coding error, ch=" ch)))))
;; @deffn expand-cpp-repl
;; to be documented
(define (expand-cpp-repl repl argd dict used)
(with-input-from-string repl
(lambda () (scan-cpp-input argd dict used #f))))
;; @deffn cpp-expand-text text dict => string
(define (cpp-expand-text text dict)
(with-input-from-string text
(lambda () (scan-cpp-input '() dict '() #f))))
;; @deffn expand-cpp-mref ident dict => repl|#f
;; Given an identifier seen in C99 input, this checks for associated
;; definition in @var{dict} (generated from CPP defines). If found,
;; the expansion is returned as a string. If @var{ident} refers
;; to a macro with arguments, then the arguments will be read from the
;; current input.
(define (expand-cpp-mref ident dict . rest)
(let ((used (if (pair? rest) (car rest) '()))
(rval (assoc-ref dict ident)))
(cond
((not rval) #f)
((member ident used) ident)
((string? rval)
(let ((expd (expand-cpp-repl rval '() dict (cons ident used))))
expd))
((pair? rval)
(let* ((argl (car rval)) (repl (cdr rval))
(argd (collect-args argl '() dict '()))
(expd (expand-cpp-repl repl argd dict (cons ident used))))
expd)))))
;;; --- last line ---

View file

@ -719,13 +719,12 @@
;; external-declaration => "extern" '$string "{" external-declaration-li...
(lambda ($5 $4 $3 $2 $1 . $rest)
`(extern-block
,$2
(extern-begin ,$2)
,@(sx-tail (tl->list $4) 1)
(extern-end)))
;; external-declaration => ";"
(lambda ($1 . $rest)
`(decl (@ (not-C99 . "GNU C"))))
`(decl (@ (extension . "GNU C"))))
;; function-definition => declaration-specifiers declarator declaration-...
(lambda ($4 $3 $2 $1 . $rest)
`(knr-fctn-defn

View file

@ -719,13 +719,12 @@
;; external-declaration => "extern" '$string "{" external-declaration-li...
(lambda ($5 $4 $3 $2 $1 . $rest)
`(extern-block
,$2
(extern-begin ,$2)
,@(sx-tail (tl->list $4) 1)
(extern-end)))
;; external-declaration => ";"
(lambda ($1 . $rest)
`(decl (@ (not-C99 . "GNU C"))))
`(decl (@ (extension . "GNU C"))))
;; function-definition => declaration-specifiers declarator declaration-...
(lambda ($4 $3 $2 $1 . $rest)
`(knr-fctn-defn

View file

@ -625,9 +625,10 @@
(cpp-statement)
(pragma)
("extern" $string "{" external-declaration-list "}"
($$ `(extern-block ,$2 (extern-begin ,$2)
,@(sx-tail (tl->list $4) 1) (extern-end))))
(";" ($$ `(decl (@ (not-C99 . "GNU C")))))
($$ `(extern-block (extern-begin ,$2)
,@(sx-tail (tl->list $4) 1)
(extern-end))))
(";" ($$ `(decl (@ (extension . "GNU C")))))
)
(function-definition

View file

@ -18,11 +18,7 @@
;; C parser
(define-module (nyacc lang c99 parser)
#:export (parse-c99
def-xdef? c99-std-dict
gen-c-lexer
gen-gcc-defs
)
#:export (parse-c99)
#:use-module (nyacc lex)
#:use-module (nyacc parse)
#:use-module (nyacc lang util)
@ -69,22 +65,24 @@
;; Default mode is @code{'code}.
;; @example
;; (with-input-from-file "abc.c"
;; (parse-c #:cpp-defs '(("ABC" . "123"))
;; (parse-c #:cpp-defs '("ABC=123"))
;; #:inc-dirs (append '("." "./incs" "/usr/include") c99-std-dict)
;; #:td-dict '(("myinc.h" "foo_t" "bar_t"))
;; #:inc-help '(("myinc.h" "foo_t" "bar_t"))
;; #:mode 'file))
;; @end example
(define* (parse-c99 #:key
(cpp-defs '()) ; CPP defines
(inc-dirs '()) ; include dirs
(td-dict '()) ; typedef dictionary
(inc-help '()) ; include helpers
(mode 'code) ; mode: 'file or 'code
(xdef? #f) ; pred to determine expand
(debug #f)) ; debug
(catch
'c99-error
(lambda ()
(let ((info (make-cpi debug cpp-defs (cons "." inc-dirs) td-dict)))
(if (and (pair? cpp-defs) (pair? (car cpp-defs)))
(error "usage deprecated: use #:cpp-defs '(\"ABC=123\")"))
(let ((info (make-cpi debug cpp-defs (cons "." inc-dirs) inc-help)))
(with-fluid*
*info* info
(lambda ()
@ -94,26 +92,8 @@
(report-error fmt rest)
#f)))
(define parse-c parse-c99)
(use-modules (ice-9 rdelim))
(use-modules (ice-9 popen))
(use-modules (ice-9 regex))
;; @deffn gen-gcc-defs args => '(("ABC" . "123") ...)
;; Generate a list of default defines produced by gcc.
(define gen-gcc-defs
;; @code{"gcc -dM -E"} will generate lines like @code{"#define ABC 123"}.
;; We generate and return a list like @code{'(("ABC" . "123") ...)}.
(let ((rx (make-regexp "#define\\s+(\\S+)\\s+(.*)")))
(lambda (args)
(map
(lambda (l)
(let ((m (regexp-exec rx l)))
(cons (match:substring m 1) (match:substring m 2))))
(let ((ip (open-input-pipe "gcc -dM -E - </dev/null")))
(let iter ((lines '()) (line (read-line ip 'trim)))
(if (eof-object? line) lines
(iter (cons line lines) (read-line ip 'trim)))))))))
;;(use-modules (ice-9 rdelim))
;;(use-modules (ice-9 popen))
;;(use-modules (ice-9 regex))
;; --- last line ---

View file

@ -521,6 +521,7 @@
((cpp-stmt . ,rest)
(cpp-ppx (sx-ref tree 1)))
((extern-block ,begin ,guts ,end) (ppx begin) (ppx guts) (ppx end))
((extern-begin ,lang) (sf "extern \"~A\" {\n" lang))
((extern-end) (sf "}\n"))

View file

@ -18,7 +18,11 @@
;; C parser utilities
(define-module (nyacc lang c99 util1)
#:export (remove-inc-trees merge-inc-trees! elifify)
#:export (c99-std-help
gen-gcc-defs
remove-inc-trees
merge-inc-trees!
elifify)
#:use-module (nyacc lang util)
#:use-module ((srfi srfi-1) #:select (append-reverse))
#:use-module (srfi srfi-2) ;; and-let*
@ -26,6 +30,58 @@
#:use-module (sxml match)
)
;; include-helper for C99 std
(define c99-std-help
'(("alloca.h")
("complex.h" "complex" "imaginary" "_Imaginary_I=C99_ANY" "I=C99_ANY")
("ctype.h")
("fenv.h" "fenv_t" "fexcept_t")
("float.h" "float_t" "FLT_MAX=C99_ANY" "DBL_MAX=C99_ANY")
("inttypes.h"
"int8_t" "uint8_t" "int16_t" "uint16_t" "int32_t" "uint32_t"
"int64_t" "uint64_t" "uintptr_t" "intptr_t" "intmax_t" "uintmax_t"
"int_least8_t" "uint_least8_t" "int_least16_t" "uint_least16_t"
"int_least32_t" "uint_least32_t" "int_least64_t" "uint_least64_t"
"imaxdiv_t")
("limits.h"
"INT_MIN=C99_ANY" "INT_MAX=C99_ANY" "LONG_MIN=C99_ANY" "LONG_MAX=C99_ANY")
("math.h")
("regex.h" "regex_t" "regmatch_t")
("setjmp.h" "jmp_buf")
("signal.h" "sig_atomic_t")
("stdarg.h" "va_list")
("stddef.h" "ptrdiff_t" "size_t" "wchar_t")
("stdint.h"
"int8_t" "uint8_t" "int16_t" "uint16_t" "int32_t" "uint32_t"
"int64_t" "uint64_t" "uintptr_t" "intptr_t" "intmax_t" "uintmax_t"
"int_least8_t" "uint_least8_t" "int_least16_t" "uint_least16_t"
"int_least32_t" "uint_least32_t" "int_least64_t" "uint_least64_t")
("stdio.h" "FILE" "size_t")
("stdlib.h" "div_t" "ldiv_t" "lldiv_t" "wchar_t")
("string.h" "size_t")
("strings.h" "size_t")
("time.h" "time_t" "clock_t" "size_t")
("unistd.h" "size_t" "ssize_t" "div_t" "ldiv_t")
("wchar.h" "wchar_t" "wint_t" "mbstate_t" "size_t")
("wctype.h" "wctrans_t" "wctype_t" "wint_t")
))
;; @deffn gen-gcc-defs args [#:CC "clang"] => '(("ABC" . "123") ...)
;; Generate a list of default defines produced by gcc (or clang).
(define gen-gcc-defs
;; @code{"gcc -dM -E"} will generate lines like @code{"#define ABC 123"}.
;; We generate and return a list like @code{'(("ABC" . "123") ...)}.
(let ((rx (make-regexp "#define\\s+(\\S+)\\s+(.*)")))
(lambda* (args #:key (CC "gcc"))
(map
(lambda (l)
(let ((m (regexp-exec rx l)))
(cons (match:substring m 1) (match:substring m 2))))
(let ((ip (open-input-pipe (string-append CC " -dM -E - </dev/null"))))
(let iter ((lines '()) (line (read-line ip 'trim)))
(if (eof-object? line) lines
(iter (cons line lines) (read-line ip 'trim)))))))))
;; @item remove-inc-trees tree
;; Remove the trees included with cpp-include statements.
;; @example

View file

@ -18,7 +18,7 @@
;; C parser
(define-module (nyacc lang c99 xparser)
#:export (parse-cx parse-c99x)
#:export (parse-c99x)
#:use-module (nyacc lex)
#:use-module (nyacc parse)
#:use-module (nyacc lang util)
@ -45,13 +45,13 @@
(let ((info (fluid-ref *info*)))
(raw-parser (gen-c-lexer) #:debug (cpi-debug info))))
;; @item parse-c99x [#:cpp-defs def-a-list] [#:debug bool]
;; @item parse-c99x [#:cpp-defs defs] [#:debug bool]
;; This needs to be explained in some detail.
;; [#:tyns '("foo_t")]
(define* (parse-c99x expr-string
#:key
(cpp-defs '()) ; CPP defines
(tn-dict '()) ; typedef dictionary
(inc-help '()) ; include helper
(xdef? #f) ; pred to determine expand
(debug #f) ; debug?
(tyns '())) ; defined typenames
@ -60,7 +60,7 @@
(catch
'c99-error
(lambda ()
(let ((info (make-cpi debug cpp-defs '(".") tn-dict)))
(let ((info (make-cpi debug cpp-defs '(".") inc-help)))
(set-cpi-ptl! info (cons tyns (cpi-ptl info)))
(with-fluid*
*info* info
@ -71,6 +71,4 @@
(report-error fmt rest)
#f)))))
(define parse-cx parse-c99x)
;; --- last line ---