;;; lang/c/cpp.scm ;;; ;;; Copyright (C) 2015-2017 Matthew R. Wette ;;; ;;; This program is free software: you can redistribute it and/or modify ;;; it under the terms of the GNU General Public License as published by ;;; the Free Software Foundation, either version 3 of the License, or ;;; (at your option) any later version. ;;; ;;; This program is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with this program. If not, see . ;; C preprocessor. This is not complete. (define-module (nyacc lang c99 cpp) #:export (parse-cpp-stmt cpp-line->stmt parse-cpp-expr eval-cpp-expr cpp-expand-text expand-cpp-macro-ref ) #:use-module (nyacc parse) #:use-module (nyacc lex) #:use-module (nyacc lang util) #:use-module (rnrs arithmetic bitwise) #:use-module (ice-9 match) ) (cond-expand (guile-2) (guile (use-modules (ice-9 syncase))) (mes)) (define c99-std-defs '("__DATE__" "__FILE__" "__LINE__" "__STDC__" "__STDC_HOSTED__" "__STDC_VERSION__" "__TIME__")) (define (c99-std-def? str) (let iter ((defs c99-std-defs)) (cond ((null? defs) #f) ((string=? (car defs) str) #t) (else (iter (cdr defs)))))) (define (c99-std-val str) (cond ((string=? str "__DATE__") "M01 01 2001") ((string=? str "__FILE__") "(unknown)") ((string=? str "__LINE__") 0) ((string=? str "__STDC__") 1) ((string=? str "__STDC_HOSTED__") 0) ((string=? "__STDC_VERSION__") 201701) ((string=? "__TIME__") "00:00:00") (else #f))) ;; @deffn read-ellipsis ch ;; read ellipsis (define (read-ellipsis ch) (cond ((eof-object? ch) #f) ((char=? ch #\.) (read-char) (read-char) "...") ; assumes correct syntax (else #f))) ;; @deffn cpp-define => (define (name "ADD") (args "X" "Y") (repl "X+Y")) ;; output is like ;; @code{(name "ABC") (repl "123")} or ;; @code{(name "ABC") (args "X" "Y") (repl "X+Y")} (define (cpp-define) (define (p-args la) ;; parse args (if (eq? la #\() (let iter ((args '()) (la (skip-il-ws (read-char)))) (cond ((eq? la #\)) (reverse args)) ((read-c-ident la) => (lambda (arg) (iter (cons arg args) (skip-il-ws (read-char))))) ((read-ellipsis la) => (lambda (arg) (iter (cons arg args) (skip-il-ws (read-char))))) ((eq? la #\,) (iter args (skip-il-ws (read-char)))))) (begin (if (char? la) (unread-char la)) #f))) (define (p-rest la) ;; parse rest (cond ((eof-object? la) "") (else (unread-char la) (drain-input (current-input-port))))) (let* ((name (read-c-ident (skip-il-ws (read-char)))) (args (or (p-args (read-char)) '())) (repl (p-rest (skip-il-ws (read-char))))) (if (pair? args) `(define (name ,name) (args . ,args) (repl ,repl)) `(define (name ,name) (repl ,repl))))) ;; @deffn cpp-include ;; Parse CPP include statement. (define (cpp-include) (let* ((beg-ch (skip-il-ws (read-char))) (end-ch (if (eq? beg-ch #\<) #\> #\")) (path (let iter ((cl (list beg-ch)) (ch (read-char))) (if (eq? ch end-ch) (list->string (reverse (cons ch cl))) (iter (cons ch cl) (read-char)))))) `(include ,path))) ;; @deffn cpp-line->stmt line defs => (stmt-type text) ;; Parse a line from a CPP statement and return a parse tree. ;; @example ;; (parse-cpp-stmt "define X 123") => (define "X" "123") ;; (parse-cpp-stmt "if defined(A) && defined(B) && defined(C)" ;; => (if "defined(A) && defined(B) && defined(C)") ;; @end example ;; To evaluate the @code{if} statements use @code{parse-cpp-expr} and ;; @code{eval-cpp-expr}. (define (cpp-line->stmt line) (define (rd-ident) (read-c-ident (skip-il-ws (read-char)))) (define (rd-num) (and=> (read-c-num (skip-il-ws (read-char))) cdr)) (define (rd-rest) (let ((ch (skip-il-ws (read-char)))) (if (not (eof-object? ch)) (unread-char ch)) (drain-input (current-input-port)))) (with-input-from-string line (lambda () (let ((cmd (string->symbol (read-c-ident (skip-il-ws (read-char)))))) (case cmd ((include) (cpp-include)) ((define) (cpp-define)) ((undef) `(undef ,(rd-ident))) ((ifdef) `(if ,(string-append "defined(" (rd-ident) ")" (rd-rest)))) ((ifndef) `(if ,(string-append "!defined(" (rd-ident) ")" (rd-rest)))) ((if elif else endif line error pragma) (list cmd (rd-rest))) (else '(unknown ""))))))) (include-from-path "nyacc/lang/c99/mach.d/cpptab.scm") (include-from-path "nyacc/lang/c99/mach.d/cppact.scm") (define raw-parser (make-lalr-parser (list (cons 'len-v len-v) (cons 'pat-v pat-v) (cons 'rto-v rto-v) (cons 'mtab mtab) (cons 'act-v act-v)))) (define (cpp-err fmt . args) (apply throw 'cpp-error fmt args)) ;;.@deffn skip-il-ws ch ;; Skip in-line whitespace (define skip-il-ws (let ((il-ws (list->char-set '(#\space #\tab)))) (lambda (ch) (cond ((eof-object? ch) ch) ((char-set-contains? il-ws ch) (skip-il-ws (read-char))) (else ch))))) ;; Since we want to be able to get CPP statements with comment in tact ;; (e.g., for passing to @code{pretty-print-c99}) we need to remove ;; comments when parsing CPP expressions. We convert a comm-reader ;; into a comm-skipper here. And from that generate a lexer generator. (define cpp-comm-skipper (let ((reader (make-comm-reader '(("/*" . "*/"))))) (lambda (ch) (reader ch #f)))) ;; generate a lexical analyzer per string (define gen-cpp-lexer (make-lexer-generator mtab #:comm-skipper cpp-comm-skipper)) ;; @deffn parse-cpp-expr text => tree ;; Given a string returns a cpp parse tree. This is called by ;; @code{eval-cpp-expr}. The text will have had all CPP defined symbols ;; expanded already so no identifiers should appear in the text. ;; A @code{cpp-error} will be thrown if a parse error occurs. (define (parse-cpp-expr text) (with-throw-handler 'nyacc-error (lambda () (with-input-from-string text (lambda () (raw-parser (gen-cpp-lexer))))) (lambda (key fmt . args) (apply throw 'cpp-error fmt args)))) ;; @deffn eval-cpp-expr tree dict => datum ;; Evaluate a tree produced from @code{parse-cpp-expr}. ;; The tree passed to this routine is (define (eval-cpp-expr tree dict) (letrec ((tx (lambda (tr ix) (list-ref tr ix))) (tx1 (lambda (tr) (tx tr 1))) (ev (lambda (ex ix) (eval-expr (list-ref ex ix)))) (ev1 (lambda (ex) (ev ex 1))) ; eval expr in arg 1 (ev2 (lambda (ex) (ev ex 2))) ; eval expr in arg 2 (ev3 (lambda (ex) (ev ex 3))) ; eval expr in arg 3 (eval-expr (lambda (tree) (case (car tree) ((fixed) (string->number (tx1 tree))) ((char) (char->integer (tx1 tree))) ((defined) (if (assoc-ref dict (tx1 tree)) 1 0)) ((pre-inc post-inc) (1+ (ev1 tree))) ((pre-dec post-dec) (1- (ev1 tree))) ((pos) (ev1 tree)) ((neg) (- (ev1 tree))) ((bw-not) (bitwise-not (ev1 tree))) ((not) (if (zero? (ev1 tree)) 1 0)) ((mul) (* (ev1 tree) (ev2 tree))) ((div) (/ (ev1 tree) (ev2 tree))) ((mod) (modulo (ev1 tree) (ev2 tree))) ((add) (+ (ev1 tree) (ev2 tree))) ((sub) (- (ev1 tree) (ev2 tree))) ((lshift) (bitwise-arithmetic-shift-left (ev1 tree) (ev2 tree))) ((rshift) (bitwise-arithmetic-shift-right (ev1 tree) (ev2 tree))) ((lt) (if (< (ev1 tree) (ev2 tree)) 1 0)) ((le) (if (<= (ev1 tree) (ev2 tree)) 1 0)) ((gt) (if (> (ev1 tree) (ev2 tree)) 1 0)) ((ge) (if (>= (ev1 tree) (ev2 tree)) 1 0)) ((equal) (if (= (ev1 tree) (ev2 tree)) 1 0)) ((noteq) (if (= (ev1 tree) (ev2 tree)) 0 1)) ((bw-or) (bitwise-ior (ev1 tree) (ev2 tree))) ((bw-xor) (bitwise-xor (ev1 tree) (ev2 tree))) ((bw-and) (bitwise-and (ev1 tree) (ev2 tree))) ((or) (if (and (zero? (ev1 tree)) (zero? (ev2 tree))) 0 1)) ((and) (if (or (zero? (ev1 tree)) (zero? (ev2 tree))) 0 1)) ((cond-expr) (if (zero? (ev1 tree)) (ev3 tree) (ev2 tree))) ((ident) (cpp-err "undefined identifier: ~S" (cadr tree))) (else (error "incomplete implementation")))))) (eval-expr tree))) ;; Note: scan-cpp-input scans replacement text. When identifiers are found ;; they are tested for expansion as follows: ;; @enumerate ;; @item If already expanded, then ignore. ;; @item If followed by @code{(}, then use @code{collect-args} to get the ;; arguments and ... ;; @item Otherwise insert the replacement text and continue scanning (at ;; first character of new replacement text. ;; @end enumerate ;; @deffn rtokl->string tokl => string ;; Convert reverse token-list to string. (define (rtokl->string tokl) ;; need to cover: comm ident string arg ;;(let iter ((stl '()) (chl '()) (nxt #f) (tkl tokl)) ;; more efficient (let iter ((stl '()) (tkl tokl)) (match tkl ('() (apply string-append stl)) ((('arg . arg) 'dhash (key . val) . rest) (iter (cons (string-append val arg) stl) (list-tail tkl 3))) (((key . val) 'dhash ('arg . arg) . rest) (iter (cons (string-append arg val) stl) (list-tail tkl 3))) ((('arg . arg) 'hash . rest) (iter (cons (string-append "\"" arg "\"") stl) (list-tail tkl 2))) ((('comm . val) . rest) (iter (cons (string-append "/*" val " */") stl) (cdr tkl))) ((('ident . rval) ('ident . lval) . rest) (iter (cons* " " rval stl) (cdr tkl))) (((key . val) . rest) (iter (cons val stl) (cdr tkl))) (('space . rest) (iter (cons " " stl) rest)) (((? char? ch) . rest) (iter (cons (string ch) stl) rest)) (otherwise (error "no match" tkl))))) ;; @deffn scan-cpp-input argd used dict end-tok => string ;; Process replacement text from the input port and generate a (reversed) ;; token-list. If end-tok, stop at, and push back, @code{,} or @code{)}. ;; If end-tok is @code{,} then read until @code{,} or @code{(}. ;; The argument @var{argd} is a dictionary (argument name, argument ;; value) pairs which will be expanded as needed. This routine is called ;; by collect-args, expand-cpp-repl and cpp-expand-text. (define (scan-cpp-input argd dict used end-tok) ;; Works like this: scan for tokens (comments, parens, strings, char's, etc). ;; Tokens are collected in a (reverse ordered) list (tkl) and merged together ;; to a string on return using @code{rtokl->string}. ;; Turn reverse chl into a string and insert it into the string list stl. (define (add-chl chl stl) (if (null? chl) stl (cons (list->string (reverse chl)) stl))) ;; We just scanned "defined", now need to scan the arg to inhibit expansion. ;; For example, we have scanned "defined"; we now scan "(FOO)" or "FOO", and ;; return "defined(FOO)". We use ec (end-char) as terminal char: ;; #\) if starts with #( or #\nul if other. (define (scan-defined-arg) (let* ((ch (skip-il-ws (read-char))) (ec (if (char=? ch #\() #\) #\null))) (let iter ((chl '(#\()) (ec ec) (ch (if (char=? ec #\)) (read-char) ch))) (cond ((eof-object? ch) (if (char=? ec #\null) (string-append "defined" (list->string (reverse (cons #\) chl)))) (cpp-err "illegal argument to `defined'"))) ((char-set-contains? c:ir ch) (iter (cons ch chl) ec (read-char))) ((char=? ec #\)) (if (char=? #\) (skip-il-ws ch)) (string-append "defined" (list->string (reverse (cons #\) chl)))) (cpp-err "garbage in argument to `defined'"))) ((char=? ec #\null) ;; past identifier (string-append "defined" (list->string (reverse (cons #\) chl))))) (else (cpp-err "illegal argument to `defined'")))))) ;; token list is list of ;; 1) characters as char ;; 2) identifiers as string ;; 3) strings as '(string . ) ;; 4) 'hash 'dhash (let iter ((tkl '()) ; token list of (lvl 0) ; level (ch (read-char))) ; next character (cond ;; have item to add, but first add in char's ;;(nxt (iter (cons nxt (add-chl chl tkl)) '() #f lvl ch)) ;; If end of string or see end-ch at level 0, then return. ((eof-object? ch) (rtokl->string tkl)) ((and (eqv? end-tok ch) (zero? lvl)) (unread-char ch) (rtokl->string tkl)) ((and end-tok (char=? #\) ch) (zero? lvl)) (unread-char ch) (rtokl->string tkl)) ((read-c-comm ch #f) => (lambda (cp) (iter (acons `comm (cdr cp) tkl) lvl (read-char)))) ((char-set-contains? c:ws ch) (if (and (pair? tkl) (char? (car tkl))) (iter (cons 'space tkl) lvl (read-char)) (iter tkl lvl (read-char)))) ((char=? #\( ch) (iter (cons ch tkl) (1+ lvl) (read-char))) ((char=? #\) ch) (iter (cons ch tkl) (1- lvl) (read-char))) ((char=? #\# ch) (let ((ch (read-char))) (if (eqv? ch #\#) (iter (cons 'dhash tkl) lvl (read-char)) (iter (cons 'hash tkl) lvl ch)))) ((read-c-string ch) => (lambda (st) (iter (acons 'string (cdr st) tkl) lvl (read-char)))) ((read-c-ident ch) => (lambda (iden) (if (equal? iden "defined") ;; "defined" is a special case (let ((arg (scan-defined-arg))) (iter (acons 'defined arg tkl) lvl (read-char))) ;; otherwise ... (let* ((aval (assoc-ref argd iden)) ; lookup argument (rval (assoc-ref dict iden))) ; lookup macro def (cond ((member iden used) ; name used (iter (cons iden tkl) lvl (read-char))) (aval ; arg ref (iter (acons 'arg aval tkl) lvl (read-char))) ((string? rval) ; cpp repl (iter (acons 'string rval tkl) lvl (read-char))) ((pair? rval) ; cpp macro (let* ((argl (car rval)) (text (cdr rval)) (argd (collect-args argl argd dict used)) (newl (expand-cpp-repl text argd dict (cons iden used)))) (iter (acons 'string newl tkl) lvl (read-char)))) (else ; normal identifier (iter (acons 'ident iden tkl) lvl (read-char)))))))) (else (iter (cons ch tkl) lvl (read-char)))))) ;; @deffn collect-args argl argd dict used => argd ;; to be documented ;; I think argd is a passthrough for scan-cpp-input ;; argl: list of formal arguments in #define ;; argd: used? (maybe just a pass-through for scan-cpp-input ;; dict: dict of macro defs ;; used: list of already expanded macros ;; TODO clean this up ;; should be looking at #\( and eat up to matching #\) (define (collect-args argl argd dict used) (let iter ((argl argl) (argv '()) (ch (skip-il-ws (read-char)))) ;; ch should always be #\(, #\, or #\) (cond ((eqv? ch #\)) (reverse argv)) ((null? argl) (cpp-err "arg count")) ((and (null? (cdr argl)) (string=? (car argl) "...")) (let ((val (scan-cpp-input argd dict used #\)))) (iter (cdr argl) (acons "__VA_ARGS__" val argv) (read-char)))) ((or (eqv? ch #\() (eqv? ch #\,)) (let ((val (scan-cpp-input argd dict used #\,))) (iter (cdr argl) (acons (car argl) val argv) (read-char)))) (else (error "coding error, ch=" ch))))) ;; @deffn expand-cpp-repl ;; to be documented (define (expand-cpp-repl repl argd dict used) (with-input-from-string repl (lambda () (scan-cpp-input argd dict used #f)))) ;; @deffn cpp-expand-text text dict => string (define (cpp-expand-text text dict) (with-input-from-string text (lambda () (scan-cpp-input '() dict '() #f)))) ;; @deffn expand-cpp-macro-ref ident dict => repl|#f ;; Given an identifier seen in C99 input, this checks for associated ;; definition in @var{dict} (generated from CPP defines). If found, ;; the expansion is returned as a string. If @var{ident} refers ;; to a macro with arguments, then the arguments will be read from the ;; current input. The format of the @code{dict} entries are ;; @example ;; ("ABC" . "123") ;; ("MAX" ("X" "Y") . "((X)>(Y)?(X):(Y))") ;; @end example (define (expand-cpp-macro-ref ident dict . rest) (let ((used (if (pair? rest) (car rest) '())) (rval (assoc-ref dict ident))) (cond #;((string=? ident "C99_ANY") #f) ; don't expand: could be anything #;((string=? ident "__FILE__") (string-append "\"" (or (port-filename (current-input-port)) "(unknown)") "\"")) #;((string=? ident "__LINE__") (1+ (port-line (current-input-port)))) ;; ((not rval) #f) ((member ident used) ident) ((string? rval) (let ((expd (expand-cpp-repl rval '() dict (cons ident used)))) expd)) ((pair? rval) (let* ((argl (car rval)) (repl (cdr rval)) (argd (collect-args argl '() dict '())) (expd (expand-cpp-repl repl argd dict (cons ident used)))) expd))))) ;;; --- last line ---