From 07a5f716fcae1dffd299f25091f81d5f47451ada Mon Sep 17 00:00:00 2001 From: Jan Nieuwenhuizen Date: Sun, 24 Jul 2016 12:08:21 +0200 Subject: [PATCH] lalr paren test --- GNUmakefile | 9 ++++ lib/lalr.mes | 8 +++ lib/lalr.scm | 11 +++- paren.scm | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++ scm.mes | 23 +++++++++ test.mes | 5 +- 6 files changed, 191 insertions(+), 3 deletions(-) create mode 100644 lib/lalr.mes create mode 100644 paren.scm diff --git a/GNUmakefile b/GNUmakefile index 4aa50c21..1db5cf96 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -83,3 +83,12 @@ record: all cat scm.mes syntax.mes lib/record.mes lib/record.scm lib/srfi/srfi-9.scm record.mes |./mes +paren: all + cat scm.mes syntax.mes lib/srfi/srfi-0.scm lib/record.mes lib/record.scm lib/srfi/srfi-9.scm lib/lalr.mes lib/lalr.scm paren.scm | ./mes +# #echo '___P((()))' + +paren.test: lib/lalr.scm paren.scm + cat $^ > $@ + +guile-paren: paren.test + echo '___P((()))' | guile -s $^ diff --git a/lib/lalr.mes b/lib/lalr.mes new file mode 100644 index 00000000..9e0d56b2 --- /dev/null +++ b/lib/lalr.mes @@ -0,0 +1,8 @@ + +(define pprint display) +(define lalr-keyword? symbol?) +(define-macro (BITS-PER-WORD) 30) +(define-macro (logical-or x . y) `(logior ,x ,@y)) +(define-macro (lalr-error msg obj) `(error ,msg ,obj)) +(define (note-source-location lvalue tok) lvalue) +(define *eoi* -1) diff --git a/lib/lalr.scm b/lib/lalr.scm index 0e486a35..56a5f83d 100644 --- a/lib/lalr.scm +++ b/lib/lalr.scm @@ -20,7 +20,6 @@ (define *lalr-scm-version* "2.5.0") - (cond-expand ;; -- Gambit-C @@ -79,6 +78,7 @@ ;; -- Guile (guile + (display "GUILE") (newline) (use-modules (ice-9 pretty-print)) (use-modules (srfi srfi-9)) @@ -98,8 +98,15 @@ (mes (display "MES!") (newline) - ) + (define pprint display) + (define lalr-keyword? symbol?) + (define-macro (BITS-PER-WORD) 30) + (define-macro (logical-or x . y) `(logior ,x ,@y)) + (define-macro (lalr-error msg obj) `(error ,msg ,obj)) + (define (note-source-location lvalue tok) lvalue) + ) + ;; -- Kawa (kawa (require 'pretty-print) diff --git a/paren.scm b/paren.scm new file mode 100644 index 00000000..7304fa92 --- /dev/null +++ b/paren.scm @@ -0,0 +1,138 @@ +;;; Read C source code, breaking it into the following types of tokens: +;;; the identifier ___P, other identifiers, left and right parentheses, +;;; and any other non-spacing character. White space (space, tab, and +;;; newline characters) is never a token and may come between any two +;;; tokens, before the first, or after the last. + +;;; Whenever the identifier ___P is seen, read a left parenthesis +;;; followed by a body (zero or more tokens) followed by a right +;;; parenthesis. If the body contains parentheses they must be properly +;;; paired. Other tokens in the body, including ___P, have no effect. +;;; Count the deepest nesting level used in the body. Count the maximum +;;; deepest level (of all the bodies seen so far). + +;;; At the end of the file, print the maximum deepest level, or 0 if no +;;; bodies were found. + + +;;; Global variables used by lexical analyzer and parser. +;;; The lexical analyzer needs them to print the maximum level at the +;;; end of the file. + +(define depth 0) +(define max-depth 0) + +;;; Lexical analyzer. Passes tokens to the parser. + +(define (paren-depth-lexer errorp) + (lambda () + + ;; Utility functions, for identifying characters, skipping any + ;; amount of white space, or reading multicharacter tokens. + + (letrec ((char-whitespace? + (lambda (c) + (or (char=? c #\space) + (char=? c #\tab) + (char=? c #\newline)))) + (skip-whitespace + (lambda () + (let loop ((c (peek-char))) + (if (and (not (eof-object? c)) + (char-whitespace? c)) + (begin (read-char) + (loop (peek-char))))))) + + (char-in-id? + (lambda (c) + (or (char-alphabetic? c) + (char=? c #\_)))) + (read-___P-or-other-id + (lambda (l) + (let ((c (peek-char))) + (if (char-in-id? c) + (read-___P-or-other-id (cons (read-char) l)) + ;; else + (if (equal? l '(#\P #\_ #\_ #\_)) + '___P + ;; else + 'ID)))))) + + ;; The lexer function. + + (skip-whitespace) + (let loop ((c (read-char))) + (cond + ((eof-object? c) (begin (display "max depth ") + (display max-depth) + (newline) + '*eoi*)) + ((char-whitespace? c) (begin (errorp "didn't expect whitespace " c) + (loop (read-char)))) + ((char-in-id? c) (read-___P-or-other-id (list c))) + ((char=? c #\() 'LPAREN) + ((char=? c #\)) 'RPAREN) + (else 'CHAR)))))) + +;;; Parser. + +(define paren-depth-parser + (lalr-parser + + ;; Options. + + ;; (expect: 0) ;; even one conflict is an error + + ;; List of terminal tokens. + + (CHAR LPAREN RPAREN ID ___P) + + ;; Grammar rules. + + (file (newfile tokens)) + (newfile () : (begin (set! depth 0) + (set! max-depth 0))) + + (tokens (tokens token) + (token)) + + ;; When not after a ___P, the structure of the file is unimportant. + (token (CHAR) + (LPAREN) + (RPAREN) + (ID) + + ;; But after a ___P, we start counting parentheses. + (___P newexpr in LPAREN exprs RPAREN out) + (___P newexpr in LPAREN RPAREN out)) + (newexpr () : (set! depth 0)) + + ;; Inside an expression, ___P is treated like all other identifiers. + ;; Only parentheses do anything very interesting. I'm assuming Lalr + ;; will enforce the pairing of parentheses, so my in and out actions + ;; don't check for too many or too few closing parens. + + (exprs (exprs expr) + (expr)) + + (expr (CHAR) + (in LPAREN exprs RPAREN out) + (in LPAREN RPAREN out) + (ID) + (___P)) + (in () : (begin (set! depth (+ depth 1)) + (if (> depth max-depth) + (set! max-depth depth)))) + (out () : (set! depth (- depth 1))))) + +;;; Main program. + +(define paren-depth + (let ((errorp + (lambda args + (for-each display args) + (newline)))) + (lambda () + (paren-depth-parser (paren-depth-lexer errorp) errorp)))) + +(paren-depth) diff --git a/scm.mes b/scm.mes index 3b3ba447..ca29cf44 100755 --- a/scm.mes +++ b/scm.mes @@ -134,6 +134,11 @@ (define assv assq) +(define (assoc key alist) + (cond ((null? alist) #f) + ((equal? key (caar alist)) (car alist)) + (#t (assoc key (cdr alist))))) + (define (memq x lst) (cond ((null? lst) #f) ((eq? x (car lst)) lst) @@ -198,8 +203,26 @@ (define else #t) +(define (error who . rest) + (display "error:") + (display who) + (display ":") + (display rest) + (display newline)) + +(define (syntax-error message . rest) + (display "syntax-error:") + (display message) + (display ":") + (display rest) + (newline)) + ;; srfi-1 (define (last-pair lst) (let loop ((lst lst)) (if (or (null? lst) (null? (cdr lst))) lst (loop (cdr lst))))) + +(define (reverse lst) + (if (null? lst) '() + (append (reverse (cdr lst)) (cons (car lst) '())))) diff --git a/test.mes b/test.mes index ca15199d..fba7371d 100644 --- a/test.mes +++ b/test.mes @@ -140,6 +140,7 @@ (pass-if "assq-ref 2" (seq? (assq-ref '((b . 1) (c . 2)) 'a) #f)) (pass-if "assq-set!" (sequal? (assq-set! '((b . 1)) 'a 0) '((a . 0) (b . 1)))) (pass-if "assq-set! 2" (sequal? (assq-set! '((a . 0)) 'a 1) '((a . 1)))) +(pass-if "assoc" (sequal? (assoc '(a . 0) '((a . 0) (b . 1) ((a . 0) aa))) '((a . 0) aa))) ;; works, but debugging is foo ;; (cond ((defined? 'loop2) @@ -224,7 +225,7 @@ #t)) (pass-if "last-pair " (sequal? (last-pair '(1 2 3 4)) '(4))) -(pass-if "last-pair 2" (eq? (last-pair '()) '())) +(pass-if "last-pair 2" (seq? (last-pair '()) '())) ;; (pass-if "circular-list? " ;; (seq? ;; (let ((x (list 1 2 3 4))) @@ -232,6 +233,8 @@ ;; (circular-list? x)) ;; #t)) +(pass-if "reverse" (sequal? (reverse '(1 2 3)) '(3 2 1))) + (pass-if "cond-expand" (sequal? (cond-expand (foobar #f) (mes (display ": pass: *YAY*") 'mes) (guile (display ": pass: *GUILE*") 'mes)) 'mes)) (begin (define *top-begin-a* '*top-begin-a*))