diff --git a/mes.c b/mes.c index 963f321a..ba143d7e 100644 --- a/mes.c +++ b/mes.c @@ -30,13 +30,9 @@ #define DEBUG 0 #define FIXED_PRIMITIVES 1 -#define READER 0 -#if READER -int ARENA_SIZE = 1000000; -#else int ARENA_SIZE = 100000; -#endif + int MAX_ARENA_SIZE = 20000000; int GC_SAFETY = 100; @@ -1204,10 +1200,8 @@ SCM load_env (SCM a) ///((internal)) { r0 =a; -#if 1 //!READER g_stdin = fopen ("module/mes/read-0.mes", "r"); g_stdin = g_stdin ? g_stdin : fopen (PREFIX "module/mes/read-0.mes", "r"); -#endif if (!g_function) r0 = mes_builtins (r0); r3 = read_input_file_env (r0); g_stdin = stdin; diff --git a/module/mes/read-0.mes b/module/mes/read-0.mes index eebedd2b..88fd9940 100644 --- a/module/mes/read-0.mes +++ b/module/mes/read-0.mes @@ -20,14 +20,9 @@ ;;; Commentary: -;;; read-0.mes - bootstrap reader from Scheme. Use -;;; ./mes --dump < module/mes/read-0.mes > read-0.mo -;;; to read, garbage collect, and dump this reader; then -;;; ./mes --load < tests/gc-3.test -;;; to use this reader to read and run the minimal gc-3.test -;;; TODO: complete this reader, remove reader from C. - -;;; copy of mes/read-0.mes, comment-out read-input-file +;;; read-0.mes - bootstrap reader. This file is read by a minimal +;;; core reader. It only supports s-exps and line-comments; quotes, +;;; character literals, string literals cannot be used here. ;;; Code: @@ -129,8 +124,8 @@ (append2 (cddar clauses) (list (caar clauses))) (list (cons (quote lambda) (cons (list) (car clauses))))) (list (cons (quote lambda) (cons (list) (car clauses))))) - (if (pair? (cdr clauses)) - (cons (quote cond) (cdr clauses)))))) + (if (pair? (cdr clauses)) + (cons (quote cond) (cdr clauses)))))) (define (eat-whitespace) ((lambda (c) @@ -141,7 +136,7 @@ ((eq? c 12) (read-byte) (eat-whitespace)) ((eq? c 13) (read-byte) (eat-whitespace)) ((eq? c 59) (begin (read-line-comment (read-byte)) - (eat-whitespace))) + (eat-whitespace))) ((eq? c 35) (begin (read-byte) (cond ((eq? (peek-byte) 33) (read-byte) @@ -163,10 +158,6 @@ (read-block-comment s (read-byte))) (read-block-comment s (read-byte)))) - ;; (define (read-hex c) - ;; (if (eq? c 10) c - ;; (read-line-comment (read-byte)))) - (define (read-line-comment c) (if (eq? c 10) c (read-line-comment (read-byte)))) @@ -179,70 +170,143 @@ (cons w (read-list a)))) (read-word (read-byte) (list) a)))) - ;;(define (read-string)) + (define-macro (and . x) + (if (null? x) #t + (if (null? (cdr x)) (car x) + (list (quote if) (car x) (cons (quote and) (cdr x)) + #f)))) + (define-macro (or . x) + (if (null? x) #f + (if (null? (cdr x)) (car x) + (list (quote if) (car x) (car x) + (cons (quote or) (cdr x)))))) + (define (not x) + (if x #f #t)) + + (define (list->symbol lst) (make-symbol lst)) + + (define (read-character) + (define (read-octal c p n) + (if (not (and (> p 47) (< p 56))) n + (read-octal (read-byte) (peek-byte) (+ (ash n 3) (- p 48))))) + + (define (read-name c p n) + (define (lookup-char n) + (cond ((assq n (quote ((*foe* . -1) + (lun . 0) + (mrala . 7) + (ecapskcab . 8) + (bat . 9) + (enilwen . 10) + (batv . 11) + (egap . 12) + (nruter . 13) + (ecaps . 32)))) => cdr) + (#t (display (quote char-not-supported:)) (display n) (newline) (exit 1)))) + (if (not (and (> p 96) (< p 123))) (integer->char (lookup-char (list->symbol (cons (integer->char c) n)))) + (read-name (read-byte) (peek-byte) (cons (integer->char c) n)))) + + ((lambda (c p) + (cond ((and (> c 47) (< c 56) (> p 47) (< p 56)) + (integer->char (read-octal c p (- c 48)))) + ((and (> c 96) (< c 123) (> p 96) (< p 123)) (read-name c p (list))) + (#t (integer->char c)))) + (read-byte) (peek-byte))) + + (define (read-hex) + (define (calc c) + (cond ((and (> c 64) (< c 71)) (+ (- c 65) 10)) + ((and (> c 96) (< c 103)) (+ (- c 97) 10)) + ((and (> c 47) (< c 58)) (- c 48)) + (#t 0))) + (define (read-hex c p n) + (if (not (or (and (> p 64) (< p 71)) + (and (> p 96) (< p 103)) + (and (> p 47) (< p 58)))) (+ (ash n 4) (calc c)) + (read-hex (read-byte) (peek-byte) (+ (ash n 4) (calc c))))) + ((lambda (c p) + (read-hex c p 0)) + (read-byte) (peek-byte))) + + (define (read-string) + (define (append-char s c) + (append2 s (cons (integer->char c) (list)))) + (define (read-string c p s) + (cond + ((and (eq? c 92) (or (eq? p 92) (eq? p 34))) + ((lambda (c) + (read-string (read-byte) (peek-byte) (append-char s c))) + (read-byte))) + ((and (eq? c 92) (eq? p 110)) + (read-byte) + (read-string (read-byte) (peek-byte) (append-char s 10))) + ((eq? c 34) s) + ((eq? c -1) (display (quote EOF-in-string)) (newline) (exit 1)) + (#t (read-string (read-byte) (peek-byte) (append-char s c))))) + (list->string (read-string (read-byte) (peek-byte) (list)))) + (define (lookup-char c a) (lookup (cons (integer->char c) (list)) a)) (define (read-word c w a) (cond - ((eq? c 32) (if (null? w) (read-word (read-byte) (list) a) - (lookup w a))) - ((eq? c 10) (read-word 32 w a)) - ((eq? c 9) (read-word 32 w a)) - ((eq? c 12) (read-word 32 w a)) - ((eq? c 34) (if (null? w) (read-string) - (begin (unread-byte c) (lookup w a)))) - ((eq? c 35) (cond - ((eq? (peek-byte) 33) (begin (read-byte) - (read-block-comment 33 (read-byte)) + ((eq? c 32) (if (null? w) (read-word (read-byte) (list) a) + (lookup w a))) + ((eq? c 10) (read-word 32 w a)) + ((eq? c 9) (read-word 32 w a)) + ((eq? c 12) (read-word 32 w a)) + ((eq? c 34) (if (null? w) (read-string) + (begin (unread-byte c) (lookup w a)))) + ((eq? c 35) (cond + ((eq? (peek-byte) 33) (begin (read-byte) + (read-block-comment 33 (read-byte)) + (read-word (read-byte) w a))) + ((eq? (peek-byte) 124) (begin (read-byte) + (read-block-comment 124 (read-byte)) (read-word (read-byte) w a))) - ((eq? (peek-byte) 124) (begin (read-byte) - (read-block-comment 124 (read-byte)) - (read-word (read-byte) w a))) - ((eq? (peek-byte) 40) (read-byte) (list->vector (read-list a))) - ((eq? (peek-byte) 92) (read-byte) (read-character)) - ((eq? (peek-byte) 120) (read-byte) (read-hex)) - ((eq? (peek-byte) 44) - (read-byte) - (cond ((eq? (peek-byte) 64) - (read-byte) - (cons (lookup (symbol->list (quote unsyntax-splicing)) a) - (cons (read-word (read-byte) w a) (list)))) - (#t - (cons (lookup (symbol->list (quote unsyntax)) a) - (cons (read-word (read-byte) w a) (list)))))) - ((eq? (peek-byte) 39) (read-byte) - (cons (lookup (cons (integer->char 35) (cons (integer->char 39) (list))) a) - (cons (read-word (read-byte) w a) (list)))) - ((eq? (peek-byte) 59) (read-byte) - (read-word (read-byte) w a) - (read-word (read-byte) w a)) - ((eq? (peek-byte) 96) (read-byte) - (cons (lookup (cons (integer->char 35) (cons (integer->char 96) (list))) a) - (cons (read-word (read-byte) w a) (list)))) - (#t (read-word (read-byte) (append2 w (cons (integer->char c) (list))) a)))) - ((eq? c 39) (if (null? w) (cons (lookup (cons (integer->char c) (list)) a) - (cons (read-word (read-byte) w a) (list))) - (begin (unread-byte c) (lookup w a)))) - ((eq? c 40) (if (null? w) (read-list a) - (begin (unread-byte c) (lookup w a)))) - ((eq? c 41) (if (null? w) (cons (lookup (cons (integer->char c) (list)) a) - (cons (read-word (read-byte) w a) (list))) - (begin (unread-byte c) (lookup w a)))) - ((eq? c 44) (cond - ((eq? (peek-byte) 64) (begin (read-byte) - (cons - (lookup (symbol->list (quote unquote-splicing)) a) - (cons (read-word (read-byte) w a) (list))))) - (#t (cons (lookup-char c a) (cons (read-word (read-byte) w a) - (list)))))) - ((eq? c 96) (cons (lookup-char c a) (cons (read-word (read-byte) w a) (list)))) - ((eq? c 59) (read-line-comment c) (read-word 10 w a)) - ((eq? c -1) (list)) - (#t (read-word (read-byte) (append2 w (cons (integer->char c) (list))) a)))) + ((eq? (peek-byte) 40) (read-byte) (list->vector (read-list a))) + ((eq? (peek-byte) 92) (read-byte) (read-character)) + ((eq? (peek-byte) 120) (read-byte) (read-hex)) + ((eq? (peek-byte) 44) + (read-byte) + (cond ((eq? (peek-byte) 64) + (read-byte) + (cons (lookup (symbol->list (quote unsyntax-splicing)) a) + (cons (read-word (read-byte) w a) (list)))) + (#t + (cons (lookup (symbol->list (quote unsyntax)) a) + (cons (read-word (read-byte) w a) (list)))))) + ((eq? (peek-byte) 39) (read-byte) + (cons (lookup (cons (integer->char 35) (cons (integer->char 39) (list))) a) + (cons (read-word (read-byte) w a) (list)))) + ((eq? (peek-byte) 59) (read-byte) + (read-word (read-byte) w a) + (read-word (read-byte) w a)) + ((eq? (peek-byte) 96) (read-byte) + (cons (lookup (cons (integer->char 35) (cons (integer->char 96) (list))) a) + (cons (read-word (read-byte) w a) (list)))) + (#t (read-word (read-byte) (append2 w (cons (integer->char c) (list))) a)))) + ((eq? c 39) (if (null? w) (cons (lookup (cons (integer->char c) (list)) a) + (cons (read-word (read-byte) w a) (list))) + (begin (unread-byte c) (lookup w a)))) + ((eq? c 40) (if (null? w) (read-list a) + (begin (unread-byte c) (lookup w a)))) + ((eq? c 41) (if (null? w) (cons (lookup (cons (integer->char c) (list)) a) + (cons (read-word (read-byte) w a) (list))) + (begin (unread-byte c) (lookup w a)))) + ((eq? c 44) (cond + ((eq? (peek-byte) 64) (begin (read-byte) + (cons + (lookup (symbol->list (quote unquote-splicing)) a) + (cons (read-word (read-byte) w a) (list))))) + (#t (cons (lookup-char c a) (cons (read-word (read-byte) w a) + (list)))))) + ((eq? c 96) (cons (lookup-char c a) (cons (read-word (read-byte) w a) (list)))) + ((eq? c 59) (read-line-comment c) (read-word 10 w a)) + ((eq? c -1) (list)) + (#t (read-word (read-byte) (append2 w (cons (integer->char c) (list))) a)))) ((lambda (p) - ;;(display (quote scheme-program=)) (display p) (newline) (begin-env p (current-module))) (read-input-file))) diff --git a/reader.c b/reader.c index c54404ce..de062b22 100644 --- a/reader.c +++ b/reader.c @@ -36,13 +36,6 @@ unread_char (SCM c) return ungetchar (VALUE (c)); } -int -read_block_comment (int s, int c) -{ - if (c == s && peekchar () == '#') return getchar (); - return read_block_comment (s, getchar ()); -} - int read_line_comment (int c) { @@ -68,125 +61,14 @@ read_word (int c, SCM w, SCM a) if (c == ')' && w == cell_nil) {ungetchar (c); return cell_nil;} if (c == ')') {ungetchar (c); return lookup (w, a);} if (c == ';') {read_line_comment (c); return read_word ('\n', w, a);} -#if READER - if (c == '"' && w == cell_nil) return read_string (); - if (c == '"') {ungetchar (c); return lookup (w, a);} - if (c == ',' && peekchar () == '@') {getchar (); return cons (lookup (STRING (cell_symbol_unquote_splicing), a), - cons (read_word (getchar (), w, a), - cell_nil));} - if ((c == '\'' - || c == '`' - || c == ',') - && w == cell_nil) {return cons (lookup_char (c, a), - cons (read_word (getchar (), w, a), - cell_nil));} - if (c == '#' && peekchar () == ',' && w == cell_nil) { - getchar (); - if (peekchar () == '@'){getchar (); return cons (lookup (STRING (cell_symbol_unsyntax_splicing), a), - cons (read_word (getchar (), w, a), - cell_nil));} - return cons (lookup (STRING (cell_symbol_unsyntax), a), cons (read_word (getchar (), w, a), cell_nil)); - } - if (c == '#' && (peekchar () == '\'' || peekchar () == '`') && w == cell_nil) { - c = getchar (); - return cons (lookup (cons (make_char ('#'), cons (make_char (c), cell_nil)), a), - cons (read_word (getchar (), w, a), cell_nil));} - if (c == '#' && peekchar () == 'x') {getchar (); return read_hex ();} - if (c == '#' && peekchar () == '\\') {getchar (); return read_character ();} - if (c == '#' && w == cell_nil && peekchar () == '(') {getchar (); return list_to_vector (read_list (a));} - if (c == '#' && peekchar () == ';') {getchar (); read_word (getchar (), w, a); return read_word (getchar (), w, a);} - if (c == '#' && (peekchar () == '!' || peekchar () == '|')) {c = getchar (); read_block_comment (c, getchar ()); return read_word (getchar (), w, a);} -#endif //READER return read_word (getchar (), append2 (w, cons (make_char (c), cell_nil)), a); } -SCM -read_character () -{ - int c = getchar (); - if (c >= '0' && c <= '7' - && peekchar () >= '0' && peekchar () <= '7') { - c = c - '0'; - while (peekchar () >= '0' && peekchar () <= '7') { - c <<= 3; - c += getchar () - '0'; - } - } - else if (c >= 'a' && c <= 'z' - && peekchar () >= 'a' && peekchar () <= 'z') { - char buf[10]; - char *p = buf; - *p++ = c; - while (peekchar () >= 'a' && peekchar () <= 'z') { - *p++ = getchar (); - } - *p = 0; - if (!strcmp (buf, char_nul.name)) c = char_nul.value; - else if (!strcmp (buf, char_alarm.name)) c = char_alarm.value; - else if (!strcmp (buf, char_backspace.name)) c = char_backspace.value; - else if (!strcmp (buf, char_tab.name)) c = char_tab.value; - else if (!strcmp (buf, char_newline.name)) c = char_newline.value; - else if (!strcmp (buf, char_vtab.name)) c = char_vtab.value; - else if (!strcmp (buf, char_page.name)) c = char_page.value; - else if (!strcmp (buf, char_return.name)) c = char_return.value; - else if (!strcmp (buf, char_space.name)) c = char_space.value; - else { - fprintf (stderr, "char not supported: %s\n", buf); - assert (!"char not supported"); - } - } - return make_char (c); -} - -SCM -read_hex () -{ - int n = 0; - int c = peekchar (); - while ((c >= '0' && c <= '9') - || (c >= 'A' && c <= 'F') - || (c >= 'a' && c <= 'f')) { - n <<= 4; - if (c >= 'a') n += c - 'a' + 10; - else if (c >= 'A') n += c - 'A' + 10; - else n+= c - '0'; - getchar (); - c = peekchar (); - } - return make_number (n); -} - -SCM -append_char (SCM x, int i) -{ - return append2 (x, cons (make_char (i), cell_nil)); -} - -SCM -read_string () -{ - SCM p = cell_nil; - int c = getchar (); - while (true) { - if (c == '"') break; - if (c == '\\' && peekchar () == '\\') p = append_char (p, getchar ()); - else if (c == '\\' && peekchar () == '"') p = append_char (p, getchar ()); - else if (c == '\\' && peekchar () == 'n') {getchar (); p = append_char (p, '\n');} - else if (c == EOF) assert (!"EOF in string"); - else p = append_char (p, c); - c = getchar (); - } - return make_string (p); -} - int eat_whitespace (int c) { while (c == ' ' || c == '\t' || c == '\n' || c == '\f') c = getchar (); if (c == ';') return eat_whitespace (read_line_comment (c)); -#if READER - if (c == '#' && (peekchar () == '!' || peek_char () == '|')) {c=getchar (); read_block_comment (c, getchar ()); return eat_whitespace (getchar ());} -#endif return c; } diff --git a/tests/scm.test b/tests/scm.test index c936b367..a000aa54 100755 --- a/tests/scm.test +++ b/tests/scm.test @@ -74,7 +74,7 @@ exit $? (pass-if "string-ref" (seq? (string-ref "hello world" 4) #\o)) (pass-if "eq?" (not (eq? (string-append "a" "b" "c") "abc"))) (pass-if "string-length" (seq? (string-length (string-append "a" "b" "c")) 3)) -(pass-if "string->list" (sequal? (string->list "abc\n") '(#\a #\b #\c #\newline))) +(pass-if-equal "string->list" '(#\a #\b #\c #\newline) (string->list "abc\n")) (pass-if "char" (seq? (char->integer #\A) 65)) (pass-if "char 2" (seq? (char->integer #\101) (char->integer #\A))) (pass-if "char 3" (seq? (integer->char 10) #\newline))