core: Remove most of reader.

* reader.c (append_char, read_block_comment, read_character, read_hex,
  read_string): Remove.
  (eat_whitespace, read_word)[READER]: Remove.
* mes.c (list_to_symbol): New function.
* module/mes/read-0.mes (list->symbol, read-character, read-hex,
  read-string): New functions.
This commit is contained in:
Jan Nieuwenhuizen 2016-12-22 23:42:28 +01:00
parent a0caca6409
commit ddfaa05149
4 changed files with 137 additions and 197 deletions

8
mes.c
View file

@ -30,13 +30,9 @@
#define DEBUG 0
#define FIXED_PRIMITIVES 1
#define READER 0
#if READER
int ARENA_SIZE = 1000000;
#else
int ARENA_SIZE = 100000;
#endif
int MAX_ARENA_SIZE = 20000000;
int GC_SAFETY = 100;
@ -1204,10 +1200,8 @@ SCM
load_env (SCM a) ///((internal))
{
r0 =a;
#if 1 //!READER
g_stdin = fopen ("module/mes/read-0.mes", "r");
g_stdin = g_stdin ? g_stdin : fopen (PREFIX "module/mes/read-0.mes", "r");
#endif
if (!g_function) r0 = mes_builtins (r0);
r3 = read_input_file_env (r0);
g_stdin = stdin;

View file

@ -20,14 +20,9 @@
;;; Commentary:
;;; read-0.mes - bootstrap reader from Scheme. Use
;;; ./mes --dump < module/mes/read-0.mes > read-0.mo
;;; to read, garbage collect, and dump this reader; then
;;; ./mes --load < tests/gc-3.test
;;; to use this reader to read and run the minimal gc-3.test
;;; TODO: complete this reader, remove reader from C.
;;; copy of mes/read-0.mes, comment-out read-input-file
;;; read-0.mes - bootstrap reader. This file is read by a minimal
;;; core reader. It only supports s-exps and line-comments; quotes,
;;; character literals, string literals cannot be used here.
;;; Code:
@ -129,8 +124,8 @@
(append2 (cddar clauses) (list (caar clauses)))
(list (cons (quote lambda) (cons (list) (car clauses)))))
(list (cons (quote lambda) (cons (list) (car clauses)))))
(if (pair? (cdr clauses))
(cons (quote cond) (cdr clauses))))))
(if (pair? (cdr clauses))
(cons (quote cond) (cdr clauses))))))
(define (eat-whitespace)
((lambda (c)
@ -141,7 +136,7 @@
((eq? c 12) (read-byte) (eat-whitespace))
((eq? c 13) (read-byte) (eat-whitespace))
((eq? c 59) (begin (read-line-comment (read-byte))
(eat-whitespace)))
(eat-whitespace)))
((eq? c 35) (begin (read-byte)
(cond ((eq? (peek-byte) 33)
(read-byte)
@ -163,10 +158,6 @@
(read-block-comment s (read-byte)))
(read-block-comment s (read-byte))))
;; (define (read-hex c)
;; (if (eq? c 10) c
;; (read-line-comment (read-byte))))
(define (read-line-comment c)
(if (eq? c 10) c
(read-line-comment (read-byte))))
@ -179,70 +170,143 @@
(cons w (read-list a))))
(read-word (read-byte) (list) a))))
;;(define (read-string))
(define-macro (and . x)
(if (null? x) #t
(if (null? (cdr x)) (car x)
(list (quote if) (car x) (cons (quote and) (cdr x))
#f))))
(define-macro (or . x)
(if (null? x) #f
(if (null? (cdr x)) (car x)
(list (quote if) (car x) (car x)
(cons (quote or) (cdr x))))))
(define (not x)
(if x #f #t))
(define (list->symbol lst) (make-symbol lst))
(define (read-character)
(define (read-octal c p n)
(if (not (and (> p 47) (< p 56))) n
(read-octal (read-byte) (peek-byte) (+ (ash n 3) (- p 48)))))
(define (read-name c p n)
(define (lookup-char n)
(cond ((assq n (quote ((*foe* . -1)
(lun . 0)
(mrala . 7)
(ecapskcab . 8)
(bat . 9)
(enilwen . 10)
(batv . 11)
(egap . 12)
(nruter . 13)
(ecaps . 32)))) => cdr)
(#t (display (quote char-not-supported:)) (display n) (newline) (exit 1))))
(if (not (and (> p 96) (< p 123))) (integer->char (lookup-char (list->symbol (cons (integer->char c) n))))
(read-name (read-byte) (peek-byte) (cons (integer->char c) n))))
((lambda (c p)
(cond ((and (> c 47) (< c 56) (> p 47) (< p 56))
(integer->char (read-octal c p (- c 48))))
((and (> c 96) (< c 123) (> p 96) (< p 123)) (read-name c p (list)))
(#t (integer->char c))))
(read-byte) (peek-byte)))
(define (read-hex)
(define (calc c)
(cond ((and (> c 64) (< c 71)) (+ (- c 65) 10))
((and (> c 96) (< c 103)) (+ (- c 97) 10))
((and (> c 47) (< c 58)) (- c 48))
(#t 0)))
(define (read-hex c p n)
(if (not (or (and (> p 64) (< p 71))
(and (> p 96) (< p 103))
(and (> p 47) (< p 58)))) (+ (ash n 4) (calc c))
(read-hex (read-byte) (peek-byte) (+ (ash n 4) (calc c)))))
((lambda (c p)
(read-hex c p 0))
(read-byte) (peek-byte)))
(define (read-string)
(define (append-char s c)
(append2 s (cons (integer->char c) (list))))
(define (read-string c p s)
(cond
((and (eq? c 92) (or (eq? p 92) (eq? p 34)))
((lambda (c)
(read-string (read-byte) (peek-byte) (append-char s c)))
(read-byte)))
((and (eq? c 92) (eq? p 110))
(read-byte)
(read-string (read-byte) (peek-byte) (append-char s 10)))
((eq? c 34) s)
((eq? c -1) (display (quote EOF-in-string)) (newline) (exit 1))
(#t (read-string (read-byte) (peek-byte) (append-char s c)))))
(list->string (read-string (read-byte) (peek-byte) (list))))
(define (lookup-char c a)
(lookup (cons (integer->char c) (list)) a))
(define (read-word c w a)
(cond
((eq? c 32) (if (null? w) (read-word (read-byte) (list) a)
(lookup w a)))
((eq? c 10) (read-word 32 w a))
((eq? c 9) (read-word 32 w a))
((eq? c 12) (read-word 32 w a))
((eq? c 34) (if (null? w) (read-string)
(begin (unread-byte c) (lookup w a))))
((eq? c 35) (cond
((eq? (peek-byte) 33) (begin (read-byte)
(read-block-comment 33 (read-byte))
((eq? c 32) (if (null? w) (read-word (read-byte) (list) a)
(lookup w a)))
((eq? c 10) (read-word 32 w a))
((eq? c 9) (read-word 32 w a))
((eq? c 12) (read-word 32 w a))
((eq? c 34) (if (null? w) (read-string)
(begin (unread-byte c) (lookup w a))))
((eq? c 35) (cond
((eq? (peek-byte) 33) (begin (read-byte)
(read-block-comment 33 (read-byte))
(read-word (read-byte) w a)))
((eq? (peek-byte) 124) (begin (read-byte)
(read-block-comment 124 (read-byte))
(read-word (read-byte) w a)))
((eq? (peek-byte) 124) (begin (read-byte)
(read-block-comment 124 (read-byte))
(read-word (read-byte) w a)))
((eq? (peek-byte) 40) (read-byte) (list->vector (read-list a)))
((eq? (peek-byte) 92) (read-byte) (read-character))
((eq? (peek-byte) 120) (read-byte) (read-hex))
((eq? (peek-byte) 44)
(read-byte)
(cond ((eq? (peek-byte) 64)
(read-byte)
(cons (lookup (symbol->list (quote unsyntax-splicing)) a)
(cons (read-word (read-byte) w a) (list))))
(#t
(cons (lookup (symbol->list (quote unsyntax)) a)
(cons (read-word (read-byte) w a) (list))))))
((eq? (peek-byte) 39) (read-byte)
(cons (lookup (cons (integer->char 35) (cons (integer->char 39) (list))) a)
(cons (read-word (read-byte) w a) (list))))
((eq? (peek-byte) 59) (read-byte)
(read-word (read-byte) w a)
(read-word (read-byte) w a))
((eq? (peek-byte) 96) (read-byte)
(cons (lookup (cons (integer->char 35) (cons (integer->char 96) (list))) a)
(cons (read-word (read-byte) w a) (list))))
(#t (read-word (read-byte) (append2 w (cons (integer->char c) (list))) a))))
((eq? c 39) (if (null? w) (cons (lookup (cons (integer->char c) (list)) a)
(cons (read-word (read-byte) w a) (list)))
(begin (unread-byte c) (lookup w a))))
((eq? c 40) (if (null? w) (read-list a)
(begin (unread-byte c) (lookup w a))))
((eq? c 41) (if (null? w) (cons (lookup (cons (integer->char c) (list)) a)
(cons (read-word (read-byte) w a) (list)))
(begin (unread-byte c) (lookup w a))))
((eq? c 44) (cond
((eq? (peek-byte) 64) (begin (read-byte)
(cons
(lookup (symbol->list (quote unquote-splicing)) a)
(cons (read-word (read-byte) w a) (list)))))
(#t (cons (lookup-char c a) (cons (read-word (read-byte) w a)
(list))))))
((eq? c 96) (cons (lookup-char c a) (cons (read-word (read-byte) w a) (list))))
((eq? c 59) (read-line-comment c) (read-word 10 w a))
((eq? c -1) (list))
(#t (read-word (read-byte) (append2 w (cons (integer->char c) (list))) a))))
((eq? (peek-byte) 40) (read-byte) (list->vector (read-list a)))
((eq? (peek-byte) 92) (read-byte) (read-character))
((eq? (peek-byte) 120) (read-byte) (read-hex))
((eq? (peek-byte) 44)
(read-byte)
(cond ((eq? (peek-byte) 64)
(read-byte)
(cons (lookup (symbol->list (quote unsyntax-splicing)) a)
(cons (read-word (read-byte) w a) (list))))
(#t
(cons (lookup (symbol->list (quote unsyntax)) a)
(cons (read-word (read-byte) w a) (list))))))
((eq? (peek-byte) 39) (read-byte)
(cons (lookup (cons (integer->char 35) (cons (integer->char 39) (list))) a)
(cons (read-word (read-byte) w a) (list))))
((eq? (peek-byte) 59) (read-byte)
(read-word (read-byte) w a)
(read-word (read-byte) w a))
((eq? (peek-byte) 96) (read-byte)
(cons (lookup (cons (integer->char 35) (cons (integer->char 96) (list))) a)
(cons (read-word (read-byte) w a) (list))))
(#t (read-word (read-byte) (append2 w (cons (integer->char c) (list))) a))))
((eq? c 39) (if (null? w) (cons (lookup (cons (integer->char c) (list)) a)
(cons (read-word (read-byte) w a) (list)))
(begin (unread-byte c) (lookup w a))))
((eq? c 40) (if (null? w) (read-list a)
(begin (unread-byte c) (lookup w a))))
((eq? c 41) (if (null? w) (cons (lookup (cons (integer->char c) (list)) a)
(cons (read-word (read-byte) w a) (list)))
(begin (unread-byte c) (lookup w a))))
((eq? c 44) (cond
((eq? (peek-byte) 64) (begin (read-byte)
(cons
(lookup (symbol->list (quote unquote-splicing)) a)
(cons (read-word (read-byte) w a) (list)))))
(#t (cons (lookup-char c a) (cons (read-word (read-byte) w a)
(list))))))
((eq? c 96) (cons (lookup-char c a) (cons (read-word (read-byte) w a) (list))))
((eq? c 59) (read-line-comment c) (read-word 10 w a))
((eq? c -1) (list))
(#t (read-word (read-byte) (append2 w (cons (integer->char c) (list))) a))))
((lambda (p)
;;(display (quote scheme-program=)) (display p) (newline)
(begin-env p (current-module)))
(read-input-file)))

118
reader.c
View file

@ -36,13 +36,6 @@ unread_char (SCM c)
return ungetchar (VALUE (c));
}
int
read_block_comment (int s, int c)
{
if (c == s && peekchar () == '#') return getchar ();
return read_block_comment (s, getchar ());
}
int
read_line_comment (int c)
{
@ -68,125 +61,14 @@ read_word (int c, SCM w, SCM a)
if (c == ')' && w == cell_nil) {ungetchar (c); return cell_nil;}
if (c == ')') {ungetchar (c); return lookup (w, a);}
if (c == ';') {read_line_comment (c); return read_word ('\n', w, a);}
#if READER
if (c == '"' && w == cell_nil) return read_string ();
if (c == '"') {ungetchar (c); return lookup (w, a);}
if (c == ',' && peekchar () == '@') {getchar (); return cons (lookup (STRING (cell_symbol_unquote_splicing), a),
cons (read_word (getchar (), w, a),
cell_nil));}
if ((c == '\''
|| c == '`'
|| c == ',')
&& w == cell_nil) {return cons (lookup_char (c, a),
cons (read_word (getchar (), w, a),
cell_nil));}
if (c == '#' && peekchar () == ',' && w == cell_nil) {
getchar ();
if (peekchar () == '@'){getchar (); return cons (lookup (STRING (cell_symbol_unsyntax_splicing), a),
cons (read_word (getchar (), w, a),
cell_nil));}
return cons (lookup (STRING (cell_symbol_unsyntax), a), cons (read_word (getchar (), w, a), cell_nil));
}
if (c == '#' && (peekchar () == '\'' || peekchar () == '`') && w == cell_nil) {
c = getchar ();
return cons (lookup (cons (make_char ('#'), cons (make_char (c), cell_nil)), a),
cons (read_word (getchar (), w, a), cell_nil));}
if (c == '#' && peekchar () == 'x') {getchar (); return read_hex ();}
if (c == '#' && peekchar () == '\\') {getchar (); return read_character ();}
if (c == '#' && w == cell_nil && peekchar () == '(') {getchar (); return list_to_vector (read_list (a));}
if (c == '#' && peekchar () == ';') {getchar (); read_word (getchar (), w, a); return read_word (getchar (), w, a);}
if (c == '#' && (peekchar () == '!' || peekchar () == '|')) {c = getchar (); read_block_comment (c, getchar ()); return read_word (getchar (), w, a);}
#endif //READER
return read_word (getchar (), append2 (w, cons (make_char (c), cell_nil)), a);
}
SCM
read_character ()
{
int c = getchar ();
if (c >= '0' && c <= '7'
&& peekchar () >= '0' && peekchar () <= '7') {
c = c - '0';
while (peekchar () >= '0' && peekchar () <= '7') {
c <<= 3;
c += getchar () - '0';
}
}
else if (c >= 'a' && c <= 'z'
&& peekchar () >= 'a' && peekchar () <= 'z') {
char buf[10];
char *p = buf;
*p++ = c;
while (peekchar () >= 'a' && peekchar () <= 'z') {
*p++ = getchar ();
}
*p = 0;
if (!strcmp (buf, char_nul.name)) c = char_nul.value;
else if (!strcmp (buf, char_alarm.name)) c = char_alarm.value;
else if (!strcmp (buf, char_backspace.name)) c = char_backspace.value;
else if (!strcmp (buf, char_tab.name)) c = char_tab.value;
else if (!strcmp (buf, char_newline.name)) c = char_newline.value;
else if (!strcmp (buf, char_vtab.name)) c = char_vtab.value;
else if (!strcmp (buf, char_page.name)) c = char_page.value;
else if (!strcmp (buf, char_return.name)) c = char_return.value;
else if (!strcmp (buf, char_space.name)) c = char_space.value;
else {
fprintf (stderr, "char not supported: %s\n", buf);
assert (!"char not supported");
}
}
return make_char (c);
}
SCM
read_hex ()
{
int n = 0;
int c = peekchar ();
while ((c >= '0' && c <= '9')
|| (c >= 'A' && c <= 'F')
|| (c >= 'a' && c <= 'f')) {
n <<= 4;
if (c >= 'a') n += c - 'a' + 10;
else if (c >= 'A') n += c - 'A' + 10;
else n+= c - '0';
getchar ();
c = peekchar ();
}
return make_number (n);
}
SCM
append_char (SCM x, int i)
{
return append2 (x, cons (make_char (i), cell_nil));
}
SCM
read_string ()
{
SCM p = cell_nil;
int c = getchar ();
while (true) {
if (c == '"') break;
if (c == '\\' && peekchar () == '\\') p = append_char (p, getchar ());
else if (c == '\\' && peekchar () == '"') p = append_char (p, getchar ());
else if (c == '\\' && peekchar () == 'n') {getchar (); p = append_char (p, '\n');}
else if (c == EOF) assert (!"EOF in string");
else p = append_char (p, c);
c = getchar ();
}
return make_string (p);
}
int
eat_whitespace (int c)
{
while (c == ' ' || c == '\t' || c == '\n' || c == '\f') c = getchar ();
if (c == ';') return eat_whitespace (read_line_comment (c));
#if READER
if (c == '#' && (peekchar () == '!' || peek_char () == '|')) {c=getchar (); read_block_comment (c, getchar ()); return eat_whitespace (getchar ());}
#endif
return c;
}

View file

@ -74,7 +74,7 @@ exit $?
(pass-if "string-ref" (seq? (string-ref "hello world" 4) #\o))
(pass-if "eq?" (not (eq? (string-append "a" "b" "c") "abc")))
(pass-if "string-length" (seq? (string-length (string-append "a" "b" "c")) 3))
(pass-if "string->list" (sequal? (string->list "abc\n") '(#\a #\b #\c #\newline)))
(pass-if-equal "string->list" '(#\a #\b #\c #\newline) (string->list "abc\n"))
(pass-if "char" (seq? (char->integer #\A) 65))
(pass-if "char 2" (seq? (char->integer #\101) (char->integer #\A)))
(pass-if "char 3" (seq? (integer->char 10) #\newline))