mes: resurrect full reader in C core.

* module/mes/read-0.mes (defined?): New function.
  (eat-whitespace, read-env, read-word, read-block-comment,
  read-line-comment, read-list, read-character, read-hex, read-octal,
  reader:read-string, lookup, read-hash, read-word): Only define if
  not %c-reader.
* module/mes/base-0.mes (defined?): Remove.
* src/mes.c[MES_C_READER]: Set ARENA_SIZE=10000000.
  (scm_symbol_quasiquote scm_symbol_unquote,
  scm_symbol_unquote_splicing, scm_symbol_syntax,
  scm_symbol_quasisyntax, scm_symbol_unsyntax,
  scm_symbol_unsyntax_splicing): New symbol.
  (scm_symbol_c_reader): New symbol.
  (MAKE_KEYWORD)[MES_C_READER]: New define.
  (mes_symbols): Define %c_reader.
* src/reader.c (read_word_)[MES_C_READER]: Extend to full Scheme
  reader.
  (eat_whitespace)[MES_C_READER]: Likewise.
  (read_block_comment, read_hash, read_word, read_character,
  read_octal, read_hex, append_char, read_string)[MES_C_READER]:
  Likewise.
* make.scm (bin.gcc,bin.mescc): Define MES_C_READER=1.
This commit is contained in:
Jan Nieuwenhuizen 2017-11-29 21:42:50 +01:00
parent 74c4197467
commit c3fdfedb20
6 changed files with 452 additions and 222 deletions

View file

@ -413,7 +413,8 @@ exec ${GUILE-guile} --no-auto-compile -L . -L guile -C . -C guile -s "$0" ${1+"$
(add-target (snarf "src/vector.c" #:mes? #t))))
(add-target (bin.gcc "src/mes.c" #:dependencies gcc-snarf-targets
#:defines `("FIXED_PRIMITIVES=1"
#:defines `("MES_C_READER=1"
"MES_FIXED_PRIMITIVES=1"
"MES_FULL=1"
"POSIX=1"
,(string-append "VERSION=\"" %version "\"")
@ -423,7 +424,8 @@ exec ${GUILE-guile} --no-auto-compile -L . -L guile -C . -C guile -s "$0" ${1+"$
(add-target (bin.gcc "src/mes.c" #:libc libc-gcc.mlibc-o
#:dependencies mes-snarf-targets
#:defines `("FIXED_PRIMITIVES=1"
#:defines `("MES_C_READER=1"
"MES_FIXED_PRIMITIVES=1"
"MES_FULL=1"
,(string-append "VERSION=\"" %version "\"")
,(string-append "MODULEDIR=\"" (string-append %prefix (if (string-null? %prefix) "" "/") "/" %moduledir "/") "\"")
@ -431,7 +433,8 @@ exec ${GUILE-guile} --no-auto-compile -L . -L guile -C . -C guile -s "$0" ${1+"$
#:includes '("src")))
(add-target (bin.mescc "src/mes.c" #:dependencies mes-snarf-targets
#:defines `("FIXED_PRIMITIVES=1"
#:defines `("MES_C_READER=1"
"MES_FIXED_PRIMITIVES=1"
"MES_FULL=1"
,(string-append "VERSION=\"" %version "\"")
,(string-append "MODULEDIR=\"" (string-append %prefix (if (string-null? %prefix) "" "/") %moduledir "/") "\"")

View file

@ -34,9 +34,6 @@
(define (primitive-eval e) (core:eval e (current-module)))
(define eval core:eval)
(define-macro (defined? x)
(list 'assq x '(cdr (cdr (current-module)))))
(if (defined? 'current-input-port) #t
(define (current-input-port) 0))

View file

@ -125,17 +125,8 @@
(define (symbol->keyword s)
(core:make-cell <cell:keyword> (symbol->list s) 0))
(define (read)
(read-word (read-byte) (list) (current-module)))
(define (read-env a)
(read-word (read-byte) (list) a))
(define (read-input-file)
(define (helper x)
(if (null? x) x
(cons x (helper (read)))))
(helper (read)))
(define-macro (defined? x)
(list (quote assq) x (quote (cdr (cdr (current-module))))))
(define-macro (cond . clauses)
(list (quote if) (pair? clauses)
@ -148,6 +139,44 @@
(if (pair? (cdr clauses))
(cons (quote cond) (cdr clauses))))))
(define-macro (and . x)
(if (null? x) #t
(if (null? (cdr x)) (car x)
(list (quote if) (car x) (cons (quote and) (cdr x))
#f))))
(define-macro (or . x)
(if (null? x) #f
(if (null? (cdr x)) (car x)
(list (list (quote lambda) (list (quote r))
(list (quote if) (quote r) (quote r)
(cons (quote or) (cdr x))))
(car x)))))
(define (not x)
(if x #f #t))
(define (map1 f lst)
(if (null? lst) (list)
(cons (f (car lst)) (map1 f (cdr lst)))))
(define (read)
(read-word (read-byte) (list) (current-module)))
(define (read-input-file)
(core:read-input-file-env (read-env (current-module)) (current-module)))
(if (not %c-reader)
(begin
(define (read-env a)
(read-word (read-byte) (list) a))
(define (read-input-file)
(define (helper x)
(if (null? x) x
(cons x (helper (read)))))
(helper (read)))
(define (eat-whitespace c)
(cond
((eq? c 32) (eat-whitespace (read-byte)))
@ -189,23 +218,6 @@
(cons w (read-list a))))
(read-word (read-byte) (list) a))))
(define-macro (and . x)
(if (null? x) #t
(if (null? (cdr x)) (car x)
(list (quote if) (car x) (cons (quote and) (cdr x))
#f))))
(define-macro (or . x)
(if (null? x) #f
(if (null? (cdr x)) (car x)
(list (list (quote lambda) (list (quote r))
(list (quote if) (quote r) (quote r)
(cons (quote or) (cdr x))))
(car x)))))
(define (not x)
(if x #f #t))
(define (read-character)
(define (read-octal c p n)
(if (not (and (> p 47) (< p 56))) n
@ -281,10 +293,6 @@
(#t (reader:read-string (read-byte) (peek-byte) (append-char s c)))))
(list->string (reader:read-string (read-byte) (peek-byte) (list))))
(define (map1 f lst)
(if (null? lst) (list)
(cons (f (car lst)) (map1 f (cdr lst)))))
(define (lookup w a)
(define (lookup-number c p s n)
(and (> c 47) (< c 58)
@ -354,7 +362,7 @@
((eq? c 9) (read-word 32 w a))
((eq? c 12) (read-word 32 w a))
((eq? c -1) (list))
(#t (read-word (read-byte) (append2 w (cons c (list))) a))))
(#t (read-word (read-byte) (append2 w (cons c (list))) a))))))
((lambda (p)
(core:eval (cons (quote begin) p) (current-module)))

View file

@ -86,15 +86,17 @@
(or (and (number? x) (= x -1))
(and (char? x) (eof-object? (char->integer x)))))
(if (not (defined? 'peek-char))
(define (peek-char)
(integer->char (peek-byte)))
(integer->char (peek-byte))))
(if (not (defined? 'read-char))
(define (read-char)
(integer->char (read-byte)))
(integer->char (read-byte))))
(if (not (defined? 'unread-char))
(define (unread-char c)
(unread-byte (char->integer c))
c)
(unread-byte (char->integer c))))
(define (assq-set! alist key val)
(let ((entry (assq key alist)))

View file

@ -24,8 +24,13 @@
#include <string.h>
#include <mlibc.h>
#if MES_C_READER
int ARENA_SIZE = 10000000;
#else
int ARENA_SIZE = 100000;
#endif
int MAX_ARENA_SIZE = 20000000;
//int GC_SAFETY_DIV = 400;
//int GC_SAFETY = ARENA_SIZE / 400;
int GC_SAFETY = 250;
@ -126,6 +131,19 @@ struct scm scm_symbol_lambda = {TSYMBOL, "lambda",0};
struct scm scm_symbol_begin = {TSYMBOL, "begin",0};
struct scm scm_symbol_if = {TSYMBOL, "if",0};
struct scm scm_symbol_quote = {TSYMBOL, "quote",0};
#if 1
//MES_C_READER
//Only for MES_C_READER; snarfing makes these always needed for linking
struct scm scm_symbol_quasiquote = {TSYMBOL, "quasiquote", 0};
struct scm scm_symbol_unquote = {TSYMBOL, "unquote", 0};
struct scm scm_symbol_unquote_splicing = {TSYMBOL, "unquote-splicing", 0};
struct scm scm_symbol_syntax = {TSYMBOL, "syntax",0};
struct scm scm_symbol_quasisyntax = {TSYMBOL, "quasisyntax", 0};
struct scm scm_symbol_unsyntax = {TSYMBOL, "unsyntax", 0};
struct scm scm_symbol_unsyntax_splicing = {TSYMBOL, "unsyntax-splicing", 0};
#endif // MES_C_READER
struct scm scm_symbol_set_x = {TSYMBOL, "set!",0};
struct scm scm_symbol_sc_expand = {TSYMBOL, "sc-expand",0};
@ -165,7 +183,7 @@ struct scm scm_vm_apply = {TSPECIAL, "core:apply",0};
struct scm scm_vm_apply2 = {TSPECIAL, "*vm-apply2*",0};
struct scm scm_vm_eval = {TSPECIAL, "core:eval",0};
//FIXED_PRIMITIVES
//MES_FIXED_PRIMITIVES
struct scm scm_vm_eval_car = {TSPECIAL, "*vm-eval-car*",0};
struct scm scm_vm_eval_cdr = {TSPECIAL, "*vm-eval-cdr*",0};
struct scm scm_vm_eval_cons = {TSPECIAL, "*vm-eval-cons*",0};
@ -187,6 +205,7 @@ struct scm scm_vm_return = {TSPECIAL, "*vm-return*",0};
struct scm scm_symbol_gnuc = {TSYMBOL, "%gnuc",0};
struct scm scm_symbol_mesc = {TSYMBOL, "%mesc",0};
struct scm scm_symbol_c_reader = {TSYMBOL, "%c-reader",0};
struct scm scm_test = {TSYMBOL, "test",0};
@ -271,6 +290,9 @@ int g_function = 0;
#define MAKE_NUMBER(n) make_cell_ (tmp_num_ (TNUMBER), 0, tmp_num2_ (n))
#define MAKE_REF(n) make_cell_ (tmp_num_ (TREF), n, 0)
#define MAKE_STRING(x) make_cell_ (tmp_num_ (TSTRING), x, 0)
#if MES_C_READER
#define MAKE_KEYWORD(x) make_cell_ (tmp_num_ (TKEYWORD), x, 0)
#endif
#define CAAR(x) CAR (CAR (x))
#define CADR(x) CAR (CDR (x))
@ -717,7 +739,7 @@ eval_apply ()
case cell_vm_apply: goto apply;
case cell_vm_apply2: goto apply2;
case cell_vm_eval: goto eval;
#if FIXED_PRIMITIVES
#if MES_FIXED_PRIMITIVES
case cell_vm_eval_car: goto eval_car;
case cell_vm_eval_cdr: goto eval_cdr;
case cell_vm_eval_cons: goto eval_cons;
@ -851,7 +873,7 @@ eval_apply ()
{
switch (CAR (r1))
{
#if FIXED_PRIMITIVES
#if MES_FIXED_PRIMITIVES
case cell_symbol_car:
{
push_cc (CADR (r1), r1, r0, cell_vm_eval_car); goto eval;
@ -879,7 +901,7 @@ eval_apply ()
eval_null_p:
x = r1; gc_pop_frame (); r1 = null_p (x); goto eval_apply;
}
#endif // FIXED_PRIMITIVES
#endif // MES_FIXED_PRIMITIVES
case cell_symbol_quote:
{
x = r1; gc_pop_frame (); r1 = CADR (x); goto eval_apply;
@ -1161,6 +1183,12 @@ mes_symbols () ///((internal))
a = acons (cell_symbol_mesc, cell_t, a);
#endif
#if MES_C_READER
a = acons (cell_symbol_c_reader, cell_t, a);
#else
a = acons (cell_symbol_c_reader, cell_f, a);
#endif
a = acons (cell_closure, a, a);
return a;

View file

@ -44,21 +44,40 @@ read_line_comment (int c)
}
SCM
read_word (int c, SCM w, SCM a)
read_word_ (int c, SCM w, SCM a)
{
if (c == EOF && w == cell_nil) return cell_nil;
if (c == '\t') return read_word ('\n', w, a);
if (c == '\f') return read_word ('\n', w, a);
if (c == '\n' && w == cell_nil) return read_word (getchar (), w, a);
if (c == '\t') return read_word_ ('\n', w, a);
if (c == '\f') return read_word_ ('\n', w, a);
if (c == '\n' && w == cell_nil) return read_word_ (getchar (), w, a);
if (c == '\n' && VALUE (car (w)) == '.' && cdr (w) == cell_nil) return cell_dot;
if (c == ' ') return read_word_ ('\n', w, a);
if (c == EOF || c == '\n') return lookup_ (w, a);
if (c == ' ') return read_word ('\n', w, a);
if (c == '(' && w == cell_nil) return read_list (a);
if (c == '(') {ungetchar (c); return lookup_ (w, a);}
if (c == ')' && w == cell_nil) {ungetchar (c); return cell_nil;}
if (c == ')') {ungetchar (c); return lookup_ (w, a);}
if (c == ';') {read_line_comment (c); return read_word ('\n', w, a);}
return read_word (getchar (), append2 (w, cons (MAKE_CHAR (c), cell_nil)), a);
if (c == ';') {read_line_comment (c); return read_word_ ('\n', w, a);}
#if MES_C_READER
if (c == '"' && w == cell_nil) return read_string ();
if (c == '"') {ungetchar (c); return lookup_ (w, a);}
if (c == ',' && peekchar () == '@') {getchar (); return cons (cell_symbol_unquote_splicing,
cons (read_word_ (getchar (), w, a),
cell_nil));}
if (c == '\'') return cons (cell_symbol_quote, cons (read_word_ (getchar (), w, a), cell_nil));
if (c == '`') return cons (cell_symbol_quasiquote, cons (read_word_ (getchar (), w, a), cell_nil));
if (c == ',') return cons (cell_symbol_unquote, cons (read_word_ (getchar (), w, a), cell_nil));
if (c == '#' && peekchar () == '!') {c = getchar (); read_block_comment (c, getchar ()); return read_word_ (getchar (), w, a);}
if (c == '#' && peekchar () == '|') {c = getchar (); read_block_comment (c, getchar ()); return read_word_ (getchar (), w, a);}
if (c == '#' && peekchar () == 'f') return read_word_ (getchar (), append2 (w, cons (MAKE_CHAR (c), cell_nil)), a);
if (c == '#' && peekchar () == 't') return read_word_ (getchar (), append2 (w, cons (MAKE_CHAR (c), cell_nil)), a);
if (c == '#') return read_hash (getchar (), w, a);
#endif //MES_C_READER
return read_word_ (getchar (), append2 (w, cons (MAKE_CHAR (c), cell_nil)), a);
}
int
@ -66,6 +85,9 @@ eat_whitespace (int c)
{
while (c == ' ' || c == '\t' || c == '\n' || c == '\f') c = getchar ();
if (c == ';') return eat_whitespace (read_line_comment (c));
#if MES_C_READER
if (c == '#' && (peekchar () == '!' || peekchar () == '|')) {c=getchar (); read_block_comment (c, getchar ()); return eat_whitespace (getchar ());}
#endif
return c;
}
@ -75,7 +97,7 @@ read_list (SCM a)
int c = getchar ();
c = eat_whitespace (c);
if (c == ')') return cell_nil;
SCM w = read_word (c, cell_nil, a);
SCM w = read_word_ (c, cell_nil, a);
if (w == cell_dot)
return car (read_list (a));
return cons (w, read_list (a));
@ -84,7 +106,7 @@ read_list (SCM a)
SCM
read_env (SCM a)
{
return read_word (getchar (), cell_nil, a);
return read_word_ (getchar (), cell_nil, a);
}
SCM
@ -109,6 +131,176 @@ lookup_ (SCM s, SCM a)
return lookup_symbol_ (s);
}
#if MES_C_READER
SCM
read_block_comment (int s, int c)
{
if (c == s && peekchar () == '#') return getchar ();
return read_block_comment (s, getchar ());
}
SCM
read_hash (int c, SCM w, SCM a)
{
if (c == ',')
{
if (peekchar () == '@')
{
getchar ();
return cons (cell_symbol_unsyntax_splicing, cons (read_word_ (getchar (), w, a), cell_nil));
}
return cons (cell_symbol_unsyntax, cons (read_word_ (getchar (), w, a), cell_nil));
}
if (c == '\'') return cons (cell_symbol_syntax, cons (read_word_ (getchar (), w, a), cell_nil));
if (c == '`') return cons (cell_symbol_quasisyntax, cons (read_word_ (getchar (), w, a), cell_nil));
if (c == ':') return MAKE_KEYWORD (CAR (read_word_ (getchar (), cell_nil, a)));
if (c == 'o') return read_octal ();
if (c == 'x') return read_hex ();
if (c == '\\') return read_character ();
if (c == '(') return list_to_vector (read_list (a));
if (c == ';') read_word_ (getchar (), w, a); return read_word_ (getchar (), w, a);
if (c == '!') {read_block_comment (c, getchar ()); return read_word_ (getchar (), w, a);}
if (c == '|') {read_block_comment (c, getchar ()); return read_word_ (getchar (), w, a);}
if (c == 'f') return cell_f;
if (c == 't') return cell_t;
return read_word_ (getchar (), append2 (w, cons (MAKE_CHAR (c), cell_nil)), a);
}
SCM
read_word (SCM c, SCM w, SCM a)
{
return read_word_ (VALUE (c), w, a);
}
SCM
read_character ()
{
int c = getchar ();
if (c >= '0' && c <= '7'
&& peekchar () >= '0' && peekchar () <= '7')
{
c = c - '0';
while (peekchar () >= '0' && peekchar () <= '7')
{
c <<= 3;
c += getchar () - '0';
}
}
else if (((c >= 'a' && c <= 'z')
|| c == '*')
&& ((peekchar () >= 'a' && peekchar () <= 'z')
|| peekchar () == '*'))
{
char buf[10];
char *p = buf;
*p++ = c;
while ((peekchar () >= 'a' && peekchar () <= 'z')
|| peekchar () == '*')
{
*p++ = getchar ();
}
*p = 0;
if (!strcmp (buf, "*eof*")) c = EOF;
else if (!strcmp (buf, "nul")) c = '\0';
else if (!strcmp (buf, "alarm")) c = '\a';
else if (!strcmp (buf, "backspace")) c = '\b';
else if (!strcmp (buf, "tab")) c = '\t';
else if (!strcmp (buf, "newline")) c = '\n';
else if (!strcmp (buf, "vtab")) c = '\v';
else if (!strcmp (buf, "page")) c = '\f';
#if __MESC__
//Nyacc bug
else if (!strcmp (buf, "return")) c = 13;
else if (!strcmp (buf, "cr")) c = 13;
#else
else if (!strcmp (buf, "return")) c = '\r';
else if (!strcmp (buf, "cr")) c = '\r';
#endif
else if (!strcmp (buf, "space")) c = ' ';
else
{
eputs ("char not supported: ");
eputs (buf);
eputs ("\n");
#if !__MESC__
assert (!"char not supported");
#endif
}
}
return MAKE_CHAR (c);
}
SCM
read_octal ()
{
int n = 0;
int c = peekchar ();
int s = 1;
if (c == '-') {s = -1;getchar (); c = peekchar ();}
while (c >= '0' && c <= '7')
{
n <<= 3;
n+= c - '0';
getchar ();
c = peekchar ();
}
return MAKE_NUMBER (s*n);
}
SCM
read_hex ()
{
int n = 0;
int c = peekchar ();
int s = 1;
if (c == '-') {s = -1;getchar (); c = peekchar ();}
while ((c >= '0' && c <= '9')
|| (c >= 'A' && c <= 'F')
|| (c >= 'a' && c <= 'f'))
{
n <<= 4;
if (c >= 'a') n += c - 'a' + 10;
else if (c >= 'A') n += c - 'A' + 10;
else n+= c - '0';
getchar ();
c = peekchar ();
}
return MAKE_NUMBER (s*n);
}
SCM
append_char (SCM x, int i)
{
return append2 (x, cons (MAKE_CHAR (i), cell_nil));
}
SCM
read_string ()
{
SCM p = cell_nil;
int c = getchar ();
while (1) {
if (c == '"') break;
if (c == '\\' && peekchar () == '\\') p = append_char (p, getchar ());
else if (c == '\\' && peekchar () == '"') p = append_char (p, getchar ());
else if (c == '\\' && peekchar () == 'n') {getchar (); p = append_char (p, '\n');}
#if !__MESC__
else if (c == EOF) assert (!"EOF in string");
#endif
else p = append_char (p, c);
c = getchar ();
}
return MAKE_STRING (p);
}
#else // !MES_C_READER
SCM read_word (SCM c,SCM w,SCM a) {}
SCM read_character () {}
SCM read_octal () {}
SCM read_hex () {}
SCM read_string () {}
#endif // MES_C_READER
int g_tiny = 0;
int