Implement strings and symbols as list of characters [WAS: c-string].

* mes.c (scm_t): Add string field.
  (make_string, internal_lookup_symbol, internal_make_symbol,
  make_symbol, lookup, readword): Take scm*.  Update callers.
  (display_helper): Support string field.
  (append_char): New function.
  (readstring): Use it.  Produce scm*.
  (cstring_to_list): New function.
  (add_environment, internal_make_symbol): Use it.
  (list_of_char_equal_p): New function.
  (internal_lookup_symbol): Use it.
* lib.c (list_ref): New function.
* string.c (string_ref): Use it.
  (string, string_append, string_length, substring, number_to_string,
  string_to_symbol, symbol_to_string): Update to list-of-characters
  implementation.
This commit is contained in:
Jan Nieuwenhuizen 2016-10-25 16:50:19 +02:00
parent 266c66e40c
commit 16f678a158
3 changed files with 161 additions and 106 deletions

10
lib.c
View file

@ -56,6 +56,16 @@ list (scm *x) ///((args . n))
return x; return x;
} }
scm *
list_ref (scm *x, scm *k)
{
assert (x->type == PAIR);
assert (k->type == NUMBER);
int n = k->value;
while (n-- && x->cdr != &scm_nil) x = x->cdr;
return x != &scm_nil ? x->car : &scm_undefined;
}
scm * scm *
vector_to_list (scm *v) vector_to_list (scm *v)
{ {

192
mes.c
View file

@ -18,7 +18,6 @@
* along with Mes. If not, see <http://www.gnu.org/licenses/>. * along with Mes. If not, see <http://www.gnu.org/licenses/>.
*/ */
#define STRING_MAX 2048
#define _GNU_SOURCE #define _GNU_SOURCE
#include <assert.h> #include <assert.h>
#include <ctype.h> #include <ctype.h>
@ -44,6 +43,7 @@ typedef struct scm_t {
enum type type; enum type type;
union { union {
char const *name; char const *name;
struct scm_t* string;
struct scm_t* car; struct scm_t* car;
struct scm_t* ref; struct scm_t* ref;
int length; int length;
@ -376,7 +376,9 @@ builtin_eval (scm *e, scm *a)
if (e->type == SYMBOL) { if (e->type == SYMBOL) {
scm *y = assq_ref_cache (e, a); scm *y = assq_ref_cache (e, a);
if (y == &scm_undefined) { if (y == &scm_undefined) {
fprintf (stderr, "eval: unbound variable: %s\n", e->name); fprintf (stderr, "eval: unbound variable:");
display_ (stderr, e);
fprintf (stderr, "\n");
assert (!"unbound variable"); assert (!"unbound variable");
} }
return y; return y;
@ -404,9 +406,12 @@ builtin_eval (scm *e, scm *a)
return define (e, a); return define (e, a);
#else #else
if (e->car == &symbol_define) { if (e->car == &symbol_define) {
fprintf (stderr, "C DEFINE: %s\n", e->cdr->car->type == SYMBOL fprintf (stderr, "C DEFINE: ");
? e->cdr->car->name display_ (stderr,
: e->cdr->car->car->name); e->cdr->car->type == SYMBOL
? e->cdr->car->string
: e->cdr->car->car->string);
fprintf (stderr, "\n");
} }
assert (e->car != &symbol_define); assert (e->car != &symbol_define);
assert (e->car != &symbol_define_macro); assert (e->car != &symbol_define_macro);
@ -525,7 +530,7 @@ make_macro (scm *name, scm *x)
scm *p = alloc (1); scm *p = alloc (1);
p->type = MACRO; p->type = MACRO;
p->macro = x; p->macro = x;
p->name = name->name; p->string = name->string;
return p; return p;
} }
@ -548,38 +553,68 @@ make_ref (scm *x)
} }
scm * scm *
make_string (char const *s) make_string (scm *x)
{ {
scm *p = alloc (1); scm *p = alloc (1);
p->type = STRING; p->type = STRING;
p->name = strdup (s); p->string = x;
return p;
}
scm *
cstring_to_list (char const* s)
{
scm *p = &scm_nil;
while (s && *s)
p = append2 (p, cons (make_char (*s++), &scm_nil));
return p; return p;
} }
scm *symbols = 0; scm *symbols = 0;
scm * scm *
internal_lookup_symbol (char const *s) list_of_char_equal_p (scm *a, scm *b)
{
while (a != &scm_nil && b != &scm_nil && a->car->value == b->car->value) {
assert (a->car->type == CHAR);
assert (b->car->type == CHAR);
a = a->cdr;
b = b->cdr;
}
return (a == &scm_nil && b == &scm_nil) ? &scm_t : &scm_f;
}
scm *
internal_lookup_symbol (scm *s)
{ {
scm *x = symbols; scm *x = symbols;
while (x && strcmp (s, x->car->name)) x = x->cdr; while (x) {
// FIXME: .string and .name is the same field; .name is used as a
// handy static field initializer. A string can only be mistaken
// for a cell with type == PAIR for the one character long,
// zero-padded #\etx.
if (x->car->string->type != PAIR)
x->car->string = cstring_to_list (x->car->name);
if (list_of_char_equal_p (x->car->string, s) == &scm_t) break;
x = x->cdr;
}
if (x) x = x->car; if (x) x = x->car;
return x; return x;
} }
scm * scm *
internal_make_symbol (char const *s) internal_make_symbol (scm *s)
{ {
scm *x = alloc (1); scm *x = alloc (1);
x->type = SYMBOL; x->type = SYMBOL;
x->name = strdup (s); x->string = s;
x->value = 0; x->value = 0;
symbols = cons (x, symbols); symbols = cons (x, symbols);
return x; return x;
} }
scm * scm *
make_symbol (char const *s) make_symbol (scm *s)
{ {
scm *x = internal_lookup_symbol (s); scm *x = internal_lookup_symbol (s);
return x ? x : internal_make_symbol (s); return x ? x : internal_make_symbol (s);
@ -648,28 +683,44 @@ vector_set_x (scm *x, scm *i, scm *e)
} }
scm * scm *
lookup (char const *s, scm *a) lookup (scm *s, scm *a)
{ {
if (isdigit (*s) || (*s == '-' && isdigit (*(s+1)))) if (isdigit (s->car->value) || (s->car->value == '-' && s->cdr != &scm_nil)) {
return make_number (atoi (s)); scm *p = s;
int sign = 1;
scm *x; if (s->car->value == '-') {
x = internal_lookup_symbol (s); sign = -1;
p = s->cdr;
}
int n = 0;
while (p != &scm_nil && isdigit (p->car->value)) {
n *= 10;
n += p->car->value - '0';
p = p->cdr;
}
if (p == &scm_nil) return make_number (n * sign);
}
scm *x = internal_lookup_symbol (s);
if (x) return x; if (x) return x;
if (*s == '\'') return &symbol_quote; if (s->cdr == &scm_nil) {
if (*s == '`') return &symbol_quasiquote; if (s->car->value == '\'') return &symbol_quote;
if (*s == ',' && *(s+1) == '@') return &symbol_unquote_splicing; if (s->car->value == '`') return &symbol_quasiquote;
if (*s == ',') return &symbol_unquote; if (s->car->value == ',') return &symbol_unquote;
}
if (*s == '#' && *(s+1) == '\'') return &symbol_syntax; else if (s->cdr->cdr == &scm_nil) {
if (*s == '#' && *(s+1) == '`') return &symbol_quasisyntax; if (s->car->value == ',' && s->cdr->car->value == '@') return &symbol_unquote_splicing;
if (*s == '#' && *(s+1) == ',' && *(s+2) == '@') return &symbol_unsyntax_splicing; if (s->car->value == '#' && s->cdr->car->value == '\'') return &symbol_syntax;
if (*s == '#' && *(s+1) == ',') return &symbol_unsyntax; if (s->car->value == '#' && s->cdr->car->value == '`') return &symbol_quasisyntax;
if (s->car->value == '#' && s->cdr->car->value == ',') return &symbol_unsyntax;
if (!strcmp (s, "EOF")) { }
fprintf (stderr, "mes: got EOF\n"); else if (s->cdr->cdr->cdr == &scm_nil) {
return &scm_nil; // `EOF': eval program, which may read stdin if (s->car->value == '#' && s->cdr->car->value == ',' && s->cdr->cdr->car->value == '@') return &symbol_unsyntax_splicing;
if (s->car->value == 'E' && s->cdr->car->value == 'O' && s->cdr->cdr->car->value == 'F') {
fprintf (stderr, "mes: got EOF\n");
return &scm_nil; // `EOF': eval program, which may read stdin
}
} }
return internal_make_symbol (s); return internal_make_symbol (s);
@ -678,10 +729,7 @@ lookup (char const *s, scm *a)
scm * scm *
lookup_char (int c, scm *a) lookup_char (int c, scm *a)
{ {
char buf[2]; return lookup (cons (make_char (c), &scm_nil), a);
buf[0] = c;
buf[1] = 0;
return lookup (buf, a);
} }
scm * scm *
@ -774,7 +822,16 @@ display_helper (FILE* f, scm *x, bool cont, char const *sep, bool quote)
} }
else if (x->type == REF) display_helper (f, x->ref, cont, "", true); else if (x->type == REF) display_helper (f, x->ref, cont, "", true);
else if (builtin_p (x) == &scm_t) fprintf (f, "#<procedure %s>", x->name); else if (builtin_p (x) == &scm_t) fprintf (f, "#<procedure %s>", x->name);
else if (pair_p (x) == &scm_f) fprintf (f, "%s", x->name); else if (x->type != PAIR && x->string) {
scm *p = x->string;
assert (p);
while (p != &scm_nil) {
assert (p->car->type == CHAR);
fputc (p->car->value, f);
p = p->cdr;
}
}
else if (x->type != PAIR && x->name) fprintf (f, "%s", x->name);
return &scm_unspecified; return &scm_unspecified;
} }
@ -843,53 +900,48 @@ readblock (int c)
} }
scm * scm *
readword (int c, char *w, scm *a) readword (int c, scm *w, scm *a)
{ {
if (c == EOF && !w) return &scm_nil; if (c == EOF && w == &scm_nil) return &scm_nil;
if (c == '\n' && !w) return readword (getchar (), w, a); if (c == '\n' && w == &scm_nil) return readword (getchar (), w, a);
if (c == '\n' && *w == '.' && w[1] == 0) return &scm_dot; if (c == '\n' && w->car->value == '.' && w->cdr == &scm_nil) return &scm_dot;
if (c == EOF || c == '\n') return lookup (w, a); if (c == EOF || c == '\n') return lookup (w, a);
if (c == ' ') return readword ('\n', w, a); if (c == ' ') return readword ('\n', w, a);
if (c == '"' && !w) return readstring (); if (c == '"' && w == &scm_nil) return readstring ();
if (c == '"') {ungetchar (c); return lookup (w, a);} if (c == '"') {ungetchar (c); return lookup (w, a);}
if (c == '(' && !w) return readlist (a); if (c == '(' && w == &scm_nil) return readlist (a);
if (c == '(') {ungetchar (c); return lookup (w, a);} if (c == '(') {ungetchar (c); return lookup (w, a);}
if (c == ')' && !w) {ungetchar (c); return &scm_nil;} if (c == ')' && w == &scm_nil) {ungetchar (c); return &scm_nil;}
if (c == ')') {ungetchar (c); return lookup (w, a);} if (c == ')') {ungetchar (c); return lookup (w, a);}
if (c == ',' && peekchar () == '@') {getchar (); return cons (lookup (",@", a), if (c == ',' && peekchar () == '@') {getchar (); return cons (lookup (symbol_unquote_splicing.string, a),
cons (readword (getchar (), w, a), cons (readword (getchar (), w, a),
&scm_nil));} &scm_nil));}
if ((c == '\'' if ((c == '\''
|| c == '`' || c == '`'
|| c == ',') || c == ',')
&& !w) {return cons (lookup_char (c, a), && w == &scm_nil) {return cons (lookup_char (c, a),
cons (readword (getchar (), w, a), cons (readword (getchar (), w, a),
&scm_nil));} &scm_nil));}
if (c == '#' && peekchar () == ',' && !w) { if (c == '#' && peekchar () == ',' && w == &scm_nil) {
getchar (); getchar ();
if (peekchar () == '@'){getchar (); return cons (lookup ("#,@", a), if (peekchar () == '@'){getchar (); return cons (lookup (symbol_unsyntax_splicing.string, a),
cons (readword (getchar (), w, a), cons (readword (getchar (), w, a),
&scm_nil));} &scm_nil));}
return cons (lookup ("#,", a), cons (readword (getchar (), w, a), &scm_nil)); return cons (lookup (symbol_unsyntax.string, a), cons (readword (getchar (), w, a), &scm_nil));
} }
if (c == '#' if (c == '#'
&& (peekchar () == '\'' && (peekchar () == '\''
|| peekchar () == '`') || peekchar () == '`')
&& !w) {char buf[3] = "#"; buf[1] = getchar (); return cons (lookup (buf, a), && w == &scm_nil) {return cons (lookup (cons (make_char ('#'), cons (make_char (getchar ()), &scm_nil)), a),
cons (readword (getchar (), w, a), cons (readword (getchar (), w, a),
&scm_nil));} &scm_nil));}
if (c == ';') {readcomment (c); return readword ('\n', w, a);} if (c == ';') {readcomment (c); return readword ('\n', w, a);}
if (c == '#' && peekchar () == 'x') {getchar (); return read_hex ();} if (c == '#' && peekchar () == 'x') {getchar (); return read_hex ();}
if (c == '#' && peekchar () == '\\') {getchar (); return read_character ();} if (c == '#' && peekchar () == '\\') {getchar (); return read_character ();}
if (c == '#' && !w && peekchar () == '(') {getchar (); return list_to_vector (readlist (a));} if (c == '#' && w == &scm_nil && peekchar () == '(') {getchar (); return list_to_vector (readlist (a));}
if (c == '#' && peekchar () == '(') {ungetchar (c); return lookup (w, a);} if (c == '#' && peekchar () == '(') {ungetchar (c); return lookup (w, a);}
if (c == '#' && peekchar () == '!') {getchar (); readblock (getchar ()); return readword (getchar (), w, a);} if (c == '#' && peekchar () == '!') {getchar (); readblock (getchar ()); return readword (getchar (), w, a);}
char buf[STRING_MAX] = {0}; return readword (getchar (), append2 (w, cons (make_char (c), &scm_nil)), a);
char ch = c;
char *p = w ? w + strlen (w) : buf;
*p = ch;
*(p+1) = 0;
return readword (getchar (), w ? w : buf, a);
} }
scm * scm *
@ -924,7 +976,7 @@ read_character ()
} }
else if (c >= 'a' && c <= 'z' else if (c >= 'a' && c <= 'z'
&& peekchar () >= 'a' && peekchar () <= 'z') { && peekchar () >= 'a' && peekchar () <= 'z') {
char buf[STRING_MAX]; char buf[10];
char *p = buf; char *p = buf;
*p++ = c; *p++ = c;
while (peekchar () >= 'a' && peekchar () <= 'z') { while (peekchar () >= 'a' && peekchar () <= 'z') {
@ -947,22 +999,26 @@ read_character ()
return make_char (c); return make_char (c);
} }
scm *
append_char (scm *x, int i)
{
return append2 (x, cons (make_char (i), &scm_nil));
}
scm * scm *
readstring () readstring ()
{ {
char buf[STRING_MAX]; scm *p = &scm_nil;
char *p = buf;
int c = getchar (); int c = getchar ();
while (true) { while (true) {
if (c == '"') break; if (c == '"') break;
if (c == '\\' && peekchar () == '"') *p++ = getchar (); if (c == '\\' && peekchar () == '"') p = append_char (p, getchar ());
else if (c == '\\' && peekchar () == 'n') {getchar (); *p++ = '\n';} else if (c == '\\' && peekchar () == 'n') {getchar (); p = append_char (p, '\n');}
else if (c == EOF) assert (!"EOF in string"); else if (c == EOF) assert (!"EOF in string");
else *p++ = c; else p = append_char (p, c);
c = getchar (); c = getchar ();
} }
*p = 0; return make_string (p);
return make_string (buf);
} }
int int
@ -980,7 +1036,7 @@ readlist (scm *a)
int c = getchar (); int c = getchar ();
c = eat_whitespace (c); c = eat_whitespace (c);
if (c == ')') return &scm_nil; if (c == ')') return &scm_nil;
scm *w = readword (c, 0, a); scm *w = readword (c, &scm_nil, a);
if (w == &scm_dot) if (w == &scm_dot)
return car (readlist (a)); return car (readlist (a));
return cons (w, readlist (a)); return cons (w, readlist (a));
@ -989,13 +1045,13 @@ readlist (scm *a)
scm * scm *
read_env (scm *a) read_env (scm *a)
{ {
return readword (getchar (), 0, a); return readword (getchar (), &scm_nil, a);
} }
scm * scm *
add_environment (scm *a, char const *name, scm *x) add_environment (scm *a, char const *name, scm *x)
{ {
return cons (cons (make_symbol (name), x), a); return cons (cons (make_symbol (cstring_to_list (name)), x), a);
} }
scm * scm *

View file

@ -21,54 +21,34 @@
scm * scm *
string (scm *x) ///((args . n)) string (scm *x) ///((args . n))
{ {
char buf[STRING_MAX] = ""; return make_string (x);
char *p = buf;
while (x != &scm_nil)
{
scm *s = car (x);
assert (s->type == CHAR);
*p++ = s->value;
x = cdr (x);
}
return make_string (buf);
} }
scm * scm *
string_append (scm *x) ///((args . n)) string_append (scm *x) ///((args . n))
{ {
char buf[STRING_MAX] = ""; scm *p = &scm_nil;
while (x != &scm_nil) while (x != &scm_nil)
{ {
scm *s = car (x); scm *s = car (x);
assert (s->type == STRING); assert (s->type == STRING);
strcat (buf, s->name); p = append2 (p, s->string);
x = cdr (x); x = cdr (x);
} }
return make_string (buf); return make_string (p);
} }
scm * scm *
list_to_string (scm *x) list_to_string (scm *x)
{ {
char buf[STRING_MAX] = ""; return make_string (x);
char *p = buf;
while (x != &scm_nil)
{
scm *s = car (x);
assert (s->type == CHAR);
*p++ = s->value;
x = cdr (x);
}
*p = 0;
return make_string (buf);
} }
scm * scm *
string_length (scm *x) string_length (scm *x)
{ {
assert (x->type == STRING); assert (x->type == STRING);
return make_number (strlen (x->name)); return make_number (length (x->string)->value);
} }
scm * scm *
@ -76,7 +56,8 @@ string_ref (scm *x, scm *k)
{ {
assert (x->type == STRING); assert (x->type == STRING);
assert (k->type == NUMBER); assert (k->type == NUMBER);
return make_char (x->name[k->value]); scm n = {NUMBER, .value=k->value};
return make_char (list_ref (x->string, &n)->value);
} }
scm * scm *
@ -84,40 +65,48 @@ substring (scm *x) ///((args . n))
{ {
assert (x->type == PAIR); assert (x->type == PAIR);
assert (x->car->type == STRING); assert (x->car->type == STRING);
char const *s = x->car->name; scm *s = x->car->string;
assert (x->cdr->car->type == NUMBER); assert (x->cdr->car->type == NUMBER);
int start = x->cdr->car->value; int start = x->cdr->car->value;
int end = strlen (s); int end = length (s)->value;
if (x->cdr->cdr->type == PAIR) { if (x->cdr->cdr->type == PAIR) {
assert (x->cdr->cdr->car->type == NUMBER); assert (x->cdr->cdr->car->type == NUMBER);
assert (x->cdr->cdr->car->value <= end); assert (x->cdr->cdr->car->value <= end);
end = x->cdr->cdr->car->value; end = x->cdr->cdr->car->value;
} }
char buf[STRING_MAX]; int n = end - start;
strncpy (buf, s+start, end - start); while (start--) s = s->cdr;
buf[end-start] = 0; scm *p = &scm_nil;
return make_string (buf); while (n-- && s != &scm_nil) {
p = append2 (p, cons (make_char (s->car->value), &scm_nil));
s = s->cdr;
}
return make_string (p);
} }
scm * scm *
number_to_string (scm *x) number_to_string (scm *x)
{ {
assert (x->type == NUMBER); assert (x->type == NUMBER);
char buf[STRING_MAX]; int n = x->value;
sprintf (buf,"%d", x->value); scm *p = n < 0 ? cons (make_char ('-'), &scm_nil) : &scm_nil;
return make_string (buf); do {
p = cons (make_char (n % 10 + '0'), p);
n = n / 10;
} while (n);
return make_string (p);
} }
scm * scm *
string_to_symbol (scm *x) string_to_symbol (scm *x)
{ {
assert (x->type == STRING); assert (x->type == STRING);
return make_symbol (x->name); return make_symbol (x->string);
} }
scm * scm *
symbol_to_string (scm *x) symbol_to_string (scm *x)
{ {
assert (x->type == SYMBOL); assert (x->type == SYMBOL);
return make_string (x->name); return make_string (x->string);
} }