diff options
author | Bdale Garbee <bdale@gag.com> | 2017-04-24 18:22:03 -0600 |
---|---|---|
committer | Bdale Garbee <bdale@gag.com> | 2017-04-24 18:22:03 -0600 |
commit | b91f67005709cb7f65e0a461b49b5cb0952cb391 (patch) | |
tree | e9f6c0f30a81cf30a9cfd52887171168f7830f85 /src/lisp/ao_lisp_read.c | |
parent | 1e956f93e0c9f8ed6180490f80e8aead5538f818 (diff) | |
parent | 8a10ddb0bca7d6f6aa4aedda171899abd165fd74 (diff) |
Merge branch 'branch-1.7' into debian
Diffstat (limited to 'src/lisp/ao_lisp_read.c')
-rw-r--r-- | src/lisp/ao_lisp_read.c | 498 |
1 files changed, 498 insertions, 0 deletions
diff --git a/src/lisp/ao_lisp_read.c b/src/lisp/ao_lisp_read.c new file mode 100644 index 00000000..84ef2a61 --- /dev/null +++ b/src/lisp/ao_lisp_read.c @@ -0,0 +1,498 @@ +/* + * Copyright © 2016 Keith Packard <keithp@keithp.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include "ao_lisp.h" +#include "ao_lisp_read.h" + +static const uint16_t lex_classes[128] = { + IGNORE, /* ^@ */ + IGNORE, /* ^A */ + IGNORE, /* ^B */ + IGNORE, /* ^C */ + IGNORE, /* ^D */ + IGNORE, /* ^E */ + IGNORE, /* ^F */ + IGNORE, /* ^G */ + IGNORE, /* ^H */ + WHITE, /* ^I */ + WHITE, /* ^J */ + WHITE, /* ^K */ + WHITE, /* ^L */ + WHITE, /* ^M */ + IGNORE, /* ^N */ + IGNORE, /* ^O */ + IGNORE, /* ^P */ + IGNORE, /* ^Q */ + IGNORE, /* ^R */ + IGNORE, /* ^S */ + IGNORE, /* ^T */ + IGNORE, /* ^U */ + IGNORE, /* ^V */ + IGNORE, /* ^W */ + IGNORE, /* ^X */ + IGNORE, /* ^Y */ + IGNORE, /* ^Z */ + IGNORE, /* ^[ */ + IGNORE, /* ^\ */ + IGNORE, /* ^] */ + IGNORE, /* ^^ */ + IGNORE, /* ^_ */ + PRINTABLE|WHITE, /* */ + PRINTABLE, /* ! */ + PRINTABLE|STRINGC, /* " */ + PRINTABLE|COMMENT, /* # */ + PRINTABLE, /* $ */ + PRINTABLE, /* % */ + PRINTABLE, /* & */ + PRINTABLE|QUOTEC, /* ' */ + PRINTABLE|BRA, /* ( */ + PRINTABLE|KET, /* ) */ + PRINTABLE, /* * */ + PRINTABLE|SIGN, /* + */ + PRINTABLE, /* , */ + PRINTABLE|SIGN, /* - */ + PRINTABLE, /* . */ + PRINTABLE, /* / */ + PRINTABLE|DIGIT, /* 0 */ + PRINTABLE|DIGIT, /* 1 */ + PRINTABLE|DIGIT, /* 2 */ + PRINTABLE|DIGIT, /* 3 */ + PRINTABLE|DIGIT, /* 4 */ + PRINTABLE|DIGIT, /* 5 */ + PRINTABLE|DIGIT, /* 6 */ + PRINTABLE|DIGIT, /* 7 */ + PRINTABLE|DIGIT, /* 8 */ + PRINTABLE|DIGIT, /* 9 */ + PRINTABLE, /* : */ + PRINTABLE|COMMENT, /* ; */ + PRINTABLE, /* < */ + PRINTABLE, /* = */ + PRINTABLE, /* > */ + PRINTABLE, /* ? */ + PRINTABLE, /* @ */ + PRINTABLE, /* A */ + PRINTABLE, /* B */ + PRINTABLE, /* C */ + PRINTABLE, /* D */ + PRINTABLE, /* E */ + PRINTABLE, /* F */ + PRINTABLE, /* G */ + PRINTABLE, /* H */ + PRINTABLE, /* I */ + PRINTABLE, /* J */ + PRINTABLE, /* K */ + PRINTABLE, /* L */ + PRINTABLE, /* M */ + PRINTABLE, /* N */ + PRINTABLE, /* O */ + PRINTABLE, /* P */ + PRINTABLE, /* Q */ + PRINTABLE, /* R */ + PRINTABLE, /* S */ + PRINTABLE, /* T */ + PRINTABLE, /* U */ + PRINTABLE, /* V */ + PRINTABLE, /* W */ + PRINTABLE, /* X */ + PRINTABLE, /* Y */ + PRINTABLE, /* Z */ + PRINTABLE, /* [ */ + PRINTABLE|BACKSLASH, /* \ */ + PRINTABLE, /* ] */ + PRINTABLE, /* ^ */ + PRINTABLE, /* _ */ + PRINTABLE, /* ` */ + PRINTABLE, /* a */ + PRINTABLE, /* b */ + PRINTABLE, /* c */ + PRINTABLE, /* d */ + PRINTABLE, /* e */ + PRINTABLE, /* f */ + PRINTABLE, /* g */ + PRINTABLE, /* h */ + PRINTABLE, /* i */ + PRINTABLE, /* j */ + PRINTABLE, /* k */ + PRINTABLE, /* l */ + PRINTABLE, /* m */ + PRINTABLE, /* n */ + PRINTABLE, /* o */ + PRINTABLE, /* p */ + PRINTABLE, /* q */ + PRINTABLE, /* r */ + PRINTABLE, /* s */ + PRINTABLE, /* t */ + PRINTABLE, /* u */ + PRINTABLE, /* v */ + PRINTABLE, /* w */ + PRINTABLE, /* x */ + PRINTABLE, /* y */ + PRINTABLE, /* z */ + PRINTABLE, /* { */ + PRINTABLE|VBAR, /* | */ + PRINTABLE, /* } */ + PRINTABLE|TWIDDLE, /* ~ */ + IGNORE, /* ^? */ +}; + +static int lex_unget_c; + +static inline int +lex_get() +{ + int c; + if (lex_unget_c) { + c = lex_unget_c; + lex_unget_c = 0; + } else { + c = ao_lisp_getc(); + } + return c; +} + +static inline void +lex_unget(int c) +{ + if (c != EOF) + lex_unget_c = c; +} + +static int +lex_quoted (void) +{ + int c; + int v; + int count; + + c = lex_get(); + if (c == EOF) + return EOF; + c &= 0x7f; + switch (c) { + case 'n': + return '\n'; + case 'f': + return '\f'; + case 'b': + return '\b'; + case 'r': + return '\r'; + case 'v': + return '\v'; + case 't': + return '\t'; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + v = c - '0'; + count = 1; + while (count <= 3) { + c = lex_get(); + if (c == EOF) + return EOF; + c &= 0x7f; + if (c < '0' || '7' < c) { + lex_unget(c); + break; + } + v = (v << 3) + c - '0'; + ++count; + } + return v; + default: + return c; + } +} + +static uint16_t lex_class; + +static int +lexc(void) +{ + int c; + do { + c = lex_get(); + if (c == EOF) { + lex_class = ENDOFFILE; + c = 0; + } else { + c &= 0x7f; + lex_class = lex_classes[c]; + if (lex_class & BACKSLASH) { + c = lex_quoted(); + if (c == EOF) + lex_class = ENDOFFILE; + else + lex_class = PRINTABLE; + } + } + } while (lex_class & IGNORE); + return c; +} + +#define AO_LISP_TOKEN_MAX 32 + +static char token_string[AO_LISP_TOKEN_MAX]; +static int token_int; +static int token_len; + +static inline void add_token(int c) { + if (c && token_len < AO_LISP_TOKEN_MAX - 1) + token_string[token_len++] = c; +} + +static inline void end_token(void) { + token_string[token_len] = '\0'; +} + +static int +lex(void) +{ + int c; + + token_len = 0; + for (;;) { + c = lexc(); + if (lex_class & ENDOFFILE) + return END; + + if (lex_class & WHITE) + continue; + + if (lex_class & COMMENT) { + while ((c = lexc()) != '\n') { + if (lex_class & ENDOFFILE) + return END; + } + continue; + } + + if (lex_class & (BRA|KET|QUOTEC)) { + add_token(c); + end_token(); + switch (c) { + case '(': + return OPEN; + case ')': + return CLOSE; + case '\'': + return QUOTE; + } + } + if (lex_class & TWIDDLE) { + token_int = lexc(); + return NUM; + } + if (lex_class & STRINGC) { + for (;;) { + c = lexc(); + if (lex_class & (STRINGC|ENDOFFILE)) { + end_token(); + return STRING; + } + add_token(c); + } + } + if (lex_class & PRINTABLE) { + int isnum; + int hasdigit; + int isneg; + + isnum = 1; + hasdigit = 0; + token_int = 0; + isneg = 0; + for (;;) { + if (!(lex_class & NUMBER)) { + isnum = 0; + } else { + if (token_len != 0 && + (lex_class & SIGN)) + { + isnum = 0; + } + if (c == '-') + isneg = 1; + if (lex_class & DIGIT) { + hasdigit = 1; + if (isnum) + token_int = token_int * 10 + c - '0'; + } + } + add_token (c); + c = lexc (); + if (lex_class & (NOTNAME)) { +// if (lex_class & ENDOFFILE) +// clearerr (f); + lex_unget(c); + end_token (); + if (isnum && hasdigit) { + if (isneg) + token_int = -token_int; + return NUM; + } + return NAME; + } + } + + } + } +} + +static int parse_token; + +struct ao_lisp_cons *ao_lisp_read_cons; +struct ao_lisp_cons *ao_lisp_read_cons_tail; +struct ao_lisp_cons *ao_lisp_read_stack; + +static int +push_read_stack(int cons, int in_quote) +{ + DBGI("push read stack %p %d\n", ao_lisp_read_cons, in_quote); + DBG_IN(); + if (cons) { + ao_lisp_read_stack = ao_lisp_cons_cons(ao_lisp_cons_poly(ao_lisp_read_cons), + ao_lisp_cons_cons(ao_lisp_int_poly(in_quote), + ao_lisp_read_stack)); + if (!ao_lisp_read_stack) + return 0; + } + ao_lisp_read_cons = NULL; + ao_lisp_read_cons_tail = NULL; + return 1; +} + +static int +pop_read_stack(int cons) +{ + int in_quote = 0; + if (cons) { + ao_lisp_read_cons = ao_lisp_poly_cons(ao_lisp_read_stack->car); + ao_lisp_read_stack = ao_lisp_poly_cons(ao_lisp_read_stack->cdr); + in_quote = ao_lisp_poly_int(ao_lisp_read_stack->car); + ao_lisp_read_stack = ao_lisp_poly_cons(ao_lisp_read_stack->cdr); + for (ao_lisp_read_cons_tail = ao_lisp_read_cons; + ao_lisp_read_cons_tail && ao_lisp_read_cons_tail->cdr; + ao_lisp_read_cons_tail = ao_lisp_poly_cons(ao_lisp_read_cons_tail->cdr)) + ; + } else { + ao_lisp_read_cons = 0; + ao_lisp_read_cons_tail = 0; + ao_lisp_read_stack = 0; + } + DBG_OUT(); + DBGI("pop read stack %p %d\n", ao_lisp_read_cons, in_quote); + return in_quote; +} + +ao_poly +ao_lisp_read(void) +{ + struct ao_lisp_atom *atom; + char *string; + int cons; + int in_quote; + ao_poly v; + + parse_token = lex(); + DBGI("token %d (%s)\n", parse_token, token_string); + + cons = 0; + in_quote = 0; + ao_lisp_read_cons = ao_lisp_read_cons_tail = ao_lisp_read_stack = 0; + for (;;) { + while (parse_token == OPEN) { + if (!push_read_stack(cons, in_quote)) + return AO_LISP_NIL; + cons++; + in_quote = 0; + parse_token = lex(); + DBGI("token %d (%s)\n", parse_token, token_string); + } + + switch (parse_token) { + case END: + default: + if (cons) + ao_lisp_error(AO_LISP_EOF, "unexpected end of file"); + return _ao_lisp_atom_eof; + break; + case NAME: + atom = ao_lisp_atom_intern(token_string); + if (atom) + v = ao_lisp_atom_poly(atom); + else + v = AO_LISP_NIL; + break; + case NUM: + v = ao_lisp_int_poly(token_int); + break; + case STRING: + string = ao_lisp_string_copy(token_string); + if (string) + v = ao_lisp_string_poly(string); + else + v = AO_LISP_NIL; + break; + case QUOTE: + if (!push_read_stack(cons, in_quote)) + return AO_LISP_NIL; + cons++; + in_quote = 1; + v = _ao_lisp_atom_quote; + break; + case CLOSE: + if (!cons) { + v = AO_LISP_NIL; + break; + } + v = ao_lisp_cons_poly(ao_lisp_read_cons); + --cons; + in_quote = pop_read_stack(cons); + break; + } + + /* loop over QUOTE ends */ + for (;;) { + if (!cons) + return v; + + struct ao_lisp_cons *read = ao_lisp_cons_cons(v, NULL); + if (!read) + return AO_LISP_NIL; + + if (ao_lisp_read_cons_tail) + ao_lisp_read_cons_tail->cdr = ao_lisp_cons_poly(read); + else + ao_lisp_read_cons = read; + ao_lisp_read_cons_tail = read; + + if (!in_quote || !ao_lisp_read_cons->cdr) + break; + + v = ao_lisp_cons_poly(ao_lisp_read_cons); + --cons; + in_quote = pop_read_stack(cons); + } + + parse_token = lex(); + DBGI("token %d (%s)\n", parse_token, token_string); + } + return v; +} |