/* $NetBSD$ */
/*-
* Copyright (c) 2007 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Jachym Holecek <
[email protected]>.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the NetBSD
* Foundation, Inc. and its contributors.
* 4. Neither the name of The NetBSD Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/queue.h>
#include <sys/param.h> /* roundup() */
#if defined(_KERNEL)
#include <sys/systm.h>
#elif defined(_STANDALONE)
#include <lib/libkern/libkern.h>
#else
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#if defined(DEBUG)
#include <stdarg.h>
#include <stdio.h>
#endif
#include <stdlib.h>
#endif
#include <prop/proplib.h>
#include "prop_object_impl.h"
#include "prop_codec_impl.h"
#if defined(_STANDALONE)
#define BUFINCR 16
#else
#define BUFINCR 128
#endif
#if !defined(__UNCONST)
#define __UNCONST(a) ((void *)(unsigned long)(const void *)(a))
#endif
/*
* This needs to go via a macro -- even if verbose()'s body would be empty
* ifndef DEBUG, GCC still isn't smart enough to eliminate calls to funcs
* that only exist if DEBUG. And we don't want to clutter the code with
* preprocessor conditionals.
*/
#if defined(DEBUG)
#define VERBOSE(s, ...) verbose(s "\n", ## __VA_ARGS__)
#else
#define VERBOSE(s, ...) /* silence */
#endif
/* Ewww, depolymerise function names. */
#define poec _prop_object_externalize_context
#define poec_create _prop_object_externalize_context_alloc
#define poec_append _prop_object_externalize_append_cstring
#define poec_push _prop_object_externalize_append_char
#define poec_destroy _prop_object_externalize_context_free
#define prop_keysym_str prop_dictionary_keysym_cstring_nocopy
#define _TK_COMPOUND_VALUES \
TK_ARRAYO: \
case TK_DICTO
typedef enum {
/* Self representing. */
TK_FIRST = 0, /* So that memset(0) DTRT. */
TK_ARRAYC,
TK_ARRAYO,
TK_DATA,
TK_DICTC,
TK_DICTO,
TK_WHITE,
TK_LAST,
/* Values. */
TK_STRING,
TK_SINT64,
TK_SYMBOL,
TK_UINT64
} token_type_t;
#define _PA_TOKEN_LAST TK_UINT64
#define _PA_TOKEN_NAMES \
"FIRST", "ARRAYC", "ARRAYO", "DATA", "DICTC", "DICTO", "WHITE", \
"LAST", "STRING", "SINT64", "SYMBOL", "UINT64",
typedef enum {
SC_FIRST = 0,
SC_WHITE,
SC_SINT64,
SC_UINT64,
SC_QUOTED,
SC_SYMBOL,
SC_TOPLEVEL,
SC_ERROR,
SC_BASE64_APPEND,
SC_BASE64_MAYBE_NEXT,
SC_BASE64_NEXT,
SC_BASE64_CLOSE,
SC_STRING_APPEND,
SC_STRING_MAYBE_NEXT,
SC_STRING_CONCAT,
SC_STRING_CLOSE,
SC_COMMENT
} scan_state_t;
#define _SC_STATE_LAST SC_COMMENT
#define _SC_STATE_NAMES \
"FIRST", "WHITE", "SINT64", "UINT64", "QUOTED", "SYMBOL", "TOPLEVEL", \
"ERROR", "BASE64_APPEND", "BASE64_MAYBE_NEXT", "BASE64_NEXT", \
"SC_BASE64_CLOSE", "STRING_APPEND", "STRING_MAYBE_NEXT", \
"STRING_CONCAT", "STRING_CLOSE", "COMMENT"
typedef enum {
PA_TOPLEVEL = 0,
PA_ARRAY,
PA_DICTIONARY,
PA_ERROR,
PA_HOME,
PA_OBJECT
} parse_state_t;
#define _PA_STATE_LAST PA_OBJECT
#define _PA_STATE_NAMES \
"TOPLEVEL", "ARRAY", "DICTIONARY", "ERROR", "HOME", "OBJECT"
union value {
char *val_string;
uint64_t val_unsigned;
int64_t val_signed;
struct {
const u_char *vd_buf;
size_t vd_len;
} val_data;
};
/* Scanner constructs tokens and enqueues them to parser. */
struct token {
SIMPLEQ_ENTRY(token) tok_link;
token_type_t tok_type;
#if !defined(_STANDALONE)
size_t tok_pos_line;
size_t tok_pos_col;
#endif
union value tok_value;
#define tok_string tok_value.val_string
#define tok_signed tok_value.val_signed
#define tok_unsigned tok_value.val_unsigned
#define tok_data_buf tok_value.val_data.vd_buf
#define tok_data_len tok_value.val_data.vd_len
};
/* Stack for nested objects, entries relinked to consq on completion. */
struct frame {
SIMPLEQ_ENTRY(frame) se_link; /* Stack entry link. */
prop_object_t se_object; /* Compound object. */
/* Dec: key to parent dict, Enc: next object from parent. */
union {
const char *un_symbol;
prop_object_iterator_t un_iter;
} se_un;
#define se_symbol se_un.un_symbol
#define se_iter se_un.un_iter
};
SIMPLEQ_HEAD(stack, frame);
struct parser {
/* Incoming tokens, operation stack, list of constructed objects. */
SIMPLEQ_HEAD(, token) pa_tokens; /* FIFO */
struct stack pa_stack; /* LIFO */
struct stack pa_consq; /* FIFO */
/* Store parser state across calls. */
parse_state_t pa_state;
parse_state_t pa_prev;
parse_state_t pa_last;
/* Store scanner state across calls. */
scan_state_t sc_state; /* Current state. */
scan_state_t sc_prev; /* State at start of this cycle. */
scan_state_t sc_last; /* Last value of prev != state. */
#if !defined(_STANDALONE)
/* Position in input stream. */
size_t sc_pos_line;
size_t sc_pos_col;
#endif
/* Scanner internalization buffer. */
u_char *sc_string;
size_t sc_strcur;
size_t sc_strlen;
/* Base64 decoder. */
size_t sc_base64_size; /* in bits */
};
/* Base64 code space (plus '='). */
static const char base64abc[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopq"
"rstuvwxyz0123456789+/";
#if defined(_KERNEL) || defined(_STANDALONE)
static char *
strdup(const char *s)
{
char *p;
size_t l;
l = strlen(s) + 1;
p = _PROP_MALLOC(l, M_TEMP);
return (memcpy(p, s, l));
}
static int
isprint(int c)
{
return (c >= ' ' && c <= '~');
}
#endif /* _KERNEL || _STANDALONE */
static boolean_t
stroneof(const char *s, const char *t[])
{
int i;
for (i = 0; t[i]; i++)
if (strcasecmp(s, t[i]) == 0)
return (TRUE);
return (FALSE);
}
#if defined(DEBUG)
static void
verbose(const char *fmt, ...)
{
static FILE *tracefile = NULL;
char *s;
va_list ap;
/* XXX locking */
if (tracefile == NULL) {
s = getenv("PROPLIB_SCN_TRACEFILE");
if (s == NULL)
s = "__prop_scn.out";
tracefile = fopen(s, "w");
if (tracefile == NULL)
abort();
}
va_start(ap, fmt);
vfprintf(tracefile, fmt, ap);
va_end(ap);
}
static const char *
scanner_quote_char(u_char c)
{
static char buf[8]; /* 'c', '\n', 0xab */
u_char d = 0;
switch (c) {
case '\f': d = 'f'; break;
case '\n': d = 'n'; break;
case '\r': d = 'r'; break;
case '\t': d = 't'; break;
case '\v': d = 'v'; break;
}
if (d) {
sprintf(buf, "'\\%c'", d);
return (buf);
}
if (isprint(c))
sprintf(buf, "'%c'", (char)c);
else
sprintf(buf, "0x%02x", (u_int)c);
return (buf);
}
static const char *
scanner_state_name(scan_state_t n)
{
static const char *const names[] = { _SC_STATE_NAMES };
static int idx = 0; /* XXX hack, printf */
static char buf[5][32];
if (n < 0 || n > _SC_STATE_LAST) {
if (++idx == 5)
idx = 0;
snprintf(buf[idx], 32, "<wrong %d>", n);
return (buf[idx]);
}
return (names[n]);
}
static const char *
parser_state_name(parse_state_t n)
{
static const char *const names[] = { _PA_STATE_NAMES };
static int idx = 0; /* XXX hack, printf */
static char buf[5][32];
if (++idx == 5)
idx = 0;
if (n < 0 || n > _PA_STATE_LAST) {
snprintf(buf[idx], 32, "<wrong %d>", n);
return (buf[idx]);
}
return (names[n]);
}
static const char *
parser_token_name(token_type_t n)
{
static const char *const names[] = { _PA_TOKEN_NAMES };
if (n < 0 || n > _PA_TOKEN_LAST)
return ("<wrong token>");
return (names[n]);
}
static const char *
parser_token_desc(struct token *t)
{
static char buf[32];
static const size_t sof = sizeof(buf);
const char *s;
int n = t->tok_type;
s = parser_token_name(n);
switch (n) {
case TK_SYMBOL:
snprintf(buf, sof, "%s '%s'", s, t->tok_string);
return (buf);
case TK_SINT64:
snprintf(buf, sof, "%s %"PRId64, s, t->tok_signed);
return (buf);
case TK_STRING:
snprintf(buf, sof, "%s \"%s\"", s, t->tok_string);
return (buf);
case TK_UINT64:
snprintf(buf, sof, "%s #%"PRIx64, s, t->tok_unsigned);
return (buf);
case TK_DATA:
snprintf(buf, sof, "%s %zdB", s, t->tok_data_len);
return (buf);
}
return (s);
}
#endif /* PROP_SCN_DEBUG */
static prop_string_t
prop_scn_create_string(struct token *t)
{
prop_string_t ps;
char *str = t->tok_string;
ps = _prop_string_alloc();
if (ps == NULL)
return (NULL);
ps->ps_mutable = str;
ps->ps_size = strlen(str);
return (ps);
}
static prop_number_t
prop_scn_create_uint(struct token *t)
{
return (prop_number_create_unsigned_integer(t->tok_unsigned));
}
static prop_number_t
prop_scn_create_sint(struct token *t)
{
return (prop_number_create_integer(t->tok_signed));
}
static boolean_t
scanner_ensure_strlen(struct parser *pa)
{
void *b;
/* Enough room for <char, '\0'> or base64 triplet. */
if ((pa->sc_strlen - pa->sc_strcur) < 3) {
b = _PROP_REALLOC(pa->sc_string, pa->sc_strlen + BUFINCR,
M_TEMP);
if (b == NULL) {
VERBOSE("scanner: ENOMEM internalization buffer");
return (TRUE);
}
pa->sc_string = b;
pa->sc_strlen += BUFINCR;
}
/* No error == success. */
return (FALSE);
}
static struct token *
parser_token_put(struct parser *pa, token_type_t tok)
{
struct token *t;
t = _PROP_MALLOC(sizeof(struct token), M_TEMP);
if (t == NULL) {
VERBOSE(" parser: ENOMEM token %s", parser_token_name(tok));
return (NULL);
}
memset(t, 0, sizeof(struct token));
t->tok_type = tok;
#if !defined(_STANDALONE)
t->tok_pos_line = pa->sc_pos_line;
t->tok_pos_col = pa->sc_pos_col;
#endif
SIMPLEQ_INSERT_TAIL(&pa->pa_tokens, t, tok_link);
return (t);
}
static void
parser_token_free(struct token *t, boolean_t force)
{
/* Normally, tok_string is donated upwards, not duplicated. */
if (force && (t->tok_type == TK_STRING || t->tok_type == TK_SYMBOL) &&
t->tok_string != NULL)
_PROP_FREE(t->tok_string, M_TEMP);
_PROP_FREE(t, M_TEMP);
}
#define SC_ARROW_P(src, dst) (pa->sc_prev == (src) && pa->sc_state == (dst))
#define SC_CYCLE_P(s) SC_ARROW_P(s, s)
static u_char
scanner_base64_decode(u_char c)
{
const char *s;
if (c == '=')
return (0);
s = strchr(base64abc, (int)(u_int)c);
_PROP_ASSERT(s);
return ((u_char)(s - base64abc));
}
static int
prop_scanner_exec(struct parser *pa, const u_char *input, size_t length)
{
const u_char *p = input;
const u_char *pe = input + length;
struct token *t;
boolean_t keepchar;
u_char c;
if (input == NULL || length == 0) {
if (parser_token_put(pa, TK_LAST) == NULL)
return (ENOMEM);
return (0);
}
advance:
if (p == pe)
return (0);
c = *p++;
dispatch:
keepchar = TRUE; /* for callcc */
switch (pa->sc_state) {
case SC_FIRST:
/* Force transition. */
pa->sc_state = SC_TOPLEVEL;
goto callcc;
case SC_TOPLEVEL:
switch (c) {
case '#':
pa->sc_state = SC_UINT64;
goto callnext;
case '{':
if (parser_token_put(pa, TK_DICTO) == NULL)
return (ENOMEM);
goto callnext;
case '}':
if (parser_token_put(pa, TK_DICTC) == NULL)
return (ENOMEM);
goto callnext;
case '[':
if (parser_token_put(pa, TK_ARRAYO) == NULL)
return (ENOMEM);
goto callnext;
case ']':
if (parser_token_put(pa, TK_ARRAYC) == NULL)
return (ENOMEM);
goto callnext;
case '"':
/* Opening quote, clear from edge action. */
pa->sc_state = SC_STRING_APPEND;
goto callnext;
case ';':
pa->sc_state = SC_COMMENT;
goto callnext;
case ':':
pa->sc_state = SC_BASE64_APPEND;
goto callnext;
}
if (c == '-' || c == '+' || isdigit(c)) {
pa->sc_state = SC_SINT64;
goto callnext;
} else
if (isspace(c)) {
pa->sc_state = SC_WHITE;
goto callnext;
}
pa->sc_state = SC_SYMBOL;
goto callnext;
case SC_BASE64_APPEND:
if (strchr(base64abc, c) != NULL || c == '=') {
char d = scanner_base64_decode(c);
if (scanner_ensure_strlen(pa))
return (ENOMEM);
switch (pa->sc_base64_size % 24) {
case 0:
pa->sc_string[pa->sc_strcur] = d << 2;
break;
case 6:
/* LINTED: sc_string & d are u_char */
pa->sc_string[pa->sc_strcur] |= d >> 4;
pa->sc_strcur++;
pa->sc_string[pa->sc_strcur] = d << 4;
break;
case 12:
/* LINTED: sc_string & d are u_char */
pa->sc_string[pa->sc_strcur] |= d >> 2;
pa->sc_strcur++;
pa->sc_string[pa->sc_strcur] = d << 6;
break;
case 18:
pa->sc_string[pa->sc_strcur++] |= d;
break;
}
pa->sc_base64_size += 6;
} else {
pa->sc_state = SC_BASE64_MAYBE_NEXT;
goto callcc;
}
break;
case SC_BASE64_MAYBE_NEXT:
if (pa->sc_base64_size == 0) {
VERBOSE("scanner: BASE64 first chunk empty");
pa->sc_state = SC_ERROR;
goto callcc;
}
if (c == '.') {
pa->sc_state = SC_BASE64_NEXT;
goto callnext;
}
if (! isspace(c)) {
pa->sc_state = SC_BASE64_CLOSE;
goto callcc;
}
break;
case SC_BASE64_NEXT:
if (strchr(base64abc, c) != NULL || c == '=') {
pa->sc_state = SC_BASE64_APPEND;
goto callcc;
}
if (! isspace(c)) {
pa->sc_state = SC_ERROR;
goto callcc;
}
break;
case SC_BASE64_CLOSE:
{
u_char *s;
/* Length must always be multiple of 3 bytes. */
if (pa->sc_base64_size % 24) {
/* Warrants we're long enough to roundup. */
if (scanner_ensure_strlen(pa))
return (ENOMEM);
/* Don't overwrite string[strcur]! */
memset(pa->sc_string + pa->sc_strcur + 1, 0,
pa->sc_strlen - pa->sc_strcur - 1);
pa->sc_base64_size =
roundup(pa->sc_base64_size, 24);
}
/* Convert length to bytes. */
pa->sc_base64_size = roundup(pa->sc_base64_size, 8)/8;
s = _PROP_MALLOC(pa->sc_base64_size, M_TEMP);
if (s == NULL) {
VERBOSE("scanner: ENOMEM data");
return (ENOMEM);
}
memcpy(s, pa->sc_string, pa->sc_base64_size);
if ((t = parser_token_put(pa, TK_DATA)) == NULL)
return (ENOMEM);
t->tok_data_buf = s;
t->tok_data_len = pa->sc_base64_size;
pa->sc_state = SC_TOPLEVEL;
goto callcc;
}
/* UNREACHED */
case SC_STRING_APPEND:
if (c == '\\') {
pa->sc_state = SC_QUOTED;
goto callnext;
}
if (c == '"') {
/* Closing quote, see if concatenation follows. */
pa->sc_state = SC_STRING_MAYBE_NEXT;
goto callnext;
}
if (! isprint(c)) {
pa->sc_state = SC_ERROR;
goto callcc;
}
/* Append from edge action. */
break;
case SC_STRING_MAYBE_NEXT:
if (c == '.') {
pa->sc_state = SC_STRING_CONCAT;
goto callnext;
}
if (! isspace(c)) {
pa->sc_state = SC_STRING_CLOSE;
goto callcc;
}
break;
case SC_STRING_CONCAT:
if (c == '"') {
/* Opening quote of concat string. */
pa->sc_state = SC_STRING_APPEND;
goto callnext;
}
if (! isspace(c)) {
pa->sc_state = SC_ERROR;
goto callcc;
}
break;
case SC_STRING_CLOSE:
{
char *s;
s = strdup((const char *)pa->sc_string);
if (s == NULL) {
VERBOSE("scanner: ENOMEM string");
return (ENOMEM);
}
if ((t = parser_token_put(pa, TK_STRING)) == NULL)
return (ENOMEM);
t->tok_string = s;
pa->sc_state = SC_TOPLEVEL;
goto callcc;
}
/* NOTREACHED */
case SC_QUOTED:
{
u_char d;
if (scanner_ensure_strlen(pa))
return (ENOMEM);
switch (d = c) {
case '\\': d = '\\'; break;
case 'n': d = '\n'; break;
case 't': d = '\t'; break;
case '"': d = '\"'; break;
}
pa->sc_string[pa->sc_strcur++] = d;
pa->sc_string[pa->sc_strcur] = '\0';
pa->sc_state = SC_STRING_APPEND;
goto callnext;
}
/* NOTREACHED */
case SC_WHITE:
if (! isspace(c)) {
if ((t = parser_token_put(pa, TK_WHITE)) == NULL)
return (ENOMEM);
pa->sc_state = SC_TOPLEVEL;
goto callcc;
}
break;
case SC_SINT64:
if (! isdigit(c) && c != '+' && c != '-') {
long long n;
char *end;
n = strtoll((const char *)pa->sc_string, &end, 10);
if (*end != '\0' || n > INT64_MAX || n < INT64_MIN) {
VERBOSE("scanner: wrong SINT64 '%s'",
pa->sc_string);
return (EINVAL);
}
if ((t = parser_token_put(pa, TK_SINT64)) == NULL)
return (ENOMEM);
t->tok_unsigned = (int64_t)n;
pa->sc_state = SC_TOPLEVEL;
goto callcc;
}
break;
case SC_UINT64:
if (! isxdigit(c)) {
unsigned long long u;
char *end;
u = strtoull((const char *)pa->sc_string, &end, 16);
if (*end != '\0' || u > UINT64_MAX) {
VERBOSE("scanner: wrong UINT64 '%s'",
pa->sc_string);
return (EINVAL);
}
if ((t = parser_token_put(pa, TK_UINT64)) == NULL)
return (ENOMEM);
t->tok_unsigned = (uint64_t)u;
pa->sc_state = SC_TOPLEVEL;
goto callcc;
}
break;
case SC_SYMBOL:
if (isspace(c) || c == '\\') {
char *s;
s = strdup((const char *)pa->sc_string);
if (s == NULL) {
VERBOSE("scanner: ENOMEM symbol");
return (ENOMEM);
}
if ((t = parser_token_put(pa, TK_SYMBOL)) == NULL) {
_PROP_FREE(s, M_TEMP);
return (ENOMEM);
}
t->tok_string = s;
pa->sc_state = SC_TOPLEVEL;
goto callcc;
}
if (! isprint(c)) {
pa->sc_state = SC_ERROR;
goto callcc;
}
break;
case SC_COMMENT:
if (c == '\n' || c == '\r') {
pa->sc_state = SC_TOPLEVEL;
goto callnext;
}
break;
case SC_ERROR:
VERBOSE("scanner: wrong char '%c' (line %zd char %zd) in "
"state %s", c, pa->sc_pos_line, pa->sc_pos_col,
scanner_state_name(pa->sc_last));
return (EINVAL);
}
callnext:
keepchar = FALSE;
callcc:
if (pa->sc_state != pa->sc_prev) {
VERBOSE("scanner: %-17s --> %-17s %s\t[%d, %d]",
((keepchar && pa->sc_prev != SC_FIRST) ?
"" : scanner_state_name(pa->sc_prev)),
scanner_state_name(pa->sc_state),
scanner_quote_char(c), pa->sc_pos_line, pa->sc_pos_col);
} else {
VERBOSE("scanner: %-17s ::: %-17s %s\t[%d, %d]", "", "",
scanner_quote_char(c), pa->sc_pos_line, pa->sc_pos_col);
}
/* Be specific about transitions, let compiler deal w/redundancy. */
if (SC_ARROW_P(SC_TOPLEVEL, SC_BASE64_APPEND)) {
pa->sc_base64_size = 0;
pa->sc_strcur = 0;
}
if (SC_ARROW_P(SC_TOPLEVEL, SC_STRING_APPEND) ||
SC_ARROW_P(SC_TOPLEVEL, SC_SYMBOL) ||
SC_ARROW_P(SC_TOPLEVEL, SC_UINT64) ||
SC_ARROW_P(SC_TOPLEVEL, SC_SINT64)) {
pa->sc_strcur = 0;
}
if (SC_CYCLE_P(SC_UINT64) ||
SC_CYCLE_P(SC_SINT64) ||
SC_CYCLE_P(SC_STRING_APPEND) ||
SC_CYCLE_P(SC_SYMBOL) ||
SC_ARROW_P(SC_TOPLEVEL, SC_SINT64) ||
SC_ARROW_P(SC_TOPLEVEL, SC_SYMBOL)) {
if (scanner_ensure_strlen(pa))
return (ENOMEM);
pa->sc_string[pa->sc_strcur++] = c;
pa->sc_string[pa->sc_strcur] = '\0';
}
#if !defined(_STANDALONE)
if (! keepchar) {
if (c == '\n') {
pa->sc_pos_line++;
pa->sc_pos_col = 1;
} else {
if (c == '\t')
pa->sc_pos_col = roundup(pa->sc_pos_col, 8);
else
pa->sc_pos_col++;
}
}
#endif
/* Commited to new state. */
if (pa->sc_state != pa->sc_prev)
pa->sc_last = pa->sc_prev;
pa->sc_prev = pa->sc_state;
if (keepchar)
goto dispatch;
else
goto advance;
/* UNREACHED */
}
#undef SC_CYCLE_P
#undef SC_ARROW_P
static void
parser_frame_free(struct frame *e)
{
if (e->se_symbol)
_PROP_FREE(__UNCONST(e->se_symbol), M_TEMP);
if (e->se_object != NULL)
prop_object_release(e->se_object);
_PROP_FREE(e, M_TEMP);
}
static boolean_t
parser_frame_enter(struct parser *pa, prop_object_t o)
{
struct frame *e;
if (o == NULL)
return (TRUE);
e = _PROP_MALLOC(sizeof(struct frame), M_TEMP);
if (e == NULL)
return (TRUE);
memset(e, 0, sizeof(struct frame));
e->se_object = o;
SIMPLEQ_INSERT_HEAD(&pa->pa_stack, e, se_link);
return (FALSE);
}
static int
parser_frame_store(struct parser *pa, prop_object_t o)
{
struct frame *e;
prop_object_t the;
e = SIMPLEQ_FIRST(&pa->pa_stack);
if (e == NULL) {
VERBOSE(" parser: stack underflow");
return (EINVAL);
}
the = e->se_object;
switch (prop_object_type(the)) {
case PROP_TYPE_ARRAY:
_PROP_ASSERT(e->se_symbol == NULL);
if (prop_array_add(the, o) == FALSE)
return (ENOMEM);
break;
case PROP_TYPE_DICTIONARY:
_PROP_ASSERT(e->se_symbol != NULL);
if (prop_dictionary_set(the, e->se_symbol, o) == FALSE)
return (ENOMEM);
_PROP_FREE(__UNCONST(e->se_symbol), M_TEMP);
e->se_symbol = NULL;
break;
default:
VERBOSE(" parser: wrong object on stack, not compound");
return (EINVAL);
}
prop_object_release(o);
return (0);
}
static int
parser_frame_leave(struct parser *pa)
{
struct frame *e;
prop_object_t o;
/* Get hold of the lower object. */
if ((e = SIMPLEQ_FIRST(&pa->pa_stack)) == NULL) {
VERBOSE(" parser: stack underflow");
return (EINVAL);
}
SIMPLEQ_REMOVE_HEAD(&pa->pa_stack, se_link);
/* Move it to finished objects if it's toplevel. */
if (SIMPLEQ_EMPTY(&pa->pa_stack)) {
SIMPLEQ_INSERT_TAIL(&pa->pa_consq, e, se_link);
_PROP_ASSERT(e->se_object);
return (0);
}
/* Otherwise insert into current compound. */
o = e->se_object;
/* Make sure ${o} isn't released, parser_frame_store() will do it. */
e->se_object = NULL;
parser_frame_free(e);
return (parser_frame_store(pa, o));
}
static int
prop_scn_parser_create(prop_parser_t *pp)
{
struct parser *pa;
pa = _PROP_MALLOC(sizeof(struct parser), M_TEMP);
if (pa == NULL)
return (ENOMEM);
memset(pa, 0, sizeof(struct parser));
SIMPLEQ_INIT(&pa->pa_tokens);
SIMPLEQ_INIT(&pa->pa_stack);
SIMPLEQ_INIT(&pa->pa_consq);
#if !defined(_STANDALONE)
/* Text editors tend to count from 1, be friendly. */
pa->sc_pos_line = 1;
pa->sc_pos_col = 1;
#endif
if (parser_token_put(pa, TK_FIRST) == NULL) {
_PROP_FREE(pa, M_TEMP);
return (ENOMEM);
}
*pp = pa;
return (0);
}
static void
prop_scn_parser_destroy(prop_parser_t arg)
{
struct parser *pa = arg;
struct token *t;
struct frame *e;
if (pa->sc_string)
_PROP_FREE(pa->sc_string, M_TEMP);
/* Free any pending tokens. */
while ((t = SIMPLEQ_FIRST(&pa->pa_tokens)) != NULL) {
SIMPLEQ_REMOVE_HEAD(&pa->pa_tokens, tok_link);
parser_token_free(t, TRUE);
}
/* Free any active stack frames. */
while ((e = SIMPLEQ_FIRST(&pa->pa_stack)) != NULL) {
SIMPLEQ_REMOVE_HEAD(&pa->pa_stack, se_link);
parser_frame_free(e);
}
/* Free any finished objects. */
while ((e = SIMPLEQ_FIRST(&pa->pa_consq)) != NULL) {
SIMPLEQ_REMOVE_HEAD(&pa->pa_consq, se_link);
parser_frame_free(e);
}
_PROP_FREE(pa, M_TEMP);
}
static prop_object_t
prop_scn_parser_yield(prop_parser_t arg)
{
struct parser *pa = arg;
struct frame *e;
prop_object_t o;
if ((e = SIMPLEQ_FIRST(&pa->pa_consq)) == NULL)
return (NULL);
SIMPLEQ_REMOVE_HEAD(&pa->pa_consq, se_link);
o = e->se_object;
e->se_object = NULL;
_PROP_ASSERT(e);
_PROP_ASSERT(o);
parser_frame_free(e);
return (o);
}
static int
prop_scn_parser_exec(prop_parser_t arg, const u_char *input, size_t length)
{
static const char *__truths[] = { "true", "yes", "on", NULL };
static const char *__lies[] = { "false", "no", "off", NULL };
prop_object_t the;
struct frame *frame;
struct token *t;
struct parser *pa = arg;
int nexttoken, ret;
ret = prop_scanner_exec(pa, input, length);
if (ret)
return (ret);
advance:
if ((t = SIMPLEQ_FIRST(&pa->pa_tokens)) == NULL)
return (0);
dispatch:
pa->pa_prev = pa->pa_state;
nexttoken = FALSE; /* for callcc */
switch (pa->pa_state) {
case PA_TOPLEVEL:
switch (t->tok_type) {
case TK_FIRST:
/* XXX read version token */
goto callnext;
case TK_DICTO:
if (parser_frame_enter(pa,
(prop_object_t)prop_dictionary_create())) {
VERBOSE(" parser: ENOMEM dictionary");
return (ENOMEM);
}
pa->pa_state = PA_DICTIONARY;
goto callnext;
case TK_ARRAYO:
if (parser_frame_enter(pa,
(prop_object_t)prop_array_create())) {
VERBOSE(" parser: ENOMEM array");
return (ENOMEM);
}
pa->pa_state = PA_ARRAY;
goto callnext;
case TK_LAST:
if (SIMPLEQ_NEXT(t, tok_link) != NULL) {
VERBOSE(" parser: stack not empty at EOF");
return (EINVAL);
}
return (0);
default:
/* GCC tries to be smart but fails. */
break;
}
if (t->tok_type != TK_WHITE) {
pa->pa_state = PA_ERROR;
goto callcc;
}
break;
case PA_ARRAY:
if (t->tok_type == TK_ARRAYC) {
if ((ret = parser_frame_leave(pa)) != 0) {
if (ret == EINVAL)
VERBOSE(" parser: [%d, %d] "
"misplaced ']'",
t->tok_pos_line, t->tok_pos_col);
return (ret);
}
pa->pa_state = PA_HOME;
goto callcc;
} else
if (t->tok_type != TK_WHITE) {
pa->pa_state = PA_OBJECT;
goto callcc;
}
break;
case PA_DICTIONARY:
switch (t->tok_type) {
case TK_DICTC:
if ((ret = parser_frame_leave(pa)) != 0) {
if (ret == EINVAL)
VERBOSE(" parser: [%d, %d] "
"misplaced '}'",
t->tok_pos_line, t->tok_pos_col);
return (ret);
}
pa->pa_state = PA_HOME;
goto callcc;
case TK_SYMBOL:
frame = SIMPLEQ_FIRST(&pa->pa_stack);
_PROP_ASSERT(frame && frame->se_symbol == NULL);
frame->se_symbol = (const char *)t->tok_string;
pa->pa_state = PA_OBJECT;
goto callnext;
default:
/* GCC */
break;
}
/* WHITE or ERROR */
break;
case PA_OBJECT:
if (t->tok_type == TK_WHITE)
break;
switch (t->tok_type) {
case TK_STRING:
the = prop_scn_create_string(t);
break;
case TK_UINT64:
the = prop_scn_create_uint(t);
break;
case TK_SINT64:
the = prop_scn_create_sint(t);
break;
case TK_DATA:
the = prop_data_create_data_nocopy(t->tok_data_buf,
t->tok_data_len);
break;
case TK_SYMBOL:
/* Coerce SYMBOL to bool at value position. */
if (stroneof((const char *)t->tok_string, __truths))
the = prop_bool_create(TRUE);
else
if (stroneof((const char *)t->tok_string, __lies))
the = prop_bool_create(FALSE);
else {
VERBOSE(" parser: [%d, %d] wrong BOOL '%s'",
t->tok_pos_line, t->tok_pos_col,
t->tok_string);
return (EINVAL);
}
break;
case _TK_COMPOUND_VALUES:
/* Descend one level deeper via TOPLEVEL actions. */
pa->pa_state = PA_TOPLEVEL;
goto callcc;
default:
pa->pa_state = PA_ERROR;
goto callcc;
}
/* We're supposed to have valid simple object now. */
if (the == NULL) {
VERBOSE(" parser: ENOMEM for %s",
parser_token_name(t->tok_type));
return (ENOMEM);
}
/* Store it in current container. */
if (parser_frame_store(pa, the))
return (ENOMEM);
pa->pa_state = PA_HOME;
goto callcc;
case PA_HOME:
/*
* We've just finished an object (simple or compound).
* Continue where we came from -- at the parent container's
* main entry point. We get here through callcc.
*/
frame = SIMPLEQ_FIRST(&pa->pa_stack);
if (frame == NULL) {
pa->pa_state = PA_TOPLEVEL;
break;
}
_PROP_ASSERT(frame->se_object);
switch (prop_object_type(frame->se_object)) {
case PROP_TYPE_ARRAY:
pa->pa_state = PA_ARRAY;
break;
case PROP_TYPE_DICTIONARY:
pa->pa_state = PA_DICTIONARY;
break;
default:
VERBOSE(" parser: wrong object on stack");
return (EINVAL);
}
break;
case PA_ERROR:
VERBOSE(" parser: [%d, %d] wrong token %s in state %s",
t->tok_pos_line, t->tok_pos_col,
parser_token_desc(t), parser_state_name(pa->pa_last));
return (EINVAL);
}
/* Call to next implies token was accepted, so stay above. */
if (pa->pa_prev == pa->pa_state &&
(pa->pa_state == PA_TOPLEVEL || pa->pa_state == PA_ARRAY ||
pa->pa_state == PA_DICTIONARY))
if (t->tok_type != TK_WHITE) {
pa->pa_state = PA_ERROR;
goto callcc;
}
callnext:
nexttoken = TRUE;
callcc:
if (pa->pa_state != pa->pa_last) {
VERBOSE(" parser: %-17s --> %-17s %s",
(nexttoken ? parser_state_name(pa->pa_prev) : ""),
parser_state_name(pa->pa_state),
parser_token_desc(t));
pa->pa_last = pa->pa_prev;
} else {
VERBOSE(" parser: %-17s ::: %-17s %s", "", "",
parser_token_desc(t));
}
if (nexttoken) {
SIMPLEQ_REMOVE_HEAD(&pa->pa_tokens, tok_link);
parser_token_free(t, FALSE);
goto advance;
} else {
goto dispatch;
}
/* UNREACHED */
}
static boolean_t
format_string_quote(struct poec *ec, const char *s)
{
const char *t;
boolean_t ret;
if (! poec_push(ec, '"'))
return (TRUE);
while (*s) {
t = NULL;
switch (*s) {
case '\f': t = "\\f"; break;
case '\n': t = "\\n"; break;
case '\r': t = "\\r"; break;
case '\t': t = "\\t"; break;
case '\v': t = "\\v"; break;
case '"': t = "\\\""; break;
}
if (t)
ret = poec_append(ec, t);
else
ret = poec_push(ec, *s);
if (ret == FALSE)
return (TRUE);
s++;
}
if (! poec_push(ec, '"'))
return (TRUE);
return (FALSE);
}
static boolean_t
format_data_base64(struct poec *ec, const char *s, size_t size)
{
const u_char *b = (const u_char *)s;
size_t i;
u_int n;
int ret = 0; /* XXX gcc sux */
poec_push(ec, ':');
/* LINTED n & b[] are unsigned */
n = b[0] >> 2;
for (i = 0; i < size; i++) {
switch (i % 3) {
case 0:
/* LINTED: b[] is u_char */
ret = poec_push(ec, base64abc[b[i] >> 2]);
n = b[i] & 0x03;
break;
case 1:
/* LINTED: b[] is u_char */
ret = poec_push(ec,
base64abc[(n << 4) | (b[i] >> 4)]);
n = b[i] & 0x0f;
break;
case 2:
/* LINTED: b[] is u_char */
if (!poec_push(ec, base64abc[(n << 2) | (b[i] >> 6)]) ||
!poec_push(ec, base64abc[b[i] & 0x3f]))
ret = FALSE;
else
ret = TRUE;
break;
}
if (ret == FALSE)
return (TRUE);
}
/* Finish based on how many bytes of a triplet we already have. */
switch (size % 3) {
case 1:
if (!poec_push(ec, base64abc[n << 4]))
return (TRUE);
break;
case 2:
if (!poec_push(ec, base64abc[n << 2]))
return (TRUE);
break;
}
/* Finally, pad to multiple of four characters of encoded text. */
switch (size % 3) {
case 1:
if (!poec_push(ec, '='))
return (TRUE);
/* FALLTHROUGH */
case 2:
if (!poec_push(ec, '='))
return (TRUE);
}
return (FALSE);
}
static boolean_t
format_indent(struct poec *ec)
{
int i;
#if 0
VERBOSE(" format: %zd/%zdB used, nesting depth %d",
ec->poec_len, ec->poec_capacity, ec->poec_depth);
#endif
for (i = 0; i < ec->poec_depth; i++)
if (poec_push(ec, '\t') == FALSE) {
VERBOSE(" format: ENOMEM indent");
return (TRUE);
}
return (FALSE);
}
static char *
prop_scn_externalize(prop_object_t o)
{
char buf[32];
struct stack sp;
struct frame *e;
struct poec *ec;
char *s;
prop_object_t the;
boolean_t ret;
SIMPLEQ_INIT(&sp);
if ((ec = poec_create()) == NULL)
return (NULL);
/*
* We only work for compound objects, like XML codec does.
* Note that this ensures we'll record ${o} on the stack.
*/
switch (prop_object_type(o)) {
case PROP_TYPE_ARRAY:
case PROP_TYPE_DICTIONARY:
break;
default:
goto lose;
}
the = o;
again:
switch (prop_object_type(the)) {
case PROP_TYPE_BOOL:
VERBOSE(" format: bool [%d]", ec->poec_depth);
if (prop_bool_true(the))
ret = poec_append(ec, "True");
else
ret = poec_append(ec, "False");
if (ret == FALSE)
goto lose;
break;
case PROP_TYPE_NUMBER:
VERBOSE(" format: number [%d]", ec->poec_depth);
if (prop_number_unsigned(the))
snprintf(buf, sizeof(buf), "#%" PRIx64,
prop_number_unsigned_integer_value(the));
else
snprintf(buf, sizeof(buf), "%" PRId64,
prop_number_integer_value(the));
if (poec_append(ec, buf) == FALSE)
goto lose;
break;
case PROP_TYPE_STRING:
VERBOSE(" format: string [%d]", ec->poec_depth);
if (format_string_quote(ec, prop_string_cstring_nocopy(the)))
goto lose;
break;
case PROP_TYPE_DATA:
VERBOSE(" format: data [%d]", ec->poec_depth);
if (format_data_base64(ec, prop_data_data_nocopy(the),
prop_data_size(the)))
goto lose;
break;
case PROP_TYPE_ARRAY:
VERBOSE(" format: array [%d]", ec->poec_depth);
if (!poec_append(ec, "["))
goto lose;
ec->poec_depth++;
if ((e = _PROP_MALLOC(sizeof(struct frame), M_TEMP)) == NULL)
goto lose;
memset(e, 0, sizeof(struct frame));
SIMPLEQ_INSERT_HEAD(&sp, e, se_link);
e->se_object = the;
e->se_iter = prop_array_iterator(the);
if (e->se_iter == NULL)
goto lose;
break;
case PROP_TYPE_DICTIONARY:
VERBOSE(" format: dictionary [%d]", ec->poec_depth);
if (!poec_append(ec, "{")) {
VERBOSE(" format: ENOMEM dictionary open");
goto lose;
}
ec->poec_depth++;
if ((e = _PROP_MALLOC(sizeof(struct frame), M_TEMP)) == NULL) {
VERBOSE(" format: ENOMEM stack frame");
goto lose;
}
memset(e, 0, sizeof(struct frame));
SIMPLEQ_INSERT_HEAD(&sp, e, se_link);
e->se_object = the;
e->se_iter = prop_dictionary_iterator(the);
if (e->se_iter == NULL) {
VERBOSE(" format: ENOMEM dictionary iterator");
goto lose;
}
break;
default:
VERBOSE(" format: object %p wrong type %d", the,
prop_object_type(the));
goto lose;
}
if (! poec_append(ec, "\n")) {
VERBOSE(" format: ENOMEM newline after object");
goto lose;
}
pop:
e = SIMPLEQ_FIRST(&sp);
if (e == NULL) {
_PROP_ASSERT(ec->poec_depth == 0);
printf("DONE\n"); /* XXX done */
}
_PROP_ASSERT(e->se_iter != NULL);
/* Grab next object. */
o = prop_object_iterator_next(e->se_iter);
if (o == NULL) {
SIMPLEQ_REMOVE_HEAD(&sp, se_link);
_PROP_ASSERT(ec->poec_depth != 0);
ec->poec_depth --;
if (format_indent(ec)) {
VERBOSE(" format: ENOMEM close indent");
goto lose;
}
if (prop_object_type(e->se_object) == PROP_TYPE_DICTIONARY)
ret = poec_append(ec, "}\n");
else
ret = poec_append(ec, "]\n");
if (ret == FALSE) {
VERBOSE(" format: ENOMEM close compound");
goto lose;
}
prop_object_iterator_release(e->se_iter);
_PROP_FREE(e, M_TEMP);
if (SIMPLEQ_EMPTY(&sp))
goto done;
else
goto pop;
}
/* Lookup the real object if we got indirect reference. */
if (prop_object_type(o) == PROP_TYPE_DICT_KEYSYM) {
const char *r;
prop_object_t p;
p = prop_dictionary_get_keysym((prop_dictionary_t)e->se_object,
(prop_dictionary_keysym_t)o);
_PROP_ASSERT(p != NULL);
r = prop_keysym_str((prop_dictionary_keysym_t)o);
if (r == NULL) {
VERBOSE(" format: EINVAL dictionary key");
goto lose;
}
if (format_indent(ec) || poec_append(ec, r) == FALSE ||
poec_push(ec, '\t') == FALSE) {
VERBOSE(" format: ENOMEM dictionary key");
goto lose;
}
the = p;
} else {
if (format_indent(ec)) {
VERBOSE(" format: ENOMEM array indent");
goto lose;
}
the = o;
}
/* We've got a fresh object from the compound, analyse it. */
goto again;
done:
/* Prepare the result for caller. */
ec->poec_buf[ec->poec_len] = '\0';
s = ec->poec_buf;
/* The stack is empty at this point, just free externalize context. */
poec_destroy(ec);
return (s);
/*NOTREACHED*/
lose:
VERBOSE(" format: LOST");
while ((e = SIMPLEQ_FIRST(&sp)) != NULL) {
SIMPLEQ_REMOVE_HEAD(&sp, se_link);
if (e->se_iter)
prop_object_iterator_release(e->se_iter);
_PROP_FREE(e, M_TEMP);
}
if (ec->poec_buf)
_PROP_FREE(ec->poec_buf, M_TEMP);
poec_destroy(ec);
return (NULL);
}
const struct _prop_codec prop_codec_scn = {
.codec_name = "scn",
.codec_sense = (const u_char *)".{[;",
.codec_externalize_compound = prop_scn_externalize,
.codec_parser_create = prop_scn_parser_create,
.codec_parser_exec = prop_scn_parser_exec,
.codec_parser_yield = prop_scn_parser_yield,
.codec_parser_destroy = prop_scn_parser_destroy,
};