/*-
* Copyright (c) 2006, 2007, 2025 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Jason R. Thorpe.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * _prop_intern_skip_whitespace --
 *	Skip any span of whitespace.
 *
 *	Starting at cp, step over every consecutive character for which
 *	_PROP_ISSPACE() is true and return a pointer to the first
 *	character for which it is not.  If *cp is not whitespace, cp is
 *	returned unchanged.
 */
const char *
_prop_intern_skip_whitespace(const char *cp)
{
	const char *cursor;

	for (cursor = cp; _PROP_ISSPACE(*cursor); cursor++) {
		continue;
	}
	return cursor;
}
/*
 * _prop_intern_match --
 *	Compare two counted (not necessarily NUL-terminated) character
 *	streams.
 *
 *	Returns true only when both streams have the same length and
 *	their first len1 octets are identical; returns false otherwise.
 */
bool
_prop_intern_match(const char *str1, size_t len1,
    const char *str2, size_t len2)
{
	/* Streams of different length can never match. */
	if (len1 != len2) {
		return false;
	}

	return memcmp(str1, str2, len2) == 0;
}
/*
* _prop_xml_intern_skip_comment --
* Skip the body and end tag of an XML comment.
*/
static bool
_prop_xml_intern_skip_comment(struct _prop_object_internalize_context *ctx)
{
const char *cp = ctx->poic_cp;
/*
* _prop_xml_intern_find_tag --
* Find the next tag in an XML stream. Optionally compare the found
* tag to an expected tag name. State of the context is undefined
* if this routine returns false. Upon success, the context points
* to the first octet after the tag.
*
* ctx: parse context; scanning starts at ctx->poic_cp. The tag's
* start, name, type, empty-element flag and attribute fields are
* recorded in the context as they are recognized.
* tag: expected tag name, or NULL to accept any name.
* type: _PROP_TAG_TYPE_START, _PROP_TAG_TYPE_END or
* _PROP_TAG_TYPE_EITHER, selecting which kind of tag is acceptable.
*
* XML comments in front of the tag are skipped, unless only an end
* tag was requested, in which case a comment is a failure.
*/
bool
_prop_xml_intern_find_tag(struct _prop_object_internalize_context *ctx,
const char *tag, _prop_tag_type_t type)
{
const char *cp;
size_t taglen;
/* Length of the expected name, computed once; 0 when any name is accepted. */
taglen = tag != NULL ? strlen(tag) : 0;
/* Re-entered after each skipped comment, with ctx->poic_cp advanced past it. */
start_over:
cp = ctx->poic_cp;
/*
* Find the start of the tag.
*/
cp = _prop_intern_skip_whitespace(cp);
if (*cp != '<') {
return false;
}
/* Remember where the '<' is, then step past it. */
ctx->poic_tag_start = cp++;
if (_PROP_EOF(*cp)) {
return false;
}
if (*cp == '!') {
/*
* "<!" is only accepted as the start of a comment ("<!--").
* The || short-circuits, so cp[2] is read only when cp[1]
* is '-'.
*/
if (cp[1] != '-' || cp[2] != '-') {
return false;
}
/*
* Comment block -- only allowed if we are allowed to
* return a start tag.
*/
if (type == _PROP_TAG_TYPE_END) {
return false;
}
/* cp is at the '!'; cp + 3 is the first octet after "!--". */
ctx->poic_cp = cp + 3;
if (_prop_xml_intern_skip_comment(ctx) == false) {
return false;
}
goto start_over;
}
/* A '/' directly after the '<' marks an end tag. */
if (*cp == '/') {
if (type != _PROP_TAG_TYPE_END &&
type != _PROP_TAG_TYPE_EITHER) {
return false;
}
cp++;
if (_PROP_EOF(*cp)) {
return false;
}
ctx->poic_tag_type = _PROP_TAG_TYPE_END;
} else {
if (type != _PROP_TAG_TYPE_START &&
type != _PROP_TAG_TYPE_EITHER) {
return false;
}
ctx->poic_tag_type = _PROP_TAG_TYPE_START;
}
/* The tag name runs up to the first whitespace, '/' or '>'. */
ctx->poic_tagname = cp;
while (!_PROP_ISSPACE(*cp) && *cp != '/' && *cp != '>') {
if (_PROP_EOF(*cp)) {
return false;
}
cp++;
}
ctx->poic_tagname_len = cp - ctx->poic_tagname;
/* Make sure this is the tag we're looking for. */
if (tag != NULL &&
(taglen != ctx->poic_tagname_len ||
memcmp(tag, ctx->poic_tagname, taglen) != 0)) {
return false;
}
/* Check for empty tag. */
if (*cp == '/') {
if (ctx->poic_tag_type != _PROP_TAG_TYPE_START) {
return false; /* only valid on start tags */
}
ctx->poic_is_empty_element = true;
/* The '/' of an empty element must be followed directly by '>'. */
cp++;
if (_PROP_EOF(*cp) || *cp != '>') {
return false;
}
} else {
ctx->poic_is_empty_element = false;
}
/* Easy case of no arguments. */
if (*cp == '>') {
ctx->poic_tagattr = NULL;
ctx->poic_tagattr_len = 0;
ctx->poic_tagattrval = NULL;
ctx->poic_tagattrval_len = 0;
ctx->poic_cp = cp + 1;
return true;
}
/*
* Not '>' and not an empty-element '/', so the tag name ended on
* whitespace: step over it and expect an attribute.
*/
_PROP_ASSERT(!_PROP_EOF(*cp));
cp++;
if (_PROP_EOF(*cp)) {
return false;
}
cp = _prop_intern_skip_whitespace(cp);
if (_PROP_EOF(*cp)) {
return false;
}
/* The attribute name runs up to the first whitespace or '='. */
ctx->poic_tagattr = cp;
while (!_PROP_ISSPACE(*cp) && *cp != '=') {
if (_PROP_EOF(*cp)) {
return false;
}
cp++;
}
ctx->poic_tagattr_len = cp - ctx->poic_tagattr;
/*
* Step over the single octet that ended the attribute name; the
* opening quote of the value must follow immediately.
*/
cp++;
if (*cp != '\"') {
return false;
}
/* Step past the opening quote; the input must not end here. */
cp++;
if (_PROP_EOF(*cp)) {
return false;
}
/*
* _prop_xml_intern_by_tag --
* Determine the object type from the tag in the context and
* internalize it.
*/
static prop_object_t
_prop_xml_intern_by_tag(struct _prop_object_internalize_context *ctx)
{
const struct _prop_object_internalizer *poi;
prop_object_t obj, parent_obj;
void *data, *iter;
prop_object_internalizer_continue_t iter_func;
struct _prop_stack stack;
/*
 * ADDCHAR --
 *	Append one octet to the output of the enclosing function.
 *
 *	Relies on three names in the enclosing scope: target (the output
 *	buffer, which may be NULL), tarindex (the running output index)
 *	and targsize (the buffer capacity).  When target is non-NULL and
 *	already full, the enclosing function returns false.  When target
 *	is NULL nothing is stored, but tarindex is still advanced.
 */
#define	ADDCHAR(x)							\
do {									\
	if (target != NULL && tarindex >= targsize) {			\
		return false;						\
	}								\
	if (target != NULL) {						\
		target[tarindex] = (x);					\
	}								\
	tarindex++;							\
} while (/*CONSTCOND*/0)
/*
* _prop_json_intern_decode_uesc_getu16 --
* Get the 16-bit value from a "u-escape" ("\uXXXX").
*/
static unsigned int
_prop_json_intern_decode_uesc_getu16(const char *src, unsigned int idx,
uint16_t *valp)
{
unsigned int i;
uint16_t val;
unsigned char c;
/*
* _prop_json_intern_decode_uesc --
* Decode a JSON UTF-16 "u-escape" ("\uXXXX").
*
* One 16-bit unit is fetched from src; if it is a high surrogate, a
* second unit is fetched and must be a low surrogate. The two are
* combined into a single code point. A lone low surrogate, or a
* high surrogate not followed by a low one, is an error.
*
* Returns 0 on any decoding error.
*
* NOTE(review): c and cszp presumably receive the UTF-8 octets and
* their count -- confirm against the rest of the function.
*/
static int
_prop_json_intern_decode_uesc(const char *src, char *c, unsigned int *cszp)
{
unsigned int idx = 0;
uint32_t code;
uint16_t code16[2] = { 0, 0 };
/* A returned index of 0 is treated as failure to read the 16-bit unit. */
idx = _prop_json_intern_decode_uesc_getu16(src, idx, &code16[0]);
if (idx == 0) {
return 0;
}
if (! SURROGATE_P(code16[0])) {
/* Simple case: not a surrogate pair */
code = code16[0];
/*
* NOTE(review): the macro name below is spelled "SURROGAGE";
* presumably this matches its definition elsewhere -- confirm.
*/
} else if (HIGH_SURROGAGE_P(code16[0])) {
/* Fetch the second unit, continuing from where the first ended. */
idx = _prop_json_intern_decode_uesc_getu16(src, idx,
&code16[1]);
if (idx == 0) {
return 0;
}
/* Next code must be the low surrogate. */
if (! LOW_SURROGATE_P(code16[1])) {
return 0;
}
/*
* Combine the pair: the high surrogate's offset from HS_FIRST
* supplies the upper bits, the low surrogate's offset from
* LS_FIRST the lower bits, and the result is biased by 0x10000.
*/
code = (((uint32_t)code16[0] - HS_FIRST) << HS_SHIFT) +
( code16[1] - LS_FIRST) +
0x10000;
} else {
/* Got the low surrogate first; this is an error. */
return 0;
}
/*
* Ok, we have the code point. Now convert it to UTF-8.
* First we'll just split into nybbles.
*/
/* u is bits 20-23 of the code point, down to z which is bits 0-3. */
uint8_t u = (code >> 20) & 0xf;
uint8_t v = (code >> 16) & 0xf;
uint8_t w = (code >> 12) & 0xf;
uint8_t x = (code >> 8) & 0xf;
uint8_t y = (code >> 4) & 0xf;
uint8_t z = (code ) & 0xf;
/*
* If we're digesting JSON, check for a byte order mark and
* skip it, if present. We should never see one, but we're
* allowed to detect and ignore it. (RFC 8259 section 8.1)
*/
if (fmt == PROP_FORMAT_JSON) {
if (((unsigned char)data[0] == 0xff &&
(unsigned char)data[1] == 0xfe) ||
((unsigned char)data[0] == 0xfe &&
(unsigned char)data[1] == 0xff)) {
ctx->poic_cp = data + 2;
}
/* No additional processing work to do for JSON. */
return ctx;
}
/*
* Skip any whitespace and XML preamble stuff that we don't
* know about / care about.
*/
for (;;) {
data = _prop_intern_skip_whitespace(data);
if (_PROP_EOF(*data) || *data != '<') {
goto bad;
}
/*
* Skip over the XML preamble that Apple XML property
* lists usually include at the top of the file.
*/
if (MATCH("?xml ") ||
MATCH("!DOCTYPE plist")) {
while (*data != '>' && !_PROP_EOF(*data)) {
data++;
}
if (_PROP_EOF(*data)) {
goto bad;
}
data++; /* advance past the '>' */
continue;
}
if (MATCH("<!--")) {
ctx->poic_cp = data + 4;
if (_prop_xml_intern_skip_comment(ctx) == false) {
goto bad;
}
data = ctx->poic_cp;
continue;
}
#undef MATCH
/*
* We don't think we should skip it, so let's hope we can
* parse it.
*/
break;
}
/* Ensure there's no trailing junk. */
if (parent_obj != NULL) {
ctx->poic_cp = _prop_intern_skip_whitespace(ctx->poic_cp);
if (!_PROP_EOF(*ctx->poic_cp)) {
prop_object_release(parent_obj);
parent_obj = NULL;
}
}
return parent_obj;
}
/*
 * _prop_object_internalize_xml --
 *	Internalize a property list from XML data.
 *
 *	The input must consist of a non-empty <plist> start tag (carrying
 *	either no attribute or a "version" attribute), the main element,
 *	and a closing </plist> tag.  If initial_tag is non-NULL the main
 *	element's name must be initial_tag->xml_tag; otherwise any start
 *	tag is accepted.
 *
 *	Returns the internalized object, or NULL if any step fails.  An
 *	object that was built before a missing </plist> was detected is
 *	released before NULL is returned.
 */
static prop_object_t
_prop_object_internalize_xml(struct _prop_object_internalize_context *ctx,
    const struct _prop_object_type_tags *initial_tag)
{
	prop_object_t obj = NULL;

	/* We start with a <plist> tag. */
	if (_prop_xml_intern_find_tag(ctx, "plist",
	    _PROP_TAG_TYPE_START) == false) {
		goto out;
	}

	/* Plist elements cannot be empty. */
	if (ctx->poic_is_empty_element) {
		goto out;
	}

	/*
	 * We don't understand any plist attributes, but Apple XML
	 * property lists often have a "version" attribute.  If we
	 * see that one, we simply ignore it.
	 */
	if (ctx->poic_tagattr != NULL &&
	    !_PROP_TAGATTR_MATCH(ctx, "version")) {
		goto out;
	}

	/* Next we expect to see the opening main tag. */
	if (_prop_xml_intern_find_tag(ctx,
	    initial_tag != NULL ? initial_tag->xml_tag : NULL,
	    _PROP_TAG_TYPE_START) == false) {
		goto out;
	}

	/*
	 * Internalize the main object from the tag now held in the
	 * context.  Without this step obj would stay NULL, so nothing
	 * could ever be returned, and the main element would never be
	 * consumed before we go looking for </plist>.
	 */
	obj = _prop_xml_intern_by_tag(ctx);
	if (obj == NULL) {
		goto out;
	}

	/*
	 * We've advanced past the closing main tag.
	 * Now we want </plist>.
	 */
	if (_prop_xml_intern_find_tag(ctx, "plist",
	    _PROP_TAG_TYPE_END) == false) {
		prop_object_release(obj);
		obj = NULL;
	}

 out:
	return obj;
}
/*
* _prop_object_internalize --
* Internalize a property list from a NUL-terminated data blob.
*/
prop_object_t
_prop_object_internalize(const char *data,
const struct _prop_object_type_tags *initial_tag)
{
struct _prop_object_internalize_context *ctx;
prop_object_t obj;
prop_format_t fmt;
/*
* Skip all leading whitespace and look at the first
* non-whitespace character to determine the format:
* An XML plist will always have '<' as the first non-ws
* character. If we encounter something else, we assume
* it is JSON.
*/
data = _prop_intern_skip_whitespace(data);
if (_PROP_EOF(*data)) {
return NULL;
}
/*
* If the file length is an integral number of pages, then we
* need to map a guard page at the end in order to provide the
* necessary NUL-termination of the buffer.
*/
bool need_guard = (sb.st_size & pgmask) == 0;