/** \file tokenize.c
*
* Tokenize a string, accommodating quoted strings.
*
* @addtogroup autoopts
* @{
*/
/*
* This file defines the string_tokenize interface
* This file is part of AutoOpts, a companion to AutoGen.
* AutoOpts is free software.
* AutoOpts is Copyright (C) 1992-2018 by Bruce Korb - all rights reserved
*
* AutoOpts is available under any one of two licenses. The license
* in use must be one of these two and the choice is under the control
* of the user of the license.
*
* The GNU Lesser General Public License, version 3 or later
* See the files "COPYING.lgplv3" and "COPYING.gplv3"
*
* The Modified Berkeley Software Distribution License
* See the file "COPYING.mbsd"
*
* These files have the following sha256 sums:
*
* 8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95 COPYING.gplv3
* 4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b COPYING.lgplv3
* 13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239 COPYING.mbsd
*/
int max_token_ct = 2; /* allow for trailing NULL pointer & NUL on string */
if (str == NULL) goto enoent_res;
/*
* Trim leading white space. Use "ENOENT" and a NULL return to indicate
* an empty string was passed.
*/
str = SPN_WHITESPACE_CHARS(str);
if (*str == NUL) goto enoent_res;
/*
* Take an approximate count of tokens. If no quoted strings are used,
* it will be accurate. If quoted strings are used, it will be a little
* high and we'll squander the space for a few extra pointers.
*/
{
char const * pz = str;
do {
max_token_ct++;
pz = BRK_WHITESPACE_CHARS(pz+1);
pz = SPN_WHITESPACE_CHARS(pz);
} while (*pz != NUL);
/*=export_func ao_string_tokenize
*
* what: tokenize an input string
*
* arg: + char const * + string + string to be tokenized +
*
* ret_type: token_list_t *
* ret_desc: pointer to a structure that lists each token
*
* doc:
*
* This function will convert one input string into a list of strings.
* The list of strings is derived by separating the input based on
* white space separation. However, if the input contains either single
* or double quote characters, then the text after that character up to
* a matching quote will become the string in the list.
*
* The returned pointer should be deallocated with @code{free(3C)} when
* you are done using the data. The data are placed in a single block of
* allocated memory. Do not deallocate individual token/strings.
*
* The structure pointed to will contain at least these two fields:
* @table @samp
* @item tkn_ct
* The number of tokens found in the input string.
* @item tkn_list
* An array of @code{tkn_ct + 1} pointers to substring tokens, with
* the last pointer set to NULL.
* @end table
*
* There are two types of quoted strings: single quoted (@code{'}) and
* double quoted (@code{"}). Singly quoted strings are fairly raw in that
* escape characters (@code{\\}) are simply another character, except when
* preceding the following characters:
* @example
* @code{\\} double backslashes reduce to one
* @code{'} incorporates the single quote into the string
* @code{\n} suppresses both the backslash and newline character
* @end example
*
* Double quote strings are formed according to the rules of string
* constants in ANSI-C programs.
*
* example:
* @example
* #include <stdlib.h>
* int ix;
* token_list_t * ptl = ao_string_tokenize(some_string);
* for (ix = 0; ix < ptl->tkn_ct; ix++)
* do_something_with_tkn(ptl->tkn_list[ix]);
* free(ptl);
* @end example
* Note that everything is freed with the one call to @code{free(3C)}.
*
* err:
* NULL is returned and @code{errno} will be set to indicate the problem:
* @itemize @bullet
* @item
* @code{EINVAL} - There was an unterminated quoted string.
* @item
* @code{ENOENT} - The input string was empty.
* @item
* @code{ENOMEM} - There is not enough memory.
* @end itemize
=*/
token_list_t *
ao_string_tokenize(char const * str)
{
token_list_t * res = alloc_token_list(str);
ch_t * pzDest;
/*
* Now copy each token into the output buffer.
*/
if (res == NULL)
return res;