// The next few routines take a "character class" as argument.
// e.g., "a-zA-Z", or "^ \t\n"
// (ranges indicated by - except in first position;
// ^ in first position means "not in" the following class)
// Splitl splits s[0:n] just before first character of class cl.
// Answers go in (p1, n1) and (p2, n2).
// If no split, the whole thing goes in the first component.
// Note: answers contain pointers into original string.
void
_splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
{
Rune* p;
// Splitr splits s[0:n] just after last character of class cl.
// Answers go in (p1, n1) and (p2, n2).
// If no split, the whole thing goes in the last component.
// Note: answers contain pointers into original string.
void
_splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
{
Rune* p;
// Splitall splits s[0:n] into parts that are separated by characters from class cl.
// Each part will have nonzero length.
// At most alen parts are found, and pointers to their starts go into
// the strarr array, while their lengths go into the lenarr array.
// The return value is the number of parts found.
int
_splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen)
{
int i;
Rune* p;
Rune* q;
Rune* slast;
// Find part of s that excludes leading and trailing whitespace,
// and return that part in *pans (and its length in *panslen).
void
_trimwhite(Rune* s, int n, Rune** pans, int* panslen)
{
Rune* p;
Rune* q;
// _Strnclass returns a pointer to the first element of s[0:n] that is
// a member of class cl, nil if none.
Rune*
_Strnclass(Rune* s, Rune* cl, int n)
{
Rune* p;
// _Strnrclass returns a pointer to the last element of s[0:n] that is
// a member of class cl (as decided by _inclass), or nil if there is none.
// It also returns nil when s is nil, when s[0] is 0, or when n is not positive.
Rune*
_Strnrclass(Rune* s, Rune* cl, int n)
{
	int i;

	if(s == nil || *s == 0 || n <= 0)
		return nil;
	// Walk backwards by index rather than by decrementing a pointer,
	// so that no pointer before the start of s is ever formed.
	for(i = n - 1; i >= 0; i--) {
		if(_inclass(s[i], cl))
			return s + i;
	}
	return nil;
}
// Report whether c belongs to the character class cl.
// cl is a 0-terminated class string (its length is taken with _Strlen).
// A leading '^' inverts the result; the rest of the string is then the class.
// A '-' that is neither the first nor the last character of the class
// denotes the inclusive range between its two neighbours.
// Returns 1 for a member, 0 otherwise; an empty cl always gives 0.
int
_inclass(Rune c, Rune* cl)
{
	int len;
	int pos;
	int invert;
	int found;

	len = _Strlen(cl);
	if(len == 0)
		return 0;
	invert = (cl[0] == '^');
	if(invert) {
		// skip the '^' so positions below are relative to the class proper
		cl++;
		len--;
	}
	found = 0;
	pos = 0;
	while(pos < len && !found) {
		if(cl[pos] == '-' && pos > 0 && pos < len - 1) {
			if(cl[pos - 1] <= c && c <= cl[pos + 1])
				found = 1;
			else
				pos += 2;	// step over the '-' and the range's upper bound
		}
		else if(cl[pos] == c)
			found = 1;
		else
			pos++;
	}
	return invert ? !found : found;
}
// Is pre a prefix of s?
int
_prefix(Rune* pre, Rune* s)
{
int ns;
int n;
int k;
// Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars).
// Also, do a case-insensitive match, assuming s2
// has no chars in [A-Z], only their lowercase versions.
// (This routine is used for in-place keyword lookup, where s2 is in a keyword
// list and s1 is some substring, possibly mixed-case, in a buffer.)
int
_Strncmpci(Rune *s1, int n1, Rune *s2)
{
Rune c1, c2;
// Copy the first n Runes of s into storage obtained from _newstr
// and terminate the copy with a 0 Rune.
// s is assumed to hold at least n Runes.
// Returns nil, without allocating, when n <= 0.
Rune*
_Strndup(Rune* s, int n)
{
	Rune* copy;

	if(n > 0) {
		copy = _newstr(n);
		memmove(copy, s, n*sizeof(Rune));
		copy[n] = 0;
		// record getcallerpc(&s) as the malloc tag of the new string
		setmalloctag(copy, getcallerpc(&s));
		return copy;
	}
	return nil;
}
// emalloc enough room for n Runes, plus 1 null terminator.
// (Not initialized to anything.)
Rune*
_newstr(int n)
{
Rune* ans;
ans = (Rune*)emalloc((n+1)*sizeof(Rune));
setmalloctag(ans, getcallerpc(&n));
return ans;
}
// Return a new string, allocated with _newstr, holding s followed by t
// and a 0 terminator.
// Returns nil when the lengths of s and t (per _Strlen) add up to 0.
Rune*
_Strdup2(Rune* s, Rune* t)
{
	int slen;
	int tlen;
	int total;
	Rune* joined;
	Rune* end;

	slen = _Strlen(s);
	tlen = _Strlen(t);
	total = slen + tlen;
	if(total == 0)
		return nil;
	joined = _newstr(total);
	// the value returned by each _Stradd call is used as the
	// destination for whatever is written next
	end = _Stradd(joined, s, slen);
	end = _Stradd(end, t, tlen);
	*end = 0;
	setmalloctag(joined, getcallerpc(&s));
	return joined;
}
// Return emalloc'd substring s[start:stop].
Rune*
_Strsubstr(Rune* s, int start, int stop)
{
Rune* t;
// Convert buf[0:n], bytes whose character set is chset,
// into an emalloc'd null-terminated Unicode string.
Rune*
toStr(uchar* buf, int n, int chset)
{
int i;
int m;
Rune ch;
Rune* ans;
switch(chset) {
case US_Ascii:
case ISO_8859_1:
ans = (Rune*)emalloc((n+1)*sizeof(Rune));
for(i = 0; i < n; i++)
ans[i] = buf[i];
ans[n] = 0;
break;
case UTF_8:
m = 0;
for(i = 0; i < n; ) {
i += chartorune(&ch, (char*)(buf+i));
m++;
}
ans = (Rune*)emalloc((m+1)*sizeof(Rune));
m = 0;
for(i = 0; i < n; ) {
i += chartorune(&ch, (char*)(buf+i));
ans[m++] = ch;
}
ans[m] = 0;
break;
// Convert buf[0:n], Unicode characters,
// into an emalloc'd null-terminated string in character set chset.
// Use Runeerror for unconvertible characters.
uchar*
fromStr(Rune* buf, int n, int chset)
{
uchar* ans;
int i, lim, m;
Rune ch;
uchar* p;
uchar s[UTFmax];
ans = nil;
switch(chset) {
case US_Ascii:
case ISO_8859_1:
ans = (uchar*)emalloc(n+1);
lim = (chset==US_Ascii)? 127 : 255;
for(i = 0; i < n; i++) {
ch = buf[i];
if(ch > lim)
ch = Runeerror;
ans[i] = ch;
}
ans[n] = 0;
break;
case UTF_8:
m = 0;
for(i = 0; i < n; i++) {
m += runetochar((char*)s, &buf[i]);
}
ans = (uchar*)emalloc(m+1);
p = ans;
for(i = 0; i < n; i++)
p += runetochar((char*)p, &buf[i]);
*p = 0;
break;