/*
 * Copyright (c) 2003-2013
 * Distributed Systems Software.  All rights reserved.
 * See the file LICENSE for redistribution information.
 */

/*****************************************************************************
 * COPYRIGHT AND PERMISSION NOTICE
 * 
 * Copyright (c) 2001-2003 The Queen in Right of Canada
 * 
 * All rights reserved.
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation 
 * the rights to use, copy, modify, merge, publish, distribute, and/or sell
 * copies of the Software, and to permit persons to whom the Software is 
 * furnished to do so, provided that the above copyright notice(s) and this
 * permission notice appear in all copies of the Software and that both the
 * above copyright notice(s) and this permission notice appear in supporting
 * documentation.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE 
 * BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 
 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 
 * SOFTWARE.
 * 
 * Except as contained in this notice, the name of a copyright holder shall not
 * be used in advertising or otherwise to promote the sale, use or other
 * dealings in this Software without prior written authorization of the
 * copyright holder.
 ***************************************************************************/

/*
 * Various string manipulation functions
 * These aren't necessarily the fastest or most space efficient algorithms,
 * but that's ok for our applications.
 */

#ifndef lint
static const char copyright[] =
"Copyright (c) 2003-2013\n\
Distributed Systems Software.  All rights reserved.";
static const char revid[] =
  "$Id: str.c 2649 2013-02-28 17:45:03Z brachman $";
#endif

#ifdef DSSLIB
#include "dsslib.h"
#else
#include "local.h"
#endif

/* Name of this module; presumably consumed by the logging macros - TODO confirm. */
static const char *log_module_name = "str";

/*
 * Copy at most N characters from SRC to DST, always null-terminating DST.
 * This is like strncpy() except the copy is always null-terminated and DST
 * is not padded with nulls.
 *
 * Copying stops once the null in SRC has been copied or once N characters
 * have been copied, whichever comes first; in the latter case a terminating
 * null is stored at DST[N].  DST must therefore have room for N + 1 bytes.
 * Return DST.
 */
char *
strcpyn(char *dst, const char *src, size_t n)
{
  size_t i;

  /*
   * The index has the same type as N so that the bound is honoured for
   * every value of N (a narrower index could wrap before reaching it).
   */
  for (i = 0; i < n; i++) {
	if ((dst[i] = src[i]) == '\0')
	  return(dst);
  }
  dst[i] = '\0';

  return(dst);
}

/*
 * STR holds one or more lines of text.
 * Join continued lines (those ending in backslash-newline) into one long
 * line by dropping each backslash-newline pair.  A backslash followed by any
 * other character is copied together with that character; a backslash that
 * is the very last character of STR is dropped.
 * Return the Ds obtained from ds_init(NULL), holding the null-terminated
 * result.
 */
Ds *
strfold(char *str)
{
  const char *s;
  Ds *folded;

  folded = ds_init(NULL);

  s = str;
  while (*s != '\0') {
	if (*s != '\\') {
	  /* An ordinary character is copied as is. */
	  ds_appendc(folded, (int) *s);
	  s++;
	  continue;
	}

	if (s[1] == '\n') {
	  /* A continuation: skip both the backslash and the newline. */
	  s += 2;
	}
	else if (s[1] == '\0') {
	  /* A trailing backslash is discarded. */
	  s++;
	}
	else {
	  /* Keep the backslash and whatever follows it. */
	  ds_appendc(folded, (int) s[0]);
	  ds_appendc(folded, (int) s[1]);
	  s += 2;
	}
  }

  ds_appendc(folded, (int) '\0');
  return(folded);
}

/*
 * Destructively trim up to LIMIT characters (unlimited if LIMIT == 0) in
 * the set TRIM_CHARS from the end of STR by putting a null character on top
 * of the leftmost such character.  Membership in TRIM_CHARS is decided by
 * strtr_char().  Return the first argument.
 *
 * The scan works with a length rather than a pointer so that an empty STR
 * never requires forming a pointer that precedes the start of the string.
 */
char *
strtrim(char *str, char *trim_chars, int limit)
{
  int n;
  size_t len;

  len = strlen(str);
  n = 0;
  while (len > 0 && (!limit || n < limit)
		 && strtr_char((int) str[len - 1], trim_chars, 0)) {
	len--;
	n++;
  }

  /* STR[LEN] is either the original terminator or the leftmost trimmed char. */
  str[len] = '\0';

  return(str);
}

#ifndef HAVE_STRSEP
/*
 * Fallback strsep() for systems where HAVE_STRSEP is not defined.
 * Starting at *STRINGP, find the first character that appears in DELIM,
 * overwrite it with a null, and advance *STRINGP to the character after it.
 * If no delimiter is found, *STRINGP is set to NULL.
 * Return the original value of *STRINGP (the start of the token), or NULL if
 * STRINGP, *STRINGP, or DELIM is NULL.
 */
char *
strsep(char **stringp, const char *delim)
{
  char *scan, *token;
  const char *d;

  if (stringp == NULL || delim == NULL)
	return(NULL);
  if ((token = *stringp) == NULL)
	return(NULL);

  scan = token;
  while (*scan != '\0') {
	/* Is the current character one of the delimiters? */
	d = delim;
	while (*d != '\0' && *d != *scan)
	  d++;

	if (*d != '\0') {
	  /* Yes: terminate the token here and resume after it next time. */
	  *scan = '\0';
	  *stringp = scan + 1;
	  return(token);
	}
	scan++;
  }

  /* No delimiter: the remainder of the string is the final token. */
  *stringp = NULL;
  return(token);
}
#endif

#ifdef NOTDEF
/*
 * Local version of strcspn(); compiled only if NOTDEF is defined.
 * Return the length of the initial substring of s1 which does not
 * contain any characters in s2.
 * Each character of s1 is compared against every character of s2, so
 * use another algorithm on long strings.
 */
int
strcspn(char *s1, char *s2)
{
  char *p1, *p2;

  for (p1 = s1; *p1 != '\0'; p1++) {
	/* Scan s2 for the current character of s1. */
	for (p2 = s2; *p2 != *p1 && *p2 != '\0'; p2++)
	  ;
	/* Stopping before the end of s2 means *p1 occurs in s2. */
	if (*p2 != '\0')
	  break;
  }
  return((int) (p1 - s1));
}
#endif

/*
 * Return the length of the initial substring of S1 which does not
 * contain any characters in S2, ignoring case.
 * Each character of S1 is compared against every character of S2, so
 * use another algorithm on long strings.
 *
 * Characters are converted to unsigned char before being handed to
 * tolower(), which is only defined for such values (and EOF).
 */
int
strcasecspn(const char *s1, const char *s2)
{
  const char *p1, *p2;
  int c1;

  for (p1 = s1; *p1 != '\0'; p1++) {
	c1 = tolower((unsigned char) *p1);
	for (p2 = s2; *p2 != '\0'; p2++) {
	  if (tolower((unsigned char) *p2) == c1)
		break;
	}
	/* Stopping before the end of S2 means *P1 occurs in S2. */
	if (*p2 != '\0')
	  break;
  }

  return((int) (p1 - s1));
}

/*
 * Duplicate S until STOP_CH (or the end of S) is seen, then set ENDP
 * (if non-NULL) to point to STOP_CH in S (or the nul in S).
 * The function name is supposed to be analogous to 'strndup()'.
 * It can be useful when parsing, for instance.
 *
 * STOP_CH is converted to char before comparing, as strchr() does, so a
 * character with its high bit set is found whether the caller passes it as
 * a negative char value or as its unsigned value.
 * Return the copy made by strndup() (NULL if that fails).
 */
char *
strchrdup(const char *s, int stop_ch, char **endp)
{
  const char *p;
  char stop;

  stop = (char) stop_ch;
  for (p = s; *p != '\0'; p++) {
	if (*p == stop)
	  break;
  }

  if (endp != NULL)
	*endp = (char *) p;

  /* P is at the stop character or at the nul; copy what precedes it. */
  return(strndup(s, (size_t) (p - s)));
}

/*
 * Duplicate S until any character in CHARSET (or the end of S) is seen, then
 * set ENDP (if non-NULL) to point to that character in S (or the nul in S).
 * The function name is supposed to be analogous to 'strndup()'.
 * It can be useful when parsing, for instance.
 * Return the copy made by strndup() (NULL if that fails).
 */
char *
strcspndup(const char *s, const char *charset, char **endp)
{
  const char *p;

  for (p = s; *p != '\0'; p++) {
	if (strchr(charset, (int) *p) != NULL)
	  break;
  }

  if (endp != NULL)
	*endp = (char *) p;

  /* P is at the first CHARSET character or at the nul; copy what precedes it. */
  return(strndup(s, (size_t) (p - s)));
}

/*
 * Return a copy of STR, made with strdup(), in which every uppercase
 * character has been converted to lowercase.  STR itself is not modified.
 * Return NULL if the copy cannot be allocated.
 */
char *
strtolower(char *str)
{
  char *copy, *q;

  if ((copy = strdup(str)) == NULL)
	return(NULL);

  /* tolower() is only defined for unsigned char values (and EOF). */
  for (q = copy; *q != '\0'; q++)
	*q = (char) tolower((unsigned char) *q);

  return(copy);
}

/*
 * Return a copy of STR, made with strdup(), in which every lowercase
 * character has been converted to uppercase.  STR itself is not modified.
 * Return NULL if the copy cannot be allocated.
 */
char *
strtoupper(char *str)
{
  char *copy, *q;

  if ((copy = strdup(str)) == NULL)
	return(NULL);

  /* toupper() is only defined for unsigned char values (and EOF). */
  for (q = copy; *q != '\0'; q++)
	*q = (char) toupper((unsigned char) *q);

  return(copy);
}

/*
 * Stringification.
 * Two levels of macro are used so that NAME is macro-expanded before it is
 * turned into a string literal.
 */
#define STRNUM_TYPENAME(NAME)		STRNUM_TYPENAME_X(NAME)
#define STRNUM_TYPENAME_X(NAME)		#NAME

/*
 * Table of the numeric types known by name.  Each entry gives the Strnum
 * type code, the size of the corresponding C type, and the name of that C
 * type as a string (the stringified expansion of the STRNUM_*_TYPE macro).
 * The table ends with an entry whose name is NULL.
 * NOTE(review): STRNUM_UINZ, which strnum_b() accepts, has no entry here -
 * confirm that is intentional.
 */
/* XXX maybe this should be run-time extensible? */
static Strnum_tab strnum_tab[] = {
  { STRNUM_I,      sizeof(STRNUM_I_TYPE),   STRNUM_TYPENAME(STRNUM_I_TYPE) },
  { STRNUM_UI,     sizeof(STRNUM_UI_TYPE),  STRNUM_TYPENAME(STRNUM_UI_TYPE) },
  { STRNUM_L,      sizeof(STRNUM_L_TYPE),   STRNUM_TYPENAME(STRNUM_L_TYPE) },
  { STRNUM_UL,     sizeof(STRNUM_UL_TYPE),  STRNUM_TYPENAME(STRNUM_UL_TYPE) },
  { STRNUM_LL,     sizeof(STRNUM_LL_TYPE),  STRNUM_TYPENAME(STRNUM_LL_TYPE) },
  { STRNUM_ULL,    sizeof(STRNUM_ULL_TYPE), STRNUM_TYPENAME(STRNUM_ULL_TYPE) },
  { STRNUM_STR,    sizeof(STRNUM_STR_TYPE), STRNUM_TYPENAME(STRNUM_STR_TYPE) },
  { STRNUM_TIME_T, sizeof(STRNUM_TIME_T_TYPE),
	STRNUM_TYPENAME(STRNUM_TIME_T_TYPE) },
  { STRNUM_MODE_T, sizeof(STRNUM_MODE_T_TYPE),
	STRNUM_TYPENAME(STRNUM_MODE_T_TYPE) },
  { STRNUM_PID_T, sizeof(STRNUM_PID_T_TYPE),
	STRNUM_TYPENAME(STRNUM_PID_T_TYPE) },
  { STRNUM_IN_PORT_T, sizeof(STRNUM_IN_PORT_T_TYPE),
	STRNUM_TYPENAME(STRNUM_IN_PORT_T_TYPE) },
  { STRNUM_SIZE_T, sizeof(STRNUM_SIZE_T_TYPE),
	STRNUM_TYPENAME(STRNUM_SIZE_T_TYPE) },
  { STRNUM_SSIZE_T, sizeof(STRNUM_SSIZE_T_TYPE),
	STRNUM_TYPENAME(STRNUM_SSIZE_T_TYPE) },
  { STRNUM_IMAX_T, sizeof(STRNUM_IMAX_T_TYPE),
	STRNUM_TYPENAME(STRNUM_IMAX_T_TYPE) },
  { STRNUM_UIMAX_T, sizeof(STRNUM_UIMAX_T_TYPE),
	STRNUM_TYPENAME(STRNUM_UIMAX_T_TYPE) },
  /* End-of-table marker: the NULL name stops strnum_lookup(). */
  { STRNUM_ERR,  0,
	NULL }
};

Strnum_tab *
strnum_lookup(char *name)
{
  int i;

  for (i = 0; strnum_tab[i].name != NULL; i++) {
	if (streq(name, strnum_tab[i].name))
	  return(&strnum_tab[i]);
  }

  return(NULL);
}

/*
 * Map the type name NAME to its Strnum type code.
 * Return STRNUM_ERR if NAME is not in the table.
 */
Strnum
strnum_type(char *name)
{
  Strnum_tab *entry;

  entry = strnum_lookup(name);
  if (entry != NULL)
	return(entry->type);

  return(STRNUM_ERR);
}

/*
 * Convert STR to a base 10 number of type TYPE and store it in VALUE.
 * This is strnum_b() with a base of 10 and no end pointer; the result of
 * strnum_b() is returned unchanged.
 */
int
strnum(char *str, Strnum type, void *value)
{
  int st;

  st = strnum_b(str, type, 10, value, NULL);

  return(st);
}

#ifdef NOTDEF
/*
 * Unfinished variant of strnum(), compiled only if NOTDEF is defined.
 * Storage for the result is obtained here (via ALLOC) and a pointer to it is
 * stored through VALUE, which is treated as a pointer to a pointer.
 * Only STRNUM_I is handled; any other TYPE yields -1.
 */
int
strnuma(char *str, Strnum type, void *value)
{
  int st;

  if (type == STRNUM_I) {
	/* NOTE(review): valp is declared but never used. */
	int *val, *valp;

	val = (int *) ALLOC(STRNUM_I_TYPE);
	st = strnum(str, type, val);
	/* Hand the storage to the caller, whether or not conversion succeeded. */
	*(int **) value = val;
  }
  /* ... */
  else
	st = -1;

  return(st);
}

/* Example of calling strnuma(); the result is not examined. */
static void
foo(char *str)
{
  int *x;

  strnuma(str, STRNUM_I, &x);
}
#endif

/*
 * Alternate base 10 interface: conversion stops at the first invalid
 * character and ENDP is passed to strnum_b() to receive a pointer to that
 * character.  The result of strnum_b() is returned unchanged.
 */
int
strnumx(char *str, Strnum type, void *value, char **endp)
{
  int st;

  st = strnum_b(str, type, 10, value, endp);

  return(st);
}

/*
 * Convert STR to a number of type TYPE in base BASE, watching for errors.
 * If the conversion succeeds, store the result in VALUE which must be
 * properly aligned and sized memory for TYPE, and return 0 (if VALUE is NULL,
 * however, just return 0 - this is useful for type checking).
 * If ENDP is non-NULL, set it to point to the first invalid character found.
 * Return -1 if an error occurs.  If ENDP is NULL it is an error if an invalid
 * character is found in STR, otherwise no error will occur and ENDP will be
 * set to point at that character.
 *
 * A NULL or empty STR is an error for every TYPE except STRNUM_STR.
 * For STRNUM_STR, VALUE receives a strdup() copy of STR (or NULL if STR is
 * NULL) and ENDP is not set.  For an unrecognized TYPE, ENDP (if non-NULL)
 * is set to STR and -1 is returned.
 *
 * A result that is out of range for the conversion function (ERANGE), or
 * that does not survive conversion to the narrower target type, is an error.
 * NOTE(review): the unsigned types are parsed by the strtoul() family, which
 * accepts a leading minus sign - confirm callers do not rely on negative
 * input being rejected.
 */
int
strnum_b(char *str, Strnum type, int base, void *value, char **endp)
{
  char *ptr;

  /*
   * TYPE is tested first so that a NULL STR, which is valid only for
   * STRNUM_STR, is never dereferenced.
   */
  if (type != STRNUM_STR && (str == NULL || *str == '\0'))
	return(-1);

  /*
   * In each numeric case below the conversion fails if unconverted
   * characters remain and the caller did not ask for ENDP, if the value is
   * out of range, or if the conversion function reports EINVAL.
   */
  errno = 0;
  switch (type) {
  case STRNUM_STR:
	{
	  /* This is a special case, mostly for strpack(). Just copy the string. */
	  if (value != NULL) {
		if (str == NULL)
		  *(char **) value = NULL;
		else
		  *(char **) value = strdup(str);
	  }
	  /* ENDP is not set. */
	  break;
	}

  case STRNUM_I:
	{
	  int val_int;
	  long val_long;

	  val_long = strtol(str, &ptr, base);
	  if (endp != NULL)
		*endp = ptr;
	  if ((endp == NULL && *ptr != '\0')
		  || errno == ERANGE || (val_long == 0 && errno == EINVAL))
		return(-1);
	  /* Reject a value that does not fit in an int. */
	  val_int = (int) val_long;
	  if ((long) val_int != val_long)
		return(-1);

	  if (value != NULL)
		*(int *) value = val_int;
	  break;
	}

  case STRNUM_L:
	{
	  long val_long;

	  val_long = strtol(str, &ptr, base);
	  if (endp != NULL)
		*endp = ptr;
	  if ((endp == NULL && *ptr != '\0')
		  || errno == ERANGE || (val_long == 0 && errno == EINVAL))
		return(-1);

	  if (value != NULL)
		*(long *) value = val_long;
	  break;
	}

  case STRNUM_LL:
	{
	  long long val_long_long;

	  val_long_long = strtoll(str, &ptr, base);
	  if (endp != NULL)
		*endp = ptr;
	  if ((endp == NULL && *ptr != '\0')
		  || errno == ERANGE || (val_long_long == 0 && errno == EINVAL))
		return(-1);

	  if (value != NULL)
		*(long long *) value = val_long_long;
	  break;
	}

  case STRNUM_IMAX_T:
	{
	  intmax_t val_intmax;

	  val_intmax = strtoimax(str, &ptr, base);
	  if (endp != NULL)
		*endp = ptr;
	  if ((endp == NULL && *ptr != '\0')
		  || errno == ERANGE || (val_intmax == 0 && errno == EINVAL))
		return(-1);

	  if (value != NULL)
		*(intmax_t *) value = val_intmax;
	  break;
	}

  case STRNUM_UI:
  case STRNUM_UINZ:
	{
	  unsigned int val_uint;
	  unsigned long val_ulong;

	  val_ulong = strtoul(str, &ptr, base);
	  if (endp != NULL)
		*endp = ptr;
	  if ((endp == NULL && *ptr != '\0')
		  || errno == ERANGE || (val_ulong == 0 && errno == EINVAL))
		return(-1);
	  /* Reject a value that does not fit in an unsigned int. */
	  val_uint = (unsigned int) val_ulong;
	  if ((unsigned long) val_uint != val_ulong)
		return(-1);

	  /* Unsigned integer, but not zero. */
	  if (type == STRNUM_UINZ && val_uint == 0)
		return(-1);

	  if (value != NULL)
		*(unsigned int *) value = val_uint;
	  break;
	}

  case STRNUM_IN_PORT_T:
	{
	  in_port_t val_in_port_t;
	  unsigned long val_ulong;

	  val_ulong = strtoul(str, &ptr, base);
	  if (endp != NULL)
		*endp = ptr;
	  if ((endp == NULL && *ptr != '\0')
		  || errno == ERANGE || (val_ulong == 0 && errno == EINVAL))
		return(-1);
	  /* Reject a value that does not fit in an in_port_t. */
	  val_in_port_t = (in_port_t) val_ulong;
	  if ((unsigned long) val_in_port_t != val_ulong)
		return(-1);

	  if (value != NULL)
		*(in_port_t *) value = val_in_port_t;
	  break;
	}

  case STRNUM_UL:
	{
	  unsigned long val_ulong;

	  val_ulong = strtoul(str, &ptr, base);
	  if (endp != NULL)
		*endp = ptr;
	  if ((endp == NULL && *ptr != '\0')
		  || errno == ERANGE || (val_ulong == 0 && errno == EINVAL))
		return(-1);

	  if (value != NULL)
		*(unsigned long *) value = val_ulong;
	  break;
	}

  case STRNUM_ULL:
	{
	  unsigned long long val_ulong_long;

	  val_ulong_long = strtoull(str, &ptr, base);
	  if (endp != NULL)
		*endp = ptr;
	  if ((endp == NULL && *ptr != '\0')
		  || errno == ERANGE || (val_ulong_long == 0 && errno == EINVAL))
		return(-1);

	  if (value != NULL)
		*(unsigned long long *) value = val_ulong_long;
	  break;
	}

  case STRNUM_UIMAX_T:
	{
	  uintmax_t val_uintmax;

	  val_uintmax = strtoumax(str, &ptr, base);
	  if (endp != NULL)
		*endp = ptr;
	  if ((endp == NULL && *ptr != '\0')
		  || errno == ERANGE || (val_uintmax == 0 && errno == EINVAL))
		return(-1);

	  if (value != NULL)
		*(uintmax_t *) value = val_uintmax;
	  break;
	}

  case STRNUM_TIME_T:
	{
	  unsigned long val_ulong;
	  time_t val_time_t;

	  val_ulong = strtoul(str, &ptr, base);
	  if (endp != NULL)
		*endp = ptr;
	  if ((endp == NULL && *ptr != '\0')
		  || errno == ERANGE || (val_ulong == 0 && errno == EINVAL))
		return(-1);
	  /* Reject a value that does not survive conversion to time_t. */
	  val_time_t = (time_t) val_ulong;
	  if ((unsigned long) val_time_t != val_ulong)
		return(-1);

	  if (value != NULL)
		*(time_t *) value = val_time_t;
	  break;
	}

  case STRNUM_SIZE_T:
	{
	  unsigned long val_ulong;
	  size_t val_size_t;

	  val_ulong = strtoul(str, &ptr, base);
	  if (endp != NULL)
		*endp = ptr;
	  if ((endp == NULL && *ptr != '\0')
		  || errno == ERANGE || (val_ulong == 0 && errno == EINVAL))
		return(-1);
	  /* Reject a value that does not survive conversion to size_t. */
	  val_size_t = (size_t) val_ulong;
	  if ((unsigned long) val_size_t != val_ulong)
		return(-1);

	  if (value != NULL)
		*(size_t *) value = val_size_t;
	  break;
	}

  case STRNUM_SSIZE_T:
	{
	  long val_long;
	  ssize_t val_ssize_t;

	  val_long = strtol(str, &ptr, base);
	  if (endp != NULL)
		*endp = ptr;
	  if ((endp == NULL && *ptr != '\0')
		  || errno == ERANGE || (val_long == 0 && errno == EINVAL))
		return(-1);
	  /* Reject a value that does not survive conversion to ssize_t. */
	  val_ssize_t = (ssize_t) val_long;
	  if ((long) val_ssize_t != val_long)
		return(-1);

	  if (value != NULL)
		*(ssize_t *) value = val_ssize_t;
	  break;
	}

  case STRNUM_PID_T:
	{
	  unsigned long val_ulong;
	  pid_t val_pid_t;

	  val_ulong = strtoul(str, &ptr, base);
	  if (endp != NULL)
		*endp = ptr;
	  if ((endp == NULL && *ptr != '\0')
		  || errno == ERANGE || (val_ulong == 0 && errno == EINVAL))
		return(-1);
	  /* Reject a value that does not survive conversion to pid_t. */
	  val_pid_t = (pid_t) val_ulong;
	  if ((unsigned long) val_pid_t != val_ulong)
		return(-1);

	  if (value != NULL)
		*(pid_t *) value = val_pid_t;
	  break;
	}

  case STRNUM_MODE_T:
	{
	  unsigned long val_ulong;
	  mode_t val_mode_t;

	  val_ulong = strtoul(str, &ptr, base);
	  if (endp != NULL)
		*endp = ptr;
	  if ((endp == NULL && *ptr != '\0')
		  || errno == ERANGE || (val_ulong == 0 && errno == EINVAL))
		return(-1);
	  /* Reject a value that does not survive conversion to mode_t. */
	  val_mode_t = (mode_t) val_ulong;
	  if ((unsigned long) val_mode_t != val_ulong)
		return(-1);

	  if (value != NULL)
		*(mode_t *) value = val_mode_t;
	  break;
	}

  default:
	/* Unrecognized TYPE: nothing was converted. */
	if (endp != NULL)
	  *endp = str;
	return(-1);
  }

  return(0);
}

/*
 * Note: this encoding scheme is similar to (but not identical to) the one
 * described in Section 6.8 of RFC 2045.
 * The reason for the difference is that we need to avoid the '=' and '+'
 * characters, which are used by RFC 2045 but cause problems in a variety of
 * contexts unless they are specially escaped - and we don't want to require
 * another layer of encoding.  Other characters can be similarly problematic.
 * We want something that can be used in a query string or URI, filename or
 * URI path, or MIME header value.  For instance, the '/' is a problem if the
 * value can appear in a path element or in a URI.
 * A comma is a problem because it can be used to separate multiple cookie
 * values in a Cookie header (see get_cookies()).
 *
 * Prior to 1.4.19, '.' mapped to 0 and '/' mapped to 1.
 * Starting with 1.4.19, we would also like to avoid '/', so we will map
 * '_' (0x5f) to 0 and '-' (0x2d) to 1.
 *
 * XXX consider changing '-' to '@' for even wider applicability, since
 * if we're generating a filename, a leading '-' can be interpreted as a
 * flag in a command line context.  This might be stretching it though.
 *
 * Backward decoding compatibility will be retained for a release or two...
 *
 * XXX Also see RFC 4648 (http://www.rfc-editor.org/rfc/rfc4648.txt) for the
 * "base64url" encoding, which appeared after this encoding was developed
 * and uses the same alphabet but a different mapping.
 */

#ifdef USE_OLD_ENCODING
/*
 * Old alphabet: maps a 6-bit value (the array index) to its symbol,
 * with '.' for 0 and '/' for 1.
 */
static const char strba64_dacs_conv_table[64] = {
  '.', '/', '0', '1', '2', '3', '4', '5',
  '6', '7', '8', '9', 'A', 'B', 'C', 'D',
  'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
  'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
  'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b',
  'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
  'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
  's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
};

/*
 * CONV_TABLE_BASE is the lowest character code covered by the reverse
 * table and CONV_TABLE_SIZE is the number of entries in it.
 * CONV_TABLE_NOXBITS is the value whose symbol strba64() emits as the final
 * character when there are no leftover bits.
 * XX marks a character code that does not map to any value.
 */
enum {
  CONV_TABLE_BASE    = 0x2e,	/* 056 == 46 == '.' */
  CONV_TABLE_SIZE    = 0x4d,
  CONV_TABLE_NOXBITS = 16,
  XX                 = 0x40
};

/*
 * These are the mappings from a base-64 symbol to its decimal value.
 * The table is indexed by (symbol - CONV_TABLE_BASE).
 */
static const char stra64b_dacs_conv_table[CONV_TABLE_SIZE] = {
  /* 0x2e */                                                           0,  1,
  /* 0x30 */   2,  3,  4,  5,  6,  7,  8,  9, 10, 11, XX, XX, XX, XX, XX, XX,
  /* 0x40 */  XX, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
  /* 0x50 */  27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, XX, XX, XX, XX, XX,
  /* 0x60 */  XX, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
  /* 0x70 */  53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63
};

#else

/*
 * Current alphabet: maps a 6-bit value (the array index) to its symbol,
 * with '_' for 0 and '-' for 1.
 */
static const char strba64_dacs_conv_table[64] = {
  '_', '-', '0', '1', '2', '3', '4', '5',	/*  0.. 7 */
  '6', '7', '8', '9', 'A', 'B', 'C', 'D',	/*  8..15 */
  'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',	/* 16..23 */
  'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',	/* 24..31 */
  'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b',	/* 32..39 */
  'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',	/* 40..47 */
  'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',	/* 48..55 */
  's', 't', 'u', 'v', 'w', 'x', 'y', 'z'	/* 56..63 */
};

enum {
  /* Lowest ASCII code in the table (055 == 45 == '-') */
  CONV_TABLE_BASE    = 0x2d,
  /* Number of entries in the reverse lookup table */
  CONV_TABLE_SIZE    = 0x4e,
  /* Value whose symbol strba64() emits last when no bits are left over */
  CONV_TABLE_NOXBITS = 16,
  /* Marks a character code that does not map to any value */
  XX                 = 0x40
};

/*
 * These are the mappings from a base-64 symbol to its decimal value.
 * The table is indexed by (symbol - CONV_TABLE_BASE).
 * The table itself does not map '.' or '/' (both are XX here); for backward
 * compatibility stra64b() tests for those two characters explicitly before
 * consulting the table, mapping '.' to 0 and '/' to 1.  With suitable
 * warning, that should be removed in a future release.
 * XX represents ASCII values that do not map to anything.
 */
static const char stra64b_dacs_conv_table[CONV_TABLE_SIZE] = {
  /* 0x2d */                                                       1, XX, XX,
  /* 0x30 */   2,  3,  4,  5,  6,  7,  8,  9, 10, 11, XX, XX, XX, XX, XX, XX,
  /* 0x40 */  XX, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
  /* 0x50 */  27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, XX, XX, XX, XX,  0,
  /* 0x60 */  XX, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
  /* 0x70 */  53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63
};
#endif

/*
 * The tables actually used by strba64() and stra64b(): the first maps a
 * 6-bit value to its symbol, the second maps (symbol - CONV_TABLE_BASE) back
 * to its value.
 */
/* XXX is this sufficient to describe the mapping tables? */
static const char *strba64_conv_table = strba64_dacs_conv_table;
static const char *stra64b_conv_table = stra64b_dacs_conv_table;

/*
 * INP is a binary string, NBYTES long.  Convert it to a null-terminated
 * ASCII text string, as radix-64, and set OUTP to point to the buffer
 * obtained from ds_buf().  Each 6 bits of INP become one printable
 * character, and one final character is always appended: it encodes the 2 or
 * 4 leftover bits, or is the symbol for CONV_TABLE_NOXBITS if there are none.
 * Return the value of ds_len() for the result.
 */
unsigned int
strba64(const unsigned char *inp, unsigned int nbytes, char **outp)
{
  unsigned int byte, carry, carry_bits, i, sym;
  Ds ds;

  ds_init(&ds);
  carry = 0;
  carry_bits = 0;

  for (i = 0; i < nbytes; i++) {
	byte = inp[i];

	if (carry_bits == 0) {
	  /* Emit the top 6 bits; the low 2 bits are carried forward. */
	  sym = (byte & 0xfc) >> 2;
	  ds_appendc(&ds, strba64_conv_table[sym]);
	  carry = byte & 0x03;
	  carry_bits = 2;
	}
	else if (carry_bits == 2) {
	  /* 2 carried bits plus the top 4 bits; the low 4 bits are carried. */
	  sym = (carry << 4) | ((byte & 0xf0) >> 4);
	  ds_appendc(&ds, strba64_conv_table[sym]);
	  carry = byte & 0x0f;
	  carry_bits = 4;
	}
	else if (carry_bits == 4) {
	  /* 4 carried bits plus the top 2 bits, then the low 6 bits. */
	  sym = (carry << 2) | ((byte & 0xc0) >> 6);
	  ds_appendc(&ds, strba64_conv_table[sym]);
	  sym = byte & 0x3f;
	  ds_appendc(&ds, strba64_conv_table[sym]);
	  carry_bits = 0;
	}
  }

  /*
   * An extra character, describing any leftover bits, is always appended.
   * There can be at most 4 leftover bits, so their maximum value is 15 and
   * cannot be confused with CONV_TABLE_NOXBITS.
   */
  sym = (carry_bits == 0) ? CONV_TABLE_NOXBITS : carry;
  ds_appendc(&ds, strba64_conv_table[sym]);

  ds_appendc(&ds, '\0');

  *outp = ds_buf(&ds);
  return(ds_len(&ds));
}

/*
 * Map the radix-64 symbol CH to its value, storing it in VALP.
 * Return 0 if CH is a valid symbol, -1 otherwise (VALP is then unchanged).
 */
static int
stra64b_symbol(int ch, unsigned int *valp)
{
  unsigned int ind, x;

#ifndef USE_OLD_ENCODING
  /* Backward compatibility: the old alphabet used '.' for 0 and '/' for 1. */
  if (ch == '.') {
	*valp = 0;
	return(0);
  }
  if (ch == '/') {
	*valp = 1;
	return(0);
  }
#endif

  /* A character below CONV_TABLE_BASE wraps to a large index and is rejected. */
  ind = (unsigned int) (ch - CONV_TABLE_BASE);
  if (ind >= CONV_TABLE_SIZE || (x = stra64b_conv_table[ind]) == XX)
	return(-1);

  *valp = x;
  return(0);
}

/*
 * INP is a null-terminated ASCII string produced by strba64().
 * Convert it to binary and return a pointer to the resulting buffer.
 * OUTP and/or NBYTES can be NULL if their value isn't wanted
 * (when the output is known to be a null-terminated string, for example);
 * otherwise OUTP is set to the returned buffer and NBYTES to the number of
 * decoded bytes.
 * An empty INP yields a one byte buffer holding a null, with NBYTES zero.
 * If decoding fails because INP contains an invalid symbol, or if memory
 * cannot be allocated for the empty case, NULL is returned, NBYTES is set to
 * zero, and OUTP is not modified.
 * NOTE(review): the Ds buffer is not released on the failure path - confirm
 * whether it should be.
 */
unsigned char *
stra64b(const char *inp, unsigned char **outp, unsigned int *nbytes)
{
  unsigned int got_bits, val, x;
  unsigned char *out;
  const char *p;
  Ds ds;

  if (*inp == '\0') {
	if (nbytes != NULL)
	  *nbytes = 0;
	if ((out = malloc(1)) == NULL)
	  return(NULL);
	*out = '\0';
	if (outp != NULL)
	  *outp = out;
	return(out);
  }

  val = 0;
  x = 0;
  got_bits = 0;
  ds_init(&ds);

  /* Every symbol except the last carries 6 bits of data. */
  for (p = inp; *(p + 1) != '\0'; p++) {
	if (stra64b_symbol((int) *p, &x) == -1)
	  goto fail;

	/* GOT_BITS is the number of bits of the pending byte held in VAL. */
	switch (got_bits) {
	case 0:
	  val = x << 2;
	  got_bits = 6;
	  break;
	case 6:
	  val |= ((x & 0x30) >> 4);
	  ds_appendc(&ds, val);
	  val = (x & 0x0f) << 4;
	  got_bits = 4;
	  break;
	case 4:
	  val |= (x & 0x3c) >> 2;
	  ds_appendc(&ds, val);
	  val = (x & 0x03) << 6;
	  got_bits = 2;
	  break;
	case 2:
	  val |= x;
	  ds_appendc(&ds, val);
	  got_bits = 0;
	  break;
	}
  }

  /*
   * Take care of the final remainder symbol: it either says that there are
   * no leftover bits or supplies the low bits of the last byte.
   */
  if (stra64b_symbol((int) *p, &x) == -1)
	goto fail;

  if (x != CONV_TABLE_NOXBITS)
	ds_appendc(&ds, val | x);

  if (nbytes != NULL)
	*nbytes = ds_len(&ds);
  out = (unsigned char *) ds_buf(&ds);
  if (outp != NULL)
	*outp = out;

  return(out);

 fail:
  if (nbytes != NULL)
	*nbytes = 0;
  return(NULL);
}

/*
 * Decode INP, a radix-64 string as produced by strba64(), using stra64b().
 * The decoded length is not reported, so this is meant for input whose
 * decoded form is known to be a null-terminated string.
 * Return the decoded buffer, or NULL if INP is NULL or decoding fails.
 */
unsigned char *
strdec64(const char *inp)
{

  if (inp == NULL)
	return(NULL);

  return(stra64b(inp, NULL, NULL));
}

/*
 * Encode the LEN bytes at STR as radix-64 text using strba64().
 * Return the encoded string, or NULL if STR is NULL.
 */
char *
strbenc64(const unsigned char *str, unsigned int len)
{
  char *enc;

  if (str != NULL) {
	strba64(str, len, &enc);
	return(enc);
  }

  return(NULL);
}

/*
 * Encode the null-terminated string STR, together with its terminating
 * null, as radix-64 text.
 * Return the encoded string, or NULL if STR is NULL.
 */
char *
strenc64(char *str)
{
  unsigned int nbytes;

  if (str == NULL)
	return(NULL);

  /* Include the null byte. */
  nbytes = (unsigned int) strlen(str) + 1;

  return(strbenc64((unsigned char *) str, nbytes));
}

/*
 * Different encoded character sets are used for the radix-85 representation,
 * also called ASCII85.  This table, which uses characters 0x21 through 0x75,
 * is used by PostScript and PDF (ASCII85Decode).  See
 *    http://en.wikipedia.org/wiki/Ascii85
 * and Adobe PDF Reference, Version 1.7, (November, 2006), Section 3.3.2.
 * The relevant RFC is:
 *    http://www.ietf.org/rfc/rfc1924.txt
 * although that uses a different mapping:
 *   '0'..'9', 'A'..'Z', 'a'..'z', '!', '#', '$', '%', '&', '(',
 *   ')', '*', '+', '-', ';', '<', '=', '>', '?', '@', '^', '_',
 *   '`', '{', '|', '}', '~'
 *
 * The array index is the radix-85 digit value and the element is the
 * character that represents it.
 */
static MAYBE_UNUSED const char conv85_std_table[85] = {
  '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*',
  '+', ',', '-', '.', '/',
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  ':', ';', '<', '=', '>', '?', '@',
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
  'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
  'U', 'V', 'W', 'X', 'Y', 'Z',
  '[', '\\', ']', '^', '_', '`',
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
  'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
  'u'
};

/*
 * Test vector used by test85(): five input bytes and the text they are
 * expected to encode to with conv85_std_table.
 */
static MAYBE_UNUSED const unsigned char conv85_std_test1[] = {
  0x12, 0x34, 0x56, 0x78, 0x9a
};
static MAYBE_UNUSED const char *conv85_std_enc1 = "&i<X6RK";

/*
 * Some characters in the "standard" radix-85 encoded character set are not
 * well-suited to appearing in the query component of a URL, so we need to
 * make some adjustments.  This change should not matter much because the
 * encoding should only be used internally (between DACS components).
 * Troublesome characters:
 * '#' (start of fragment), '=' (variable/value internal separator),
 * '&' (variable/value external separator), '%' (hex encoding escape),
 * '+' (blank encoding)
 * '?' (allowed, but may be confusing), '\\' (allowed, but may be confusing)
 * '~' is reserved for the start/end delimiting pair.
 *
 * NOTE(review): this table nevertheless contains '#', and it contains 'z',
 * which strba85() also emits as shorthand for an all-zero group and which
 * stra85b() always interprets that way - confirm before selecting this table.
 */
static MAYBE_UNUSED const char conv85_dacs_table[85] = {
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
  'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
  'U', 'V', 'W', 'X', 'Y', 'Z',
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
  'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
  'u', 'v', 'w', 'x', 'y', 'z',
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  '!', '"', '#', '$', '\'', '(', ')', '*', ',', '-',
  '.', ':', ';', '^', '>', '_', '@', '{', '|', '}',
  '`', '[', ']'
};

/* If non-zero, strba85() surrounds its output with "<~" and "~>". */
int strba85_demark = 0;
/* The radix-85 table used by strba85() and stra85b(). */
const char *conv85_enc = conv85_std_table;

/*
 * Append to DS the leading COUNT + 1 radix-85 digits of the five digit
 * representation of VAL, most significant digit first, using ENC_TAB to map
 * each digit value to its character.  COUNT is expected to be from 1 to 4.
 */
static void
encode85(Ds *ds, const char *enc_tab, unsigned int val, unsigned int count)
{
  unsigned char digit[5];
  unsigned int k;

  /*
   * Digits are produced least significant first, so fill the array from
   * the back; digit[0] ends up as the most significant digit.
   */
  for (k = 5; k > 0; k--) {
	digit[k - 1] = val % 85;
	val /= 85;
  }

  for (k = 0; k <= count; k++)
	ds_appendc(ds, (int) enc_tab[digit[k]]);
}

/*
 * INP is a binary string, NBYTES long.  Convert it to a null-terminated
 * ASCII text string, as radix-85, and set OUTP to point to the buffer
 * obtained from ds_buf().  Each 4 bytes of INP become 5 printable
 * characters, except that a group of four zero bytes becomes the single
 * character 'z'; a final partial group of N bytes becomes N + 1 characters.
 * Return the value of ds_len() for the result.
 * The conventional leading "<~" and trailing "~>" are emitted only if
 * strba85_demark is non-zero, because they are not strictly necessary for
 * our purposes, but they are always recognized when decoding.
 */
unsigned int
strba85(const unsigned char *inp, unsigned int nbytes, char **outp)
{
  unsigned int byte, count, i, val;
  Ds ds;

  count = 0;
  val = 0;
  ds_init(&ds);

  if (strba85_demark) {
	ds_appendc(&ds, (int) '<');
	ds_appendc(&ds, (int) '~');
  }

  for (i = 0; i < nbytes; i++) {
	/*
	 * Widen to unsigned before shifting; shifting the promoted (signed)
	 * byte left by 24 would overflow for values of 0x80 and above.
	 */
	byte = inp[i];

	switch (count++) {
	case 0:
	  val |= (byte << 24);
	  break;

	case 1:
	  val |= (byte << 16);
	  break;

	case 2:
	  val |= (byte << 8);
	  break;

	case 3:
	  /* A complete group of four bytes has been collected. */
	  val |= byte;
	  if (val == 0)
		ds_appendc(&ds, (int) 'z');
	  else
		encode85(&ds, conv85_enc, val, count);
	  val = count = 0;
	  break;
	}
  }

  /* Encode a final partial group, if there is one. */
  if (count)
	encode85(&ds, conv85_enc, val, count);

  if (strba85_demark) {
	ds_appendc(&ds, (int) '~');
	ds_appendc(&ds, (int) '>');
  }

  ds_appendc(&ds, '\0');

  *outp = ds_buf(&ds);
  return(ds_len(&ds));
}


/*
 * Self-test: encode the conv85_std_test1 bytes using the standard table and
 * print the result, followed by whether it matches conv85_std_enc1.
 * Note that conv85_enc is left pointing at conv85_std_table.
 */
static MAYBE_UNUSED void
test85(void)
{
  char *enc;
  const char *verdict;

  conv85_enc = conv85_std_table;
  strba85(conv85_std_test1, 5, &enc);

  if (streq(enc, conv85_std_enc1))
	verdict = " (ok!)\n";
  else
	verdict = " (wrong!)\n";

  printf("%s", enc);
  printf("%s", verdict);
}
  
/*
 * Append to DS the COUNT most significant bytes of the 32-bit value VAL,
 * most significant byte first.  Nothing is appended unless COUNT is from
 * 1 to 4.
 */
static void
decode85(Ds *ds, unsigned int val, int count)
{
  int i, shift;

  if (count < 1 || count > 4)
	return;

  shift = 24;
  for (i = 0; i < count; i++) {
	ds_appendc(ds, (int) (val >> shift));
	shift -= 8;
  }
}

/*
 * Find the character CH in the 85 entry table ENC_TAB.
 * Return its index, which is its radix-85 digit value, or -1 if CH is not
 * in the table.
 */
static int
lookup85(const char *enc_tab, int ch)
{
  int pos;

  pos = 0;
  while (pos < 85 && (int) enc_tab[pos] != ch)
	pos++;

  return((pos < 85) ? pos : -1);
}

/*
 * INP is a null-terminated ASCII string produced by strba85().
 * Convert it to binary and return a pointer to the resulting buffer.
 * OUTP and/or NBYTES can be NULL if their value isn't wanted
 * (when the output is known to be a null-terminated string, for example);
 * otherwise OUTP is set to the returned buffer and NBYTES to the number of
 * decoded bytes.
 * A leading "<~" is skipped, decoding stops at "~>", and internal whitespace
 * (blank, tab, newline, carriage return) is ignored.  A 'z' stands for four
 * zero bytes and may only appear between groups.
 * If decoding fails (a character not in the table, a 'z' inside a group, or
 * a '~' not followed by '>'), NULL is returned, NBYTES is set to zero, and
 * OUTP is not modified.
 * NOTE(review): the Ds buffer is not released on the failure path - confirm
 * whether it should be.
 */
unsigned char *
stra85b(const char *inp, unsigned char **outp, unsigned int *nbytes)
{
  int v85;
  unsigned int ch, count, val;
  const char *p;
  Ds ds;
  static const unsigned int pow85[5] = {
	(85 * 85 * 85 * 85), (85 * 85 * 85), (85 * 85), 85, 1
  };

  p = inp;

  if (*p == '<' && *(p + 1) == '~')
	p += 2;

  /* COUNT is the number of digits of the current group accumulated in VAL. */
  count = 0;
  val = 0;
  ds_init(&ds);

  while ((ch = *p++) != '\0') {
	switch (ch) {
	case 'z':
	  if (count) {
		/* z inside 5-tuple? */
		goto fail;
	  }

	  ds_appendc(&ds, 0);
	  ds_appendc(&ds, 0);
	  ds_appendc(&ds, 0);
	  ds_appendc(&ds, 0);
	  break;

	case '~':
	  if (*p != '>')
		goto fail;

	  /* This is the end. */
	  goto done;
	  /*NOTREACHED*/

	case '\n':
	case '\r':
	case '\t':
	case ' ':
	  break;

	default:
	  if ((v85 = lookup85(conv85_enc, ch)) == -1) {
		/* Bad character */
		goto fail;
	  }
	  val += (unsigned int) v85 * pow85[count++];
	  if (count == 5) {
		decode85(&ds, val, 4);
		count = 0;
		val = 0;
	  }
	  break;
	}
  }

 done:

  /*
   * A final partial group of COUNT digits yields COUNT - 1 bytes.  Adding
   * pow85[COUNT - 1] rounds the value up so that dropping the missing low
   * order digits recovers the original bytes.
   */
  if (count > 0) {
	count--;
	val += pow85[count];
	decode85(&ds, val, count);
  }

  if (nbytes != NULL)
	*nbytes = ds_len(&ds);
  if (outp != NULL)
	*outp = (unsigned char *) ds_buf(&ds);

  return((unsigned char *) ds_buf(&ds));

 fail:
  if (nbytes != NULL)
	*nbytes = 0;
  return(NULL);
}

/*
 * Overwrite the LEN bytes at S with zeroes.
 * The stores go through a volatile pointer to make sure that they aren't
 * optimized away because the memory is not read after it is written.
 * See, e.g.:
 *    http://www.mail-archive.com/cryptography%40metzdowd.com/msg08369.html
 */
void
memzap(void *s, size_t len)
{
  volatile unsigned char *dst;
  size_t remaining;

  dst = (volatile unsigned char *) s;
  for (remaining = len; remaining > 0; remaining--)
	*dst++ = 0;
}

/*
 * Overwrite each of the LEN bytes at S with VAL (converted to unsigned
 * char).  Like memzap(), the stores go through a volatile pointer.
 */
void
memzapb(void *s, int val, size_t len)
{
  volatile unsigned char *dst;
  unsigned char fill;
  size_t remaining;

  dst = (volatile unsigned char *) s;
  fill = (unsigned char) val;
  for (remaining = len; remaining > 0; remaining--)
	*dst++ = fill;
}

/*
 * Return a newly allocated copy of the LEN bytes starting at S,
 * or NULL if memory cannot be allocated.
 * The caller owns the returned buffer.
 */
void *
memdupn(void *s, size_t len)
{
  void *ptr;

  /* Do not hand a failed allocation to memcpy(). */
  if ((ptr = malloc(len)) == NULL)
	return(NULL);

  if (len != 0)
	memcpy(ptr, s, len);

  return(ptr);
}

/*
 * Destroy the value of environment variable NAME if it is found.
 * For greater certainty, we do this directly rather than by invoking
 * getenv(), putenv(), etc.
 * XXX presumably there aren't any cached copies of the value in
 * library functions
 */
void
envzap(char *name)
{
  size_t len;
  char *cp, *np, **dp, **p;
  extern char **environ;

  if (name == NULL || environ == NULL)
	return;

  /* Only the part of NAME that precedes any '=' is significant. */
  for (np = name; *np != '\0' && *np != '='; np++)
	continue;
  len = (size_t) (np - name);

  p = environ;
  while ((cp = *p) != NULL) {
	/* An entry matches if it looks like "<NAME>=...". */
	if (strncmp(cp, name, len) != 0 || cp[len] != '=') {
	  p++;
	  continue;
	}

	strzap(*p);
	/*
	 * Do the equivalent of unsetenv(), which is not available everywhere.
	 * The memory is not freed, if it should be.
	 */
	dp = p;
	do {
	  dp[0] = dp[1];
	} while (*dp++ != NULL);

	/*
	 * Do not advance P: the slot now holds the entry that followed the
	 * removed one (or the terminating NULL), and it must be examined too
	 * in case NAME appears again.  Advancing here would skip that entry and,
	 * if the removed entry was the last one, step past the end of the vector.
	 */
  }
}

/*
 * For each occurrence of any character of QUOTE_CHARS in STR, quote the character by
 * preceding it with a backslash character.
 * Return the new string, which might simply be a copy of the original.
 */
char *
strquote(char *str, char *quote_chars)
{
  int ch;
  const char *special;
  char *cursor;
  Ds out;
  static char *default_quote_chars = "\"\'";

  /* Without an explicit set, single and double quotes are escaped. */
  special = (quote_chars != NULL) ? quote_chars : default_quote_chars;

  ds_init(&out);
  cursor = str;
  while (*cursor != '\0') {
	ch = (int) *cursor;
	if (index(special, ch) != NULL)
	  ds_appendc(&out, (int) '\\');
	ds_appendc(&out, ch);
	cursor++;
  }

  ds_appendc(&out, (int) '\0');
  return(ds_buf(&out));
}

/*
 * Strip the outermost "quotes" surrounding STR.
 * STR is a string with matching "quote" characters, where
 * the character in START_Q at index N is a valid starting quote character
 * and the character in END_Q at index N is the matching ending quote
 * character.  Return the embedded string, or NULL.
 *
 * Example: strextract("[foo]", "([{", ")]}") gives "foo"
 * Example: strextract("'foo'", "'\"`", "'\"`") also gives "foo"
 */
char *
strextract(char *str, char *start_q, char *end_q)
{
  char closing, *opening;
  size_t n;

  /* The first character must be a known opening quote... */
  opening = strchr(start_q, (int) str[0]);
  if (opening == NULL || strlen(start_q) != strlen(end_q))
	return(NULL);

  /* ...and the last one must be the closing quote at the same index. */
  closing = end_q[opening - start_q];
  n = strlen(str);
  if (n >= 2 && str[n - 1] == closing)
	return(strndup(str + 1, n - 2));

  return(NULL);
}

/*
 * If the argument is surrounded by matching quote characters
 * (single, double, or backtick quotes), return a new string with those quotes
 * elided; otherwise, return a copy of the original string.
 * This is just a convenient interface to the more general strextract().
 */
char *
strdequote(char *p)
{
  char *inner;

  /* Opening and closing quote sets are identical: ', ", and `. */
  inner = strextract(p, "'\"`", "'\"`");

  return((inner != NULL) ? inner : strdup(p));
}

/*
 * Given string P of length LEN, if P ends with the string SUFFIX,
 * return a pointer to the start of SUFFIX in P, NULL otherwise.
 * SUFFIX is a suffix of P if they are identical.
 */
char *
strsuffix(char *p, size_t len, char *suffix)
{
  char *tail;
  size_t n;

  n = strlen(suffix);
  if (len < n)
	return(NULL);

  /* TAIL is where SUFFIX would have to begin within P. */
  tail = p + len - n;
  if (streq(tail, suffix))
	return(tail);

  return(NULL);
}

/*
 * Like strsuffix(), except case insensitive.
 */
char *
strcasesuffix(const char *p, size_t len, const char *suffix)
{
  const char *tail;
  size_t n;

  n = strlen(suffix);
  if (len < n)
	return(NULL);

  /* TAIL is where SUFFIX would have to begin within P. */
  tail = p + len - n;
  if (strcaseeq(tail, suffix))
	return((char *) tail);

  return(NULL);
}

/*
 * Test if the initial substring of P is PREFIX (they are if they are
 * identical, too).
 * If it is, return a pointer to the character in P following the prefix,
 * otherwise return NULL.
 */
char *
strprefix(const char *p, const char *prefix)
{
  size_t n;

  n = strlen(prefix);
  if (!strneq(p, prefix, n))
	return(NULL);

  /* Step over the matched prefix. */
  return((char *) p + n);
}

/*
 * Like strprefix(), except case insensitive.
 */
char *
strcaseprefix(const char *p, const char *prefix)
{
  size_t n;

  n = strlen(prefix);
  if (!strncaseeq(p, prefix, n))
	return(NULL);

  /* Step over the matched prefix. */
  return((char *) p + n);
}

/*
 * Test if PREFIX is a prefix of WORD and PREFIX is at least MATCH_LEN
 * bytes long.  Comparisons are case insensitive.
 * Examples:
 *   strncaseprefix("pass", "password", 4) is 1
 *   strncaseprefix("PASSWord", "password", 8) is 1
 *   strncaseprefix("passz", "password", 4) is 0
 *   strncaseprefix("pass", "password", 5) is 0
 * Return 1 if so, 0 otherwise.
 */
int
strncaseprefix(char *prefix, char *word, size_t match_len)
{
  size_t plen, wlen;

  plen = strlen(prefix);
  wlen = strlen(word);

  /* PREFIX must be at least MATCH_LEN long and no longer than WORD. */
  if (plen < match_len || plen > wlen)
	return(0);

  return(strncaseeq(prefix, word, plen) ? 1 : 0);
}

/*
 * Count the number of times character C appears in string P.
 */
int
strchrcount(const char *p, int c)
{
  int n;
  const char *q;

  /*
   * The terminating null character is not counted.  It must be rejected
   * explicitly: strchr() would find the terminator and the scan would then
   * continue past the end of the string.
   */
  if (p == NULL || *p == '\0' || (char) c == '\0')
	return(0);

  n = 0;
  for (q = strchr(p, c); q != NULL; q = strchr(q + 1, c))
	n++;

  return(n);
}

/*
 * Count the number of times any character in CHARSET appears in string P.
 * CHARSET must be comprised of unique characters (no duplicates).
 * Return -1 if CHARSET is invalid.
 */
int
strchrscount(const char *p, const char *charset)
{
  int ch, total;
  const char *member;

  if (p == NULL || *p == '\0')
	return(0);

  if (charset == NULL || *charset == '\0')
	return(-1);

  total = 0;
  member = charset;
  while (*member != '\0') {
	ch = (int) *member;
	/* Each member must occur exactly once in CHARSET. */
	if (strchrcount(charset, ch) != 1)
	  return(-1);
	total += strchrcount(p, ch);
	member++;
  }

  return(total);
}

#ifndef HAVE_ISBLANK
/*
 * Test for a space or tab character.
 * Return zero if the character tests false and
 * non-zero if the character tests true.
 */
int
isblank(int ch)
{

  return((ch == ' ' || ch == '\t') ? 1 : 0);
}
#endif

/*
 * Check if STR consists of one or more decimal digits.
 */
int
is_digit_string(char *str)
{
  const unsigned char *p;

  /* "One or more" digits: NULL and the empty string do not qualify. */
  if (str == NULL || *str == '\0')
	return(0);

  /*
   * Examine the bytes as unsigned char: passing a negative char value
   * to isdigit() is undefined.
   */
  for (p = (const unsigned char *) str; *p != '\0'; p++) {
	if (!isdigit((int) *p))
	  return(0);
  }

  return(1);
}

/*
 * Check if STR consists of an optional leading '+' or '-' followed only by
 * decimal digits.
 * Return 1 if so, 0 otherwise.
 * NOTE(review): as written, the digit sequence may be empty, so "", "+",
 * and "-" are all accepted; confirm whether callers depend on that.
 */
int
is_signed_digit_string(char *str)
{
  const unsigned char *p;

  p = (const unsigned char *) str;
  if (*p == '+' || *p == '-')
	p++;

  /* Bytes are unsigned so that isdigit() never sees a negative value. */
  while (isdigit((int) *p))
	p++;

  return((*p == '\0') ? 1 : 0);
}

/*
 * If ENV_STRING looks like "<name>=<value>", then set ENV_NAME to point
 * to the <name> and ENV_VAL to point to the <value>.
 * Either of ENV_NAME and ENV_VAL may be NULL, which means the substring
 * is not to be returned.
 * <name> may not contain a '=' character.
 * If no '=' character is present, it is treated as if <name> is absent
 * and <value> is the entire string.
 * Return a pointer to the <name> part (which may be NULL); if ENV_NAME is
 * not NULL, it will be set to point to the same string.
 */
char *
env_parse(char *env_string, char **env_name, char **env_val)
{
  char *e, *p, *q;
  size_t name_len;

  p = q = NULL;
  if (env_string != NULL && env_string[0] != '\0') {
	if ((e = strchr(env_string, '=')) == NULL) {
	  /* No '=': there is no name and the whole string is the value. */
	  if (env_val != NULL)
		q = strdup(env_string);
	}
	else {
	  /* Copy everything before the first '=' as the name. */
	  name_len = (size_t) (e - env_string);
	  if ((p = (char *) malloc(name_len + 1)) != NULL) {
		memcpy(p, env_string, name_len);
		p[name_len] = '\0';
		if (env_val != NULL)
		  q = strdup(e + 1);
	  }
	  /* If the allocation fails, both name and value are left NULL. */
	}
  }

  if (env_name != NULL)
	*env_name = p;
  if (env_val != NULL)
	*env_val = q;

  return(p);
}

/*
 * Return a newly allocated string holding the decimal representation of VAL.
 */
char *
ul_str(unsigned long val)
{
  /*
   * A 32-bit value needs at most 10 digits and a 64-bit value at most 20,
   * so 32 bytes is enough, including the terminator.
   */
  enum { UL_STR_BUFSIZE = 32 };
  char *digits;

  digits = (char *) safe_malloc(UL_STR_BUFSIZE);
  sprintf(digits, "%lu", val);

  return(digits);
}

#ifndef HAVE_STRNCASECMP
/*
 * Compare at most LEN characters of S1 and S2, ignoring case.
 * Return 0 if they are equal over that span, 1 if S1 sorts after S2,
 * or -1 if S1 sorts before S2.
 */
int
strncasecmp(const char *s1, const char *s2, size_t len)
{
  int lc1, lc2;
  const unsigned char *p1, *p2;
  size_t remaining;

  /* Bytes are unsigned so that tolower() never sees a negative value. */
  p1 = (const unsigned char *) s1;
  p2 = (const unsigned char *) s2;

  /*
   * Nothing beyond the first LEN characters may influence the result; in
   * particular, once LEN characters have matched it does not matter whether
   * either string continues.
   */
  for (remaining = len; remaining > 0; remaining--, p1++, p2++) {
	lc1 = tolower((int) *p1);
	lc2 = tolower((int) *p2);
	if (lc1 != lc2)
	  return((lc1 > lc2) ? 1 : -1);
	if (*p1 == '\0') {
	  /* Both strings ended together. */
	  break;
	}
  }

  return(0);
}
#endif

/*
 * Return 1 if CH1 and CH2 are equal, case-independently if NOCASE is non-zero,
 * 0 otherwise.
 */
static int
char_eq(int ch1, int ch2, int nocase)
{
  int lhs, rhs;

  /* Fold both sides to lower case only for a case-independent comparison. */
  lhs = nocase ? tolower(ch1) : ch1;
  rhs = nocase ? tolower(ch2) : ch2;

  return(lhs == rhs);
}

/*
 * Find the first occurrence of SUBSTR in STR, comparing case-independently
 * if NOCASE is non-zero.
 * Return a pointer to the start of the match, STR itself if SUBSTR is empty,
 * or NULL if there is no match.
 */
static char *
strstr_internal(const char *str, const char *substr, int nocase)
{
  char first;
  const char *rest, *scan;
  size_t rest_len;
  int (*ncmp)(const char *s1, const char *s2, size_t len);

  /* An empty SUBSTR matches at the very start of STR. */
  if (*substr == '\0')
	return((char *) str);

  if (nocase)
	ncmp = strncasecmp;
  else
	ncmp = strncmp;

  first = substr[0];
  rest = substr + 1;
  rest_len = strlen(rest);

  /*
   * Look for the first character of SUBSTR; at each candidate position
   * compare what follows against the remainder of SUBSTR.
   */
  for (scan = str; *scan != '\0'; scan++) {
	if (!char_eq(*scan, first, nocase))
	  continue;
	if (ncmp(scan + 1, rest, rest_len) == 0)
	  return((char *) scan);
  }

  return(NULL);
}

/*
 * We use libraries that include a function by this name; life is easier
 * if we know that this implementation is being used.
 * But the native version might be optimized, so use it when possible.
 * This implementation is poor if STR is long - there are much better methods.
 */
char *
dacs_strstr(const char *str, const char *substr)
{

  /* Delegate to the shared scanner with NOCASE off (exact comparison). */
  return(strstr_internal(str, substr, 0));
}

/*
 * Like strstr(), but use case insensitive character comparison.
 * We use libraries that include a function by this name; life is easier
 * if we know that this implementation is being used.
 * But the native version might be optimized, so use it when possible.
 * This implementation is poor if STR is long - there are much better methods
 * (such as strqs()).
 */
char *
strcasestr(const char *str, const char *substr)
{

  /* Delegate to the shared scanner with NOCASE on. */
  return(strstr_internal(str, substr, 1));
}

/*
 * Find the start of the last occurrence of SUBSTR in STR by repeatedly
 * searching forward, case-independently if NOCASE is non-zero.
 * Return a pointer to that occurrence, or NULL if there is none.
 */
static char *
strrstr_internal(const char *str, const char *substr, int nocase)
{
  const char *found, *p;
  char *(*cmp)(const char *str, const char *substr);

  if (nocase)
	cmp = strcasestr;
  else
	cmp = strstr;

  found = NULL;

  p = str;
  do {
	if ((p = cmp(p, substr)) == NULL)
	  break;
	found = p;
	/*
	 * A match located at the terminator (possible only when SUBSTR is empty)
	 * leaves nothing to resume from; stepping past it would read beyond
	 * the end of STR.
	 */
	if (*p == '\0')
	  break;
	/* Resume the search one character past the start of this match. */
	p++;
  } while (*p != '\0');

  return((char *) found);
}

/*
 * Look for the start of the *last* occurrence of SUBSTR in STR.
 * This may not be as efficient as searching the string backwards, but it
 * may leverage an optimized strstr() implementation and in any case we
 * probably don't care much about its relative efficiency when STR is not
 * very long.
 */
char *
strrstr(const char *str, const char *substr)
{

  /* Case-sensitive search: NOCASE is 0. */
  return(strrstr_internal(str, substr, 0));
}

/*
 * Like strrstr(), but use case insensitive character comparison.
 */
char *
strrcasestr(const char *str, const char *substr)
{

  /* Case-insensitive search: NOCASE is 1. */
  return(strrstr_internal(str, substr, 1));
}

enum {
  /*
   * The shift table is indexed by byte value (0..255), so it needs one slot
   * for every possible unsigned char.
   */
  STRQS_ALPHABET_SIZE = 256
};

/*
 * Construct the delta 1 shift table from PATTERN of length PATTERN_LEN
 * and return it.  If TD_ONE is non-NULL, assume it is a buffer with room for
 * STRQS_ALPHABET_SIZE entries and use it, otherwise allocate such a buffer
 * (the caller is then responsible for freeing it).
 * Compute the length of PATTERN if PATTERN_LEN is zero.
 * If NOCASE is non-zero, each alphabetic character of PATTERN sets the entry
 * for both its lower case and upper case forms.
 * Return NULL if a buffer is needed but cannot be allocated.
 */
unsigned int *
strqs_init(int *td_one, const char *pattern, size_t pattern_len, int nocase)
{
  int i;
  unsigned int shift, *td;
  const unsigned char *p, *start;
  size_t plen;

  if (td_one == NULL) {
	td = (unsigned int *) malloc(STRQS_ALPHABET_SIZE * sizeof(unsigned int));
	if (td == NULL)
	  return(NULL);
  }
  else
	td = (unsigned int *) td_one;

  if (pattern_len == 0)
	plen = strlen(pattern);
  else
	plen = pattern_len;

  /* A character that is not in the pattern shifts past the whole window. */
  for (i = 0; i < STRQS_ALPHABET_SIZE; i++)
	td[i] = plen + 1;

  /*
   * Later occurrences of a character overwrite earlier ones, so each entry
   * ends up holding the distance from the character's last occurrence to
   * one past the end of the pattern.
   * Bytes are examined as unsigned char so that every index is in 0..255 and
   * the <ctype.h> functions never see a negative value.
   */
  start = (const unsigned char *) pattern;
  for (p = start; *p != '\0'; p++) {
	shift = (unsigned int) (plen - (size_t) (p - start));
	if (nocase && isalpha((int) *p)) {
	  td[tolower((int) *p) & 0377] = shift;
	  td[toupper((int) *p) & 0377] = shift;
	}
	else
	  td[*p] = shift;
  }

  return(td);
}

/*
 * Extended interface to strqs() where all of the arguments have been
 * provided.  This can avoid recomputing them if the caller needs to look
 * for the same pattern again.
 * TD_ONE is the delta 1 shift table produced by strqs_init(), ENDP points to
 * the character following the end of the text (STR), PLEN is the length of the
 * pattern (SUBSTR), and NOCASE is non-zero if case-insensitive comparisons
 * should be used (in which case TD_ONE must have been produced with the
 * NOCASE flag to strqs_init()).
 */
char *
strqsx(const char *str, const char *endp, const char *substr,
	   unsigned int *td_one, size_t plen, int nocase)
{
  const char *p, *t, *tx;

  tx = str;
  /* Keep going while a window of PLEN characters still fits before ENDP. */
  while ((size_t) (endp - tx) >= plen) {
	/* Compare the pattern against the window that starts at TX. */
	for (p = substr, t = tx; *p != '\0'; p++, t++) {
	  if (nocase) {
		if (tolower((int) (unsigned char) *p)
			!= tolower((int) (unsigned char) *t))
		  break;
	  }
	  else if (*p != *t)
		break;
	}
	if (*p == '\0')
	  return((char *) tx);

	/*
	 * The shift is selected by the character just past the window.  When
	 * the window is flush with ENDP there is no such character within the
	 * text, and no later window can fit either.
	 */
	if ((size_t) (endp - tx) == plen)
	  break;
	tx += td_one[*(tx + plen) & 0377];
  }

  return(NULL);
}

/*
 * Search STR for SUBSTR using a freshly built shift table,
 * case-independently if NOCASE is non-zero.
 * Return a pointer to the match, or NULL if there is none or the shift
 * table could not be built.
 */
static char *
strqs_internal(const char *str, const char *substr, int nocase)
{
  unsigned int *td_one;
  char *val;
  size_t str_len, substr_len;

  str_len = strlen(str);
  substr_len = strlen(substr);

  /* Passing NULL makes strqs_init() allocate the table with malloc(). */
  if ((td_one = strqs_init(NULL, substr, substr_len, nocase)) == NULL)
	return(NULL);

  val = strqsx(str, str + str_len, substr, td_one, substr_len, nocase);

  /* The table is only needed for this one search; do not leak it. */
  free(td_one);

  return(val);
}

/*
 * Find the start of the first occurrence of SUBSTR in STR, like strstr().
 * This is an implementation of "Quick Search":
 * "A Very Fast Substring Search Algorithm", Daniel M. Sunday, CACM, Vol. 33,
 * No. 8 (August 1990), pp. 132-142.
 * The paper describes some variations of this algorithm that may be a little
 * better.
 * 
 */
char *
strqs(const char *str, const char *substr)
{
  char *val;

  /* Case-sensitive search: NOCASE is 0. */
  val = strqs_internal(str, substr, 0);
  return(val);
}

/*
 * A case-insensitive variant of strqs().
 */
char *
strcaseqs(const char *str, const char *substr)
{
  char *val;

  /* Case-insensitive search: NOCASE is 1. */
  val = strqs_internal(str, substr, 1);
  return(val);
}

/*
 * One test case for test_parse(): an input for kwv_parse_qstr(), the
 * parsing configuration to use, and the expected results.
 */
typedef struct Str_test_parse {
  char *str;		/* Input string; NULL marks the end of the table */
  char *name;		/* Expected name */
  char *value;		/* Expected value */
  char *endstr;		/* Expected text at the returned end pointer */
  int flags;		/* Copied to Kwv_parse_conf.flags */
  int sep_char;		/* Copied to Kwv_parse_conf.sep_char */
  int end_char;		/* Copied to Kwv_parse_conf.end_char */
  int rc;			/* Expected return code (-1 for an expected failure) */
} Str_test_parse;

/*
 * Test cases for test_parse().  Each entry is:
 *   input, expected name, expected value, expected remainder,
 *   flags, separator character, end character, expected return code
 * The table is terminated by an entry whose input is NULL.
 */
static Str_test_parse str_test_parse_strings[] = {
  { "a: b",        "a",  " b",  "",
	KWV_PARSE_DEFAULT,          ':', 0,   0 },
  { "a: \"bcd\"",         "a",  "bcd",  "",
	KWV_PARSE_DEFAULT,          ':', 0,   0 },
  { "a=\"\"",      "a",  "",    "",
	KWV_PARSE_NEED_VALUE,       0, 0,   0 },
  { "a='b'",       "a",  "'b'",   "",
	KWV_PARSE_KEEP_QUOTES,      0, 0,   0 },
  { "a=\"b\"; c=d","a",  "b",     "; c=d",
	KWV_PARSE_DEFAULT,          0, ';', 0 },
  { "a=b; c=d",    "a",  "b",     "; c=d",
	KWV_PARSE_DEFAULT,          0, ';', 0 },
  { "a=\"b=\"c\"", "a",  "b=\"c", "",
	KWV_PARSE_INTERNAL_QUOTES,  0, 0,   0 },
  { "ab=cd\n",     "ab", "cd",    "\n",
    KWV_PARSE_DEFAULT,          0, 0,   0 },
  { "a = \"bb\" ", "a",  "bb",    " ",
	KWV_PARSE_DEFAULT,          0, 0,   0 },
  { "a = b",       "a",  " b",    "",
	KWV_PARSE_DEFAULT,          0, 0,   0 },
  /* An expected failure (rc of -1). */
  { "=a",          "",   "",      "",
	KWV_PARSE_DEFAULT,          0, 0,  -1 },
  { "a=",          "a",  "",      "",
	KWV_PARSE_DEFAULT,          0, 0,   0 },
  { "a=  ",        "a",  "  ",    "",
	KWV_PARSE_DEFAULT,          0, 0,   0 },
  { "a=b",         "a",  "b",     "",
	KWV_PARSE_DEFAULT,          0, 0,   0 },
  { "a='b'",       "a",  "b",     "",
	KWV_PARSE_DEFAULT,          0, 0,   0 },
  { "a=\"b\"",     "a",  "b",     "",
	KWV_PARSE_DEFAULT,          0, 0,   0 },
  { "  a=\"b\"",   "a",  "b",     "",
	KWV_PARSE_DEFAULT,          0, 0,   0 },
  { "  a=\"b\"  ", "a",  "b",     "  ",
    KWV_PARSE_DEFAULT,          0, 0,   0 },
  { "a = \"b\"",   "a",  "b",     "",
	KWV_PARSE_DEFAULT,          0, 0,   0 },
  /* An expected failure (rc of -1). */
  { "a = \"b",     "a",  "b",     "",
	KWV_PARSE_DEFAULT,          0, 0,  -1 },
  /* End-of-table marker. */
  { NULL,          NULL, NULL,    NULL,
    KWV_PARSE_DEFAULT,          0, 0,   0 }
};

/*
 * Run kwv_parse_qstr() over every entry of str_test_parse_strings,
 * reporting progress and the first failure to FP.
 * Return 0 if every case behaves as expected, -1 at the first one that
 * does not.
 */
static int
test_parse(FILE *fp)
{
  int caseno, st;
  char *got_end, *got_name, *got_value;
  Kwv_parse_conf conf;
  Str_test_parse *tc;

  fprintf(fp, "Testing string parsing functions...\n");
  caseno = 0;
  for (tc = str_test_parse_strings; tc->str != NULL; tc++, caseno++) {
	got_name = NULL;
	got_value = NULL;
	got_end = NULL;
	conf.sep_char = tc->sep_char;
	conf.flags = tc->flags;
	conf.end_char = tc->end_char;
	conf.errmsg = NULL;
	st = kwv_parse_qstr(tc->str, &got_name, &got_value, &got_end, &conf);

	/* This is supposed to be an error case, so it's ok. */
	if (st == -1 && tc->rc == -1)
	  continue;

	/* Everything must be present and must match what was expected. */
	if (st == tc->rc && got_name != NULL && got_value != NULL
		&& got_end != NULL
		&& streq(got_name, tc->name) && streq(got_value, tc->value)
		&& streq(got_end, tc->endstr))
	  continue;

	fprintf(fp, "test_parse case %d failed:\n", caseno);
	fprintf(fp, "  string:%s\n", tc->str);
	fprintf(fp, "  rc:%d (expected:%d)\n", st, tc->rc);
	fprintf(fp, "  name:%s (expected:%s)\n",
			got_name == NULL ? "NULL" : got_name,
			tc->name == NULL ? "NULL" : tc->name);
	fprintf(fp, "  value:%s (expected:%s)\n",
			got_value == NULL ? "NULL" : got_value,
			tc->value == NULL ? "NULL" : tc->value);
	fprintf(fp, "  endptr:%s (expected:%s)\n",
			got_end == NULL ? "NULL" : got_end,
			tc->endstr == NULL ? "NULL" : tc->endstr);
	return(-1);
  }

  return(0);
}

/*
 * Destructively carve STR into a vector of substrings.  Each element of STR
 * is separated from the next by DELIMIT, which is a string.
 * At most LIMIT substrings will be returned (a LIMIT of zero implies no
 * limit).
 * If DELIMIT is "", then STR is broken up into individual characters.
 *
 * If INC_DELIMIT is 1 it means that the delimiter should be included
 * as the first character of each vector element; if it is -1 it means the
 * delimiter should be included as the last character of each vector element;
 * and if 0 or any undefined value it means that the delimiter should not
 * be included in vector elements.
 *
 * XXX there is no way to escape the delimiter string or to quote it;
 * for example, strsplit("a,'b,c',d", ",", 0) will return "a", "'b", "c'", "d",
 * when the caller would probably like to specify the single quote as being
 * special so that "a", "b,c", and "d" are parsed out.
 *
 * Example cases:
 *                                  INC_DELIMIT
 *                     0                 1                   -1
 *
 *  "/"         -> ["",""]             ["/"]               ["/"]
 *  "//"        -> ["","",""]          ["/","/"]           ["/","/"]
 *  "/xxx"      -> ["","xxx"]          ["/xxx"]            ["/","xxx"]
 *  "/xxx/"     -> ["","xxx", ""]      ["/xxx","/"]        ["/","xxx/"]
 *  "xxx"       -> ["xxx"]             ["xxx"]             ["xxx"]
 *  "/xxx/yyy"  -> ["","xxx","yyy"]    ["/xxx","/yyy"]     ["/","xxx/","yyy"]
 *  "xxx/yyy"   -> ["xxx","yyy"]       ["xxx","/yyy"]      ["xxx/","yyy"]
 *  "xxx/yyy/"  -> ["xxx","yyy",""]    ["xxx","/yyy","/"]  ["xxx/","yyy/"]
 *  "/xxx/yyy/" -> ["","xxx","yyy",""] ["/xxx","/yyy","/"] ["/","xxx/","yyy/"]
 *
 * Two consecutive DELIMIT strings, or a trailing or initial DELIMIT string,
 * may create an empty element, depending on INC_DELIMIT.
 *
 * Example: strsplit("a=1\nb=2\nc=3\n", "\n", 0)
 * will break the string into a set of three pointers to null-terminated
 * strings (without newlines):
 * [0] -> "a=1"
 * [1] -> "b=2"
 * [2] -> "c=3"
 */
static Dsvec *
strsplit_internal(Dsvec *odsv, char *str, const char *delimit, int limit,
				  int inc_delimit)
{
  int ndel;
  char *e, *dstart, *s, *scanp;
  size_t dlen;
  Dsvec *dsv;

  /* An empty STR yields NULL, whether or not ODSV was supplied. */
  if (*str == '\0')
	return(NULL);

  dlen = strlen(delimit);
  /* Append to the caller's vector if one was given, else start a new one. */
  if (odsv != NULL)
	dsv  = odsv;
  else
	dsv = dsvec_init(NULL, sizeof(char **));

  /*
   * S is the start of the element being assembled; SCANP is where the search
   * for the next delimiter resumes (they differ only while an element that
   * begins with a delimiter is being assembled in the INC_DELIMIT > 0 case).
   * NDEL counts the passes through the loop that did not end it.
   */
  s = scanp = str;
  ndel = 0;
  while (*s != '\0') {
	/*
	 * If the limit has been reached or no further delimiter exists, the
	 * remainder of STR (not a copy) becomes the final element.
	 */
	if ((limit != 0 && dsvec_len(dsv) == (limit - 1))
		|| (dlen > 0 && (dstart = strstr(scanp, delimit)) == NULL)) {
	  dsvec_add_ptr(dsv, s);
	  break;
	}

	ndel++;
	if (dlen > 0) {
	  /*
	   * A non-empty delimiter has been found at or after SCANP; DSTART
	   * points to its first character.
	   */
	  if (inc_delimit < 0) {
		/* Include delimiter at end of S. */
		e = dstart + dlen;
		dsvec_add_ptr(dsv, strndup(s, e - s));
		s = scanp = e;
	  }
	  else if (inc_delimit > 0) {
		if (dstart != s) {
		  /*
		   * This element does not begin with a delimiter.  Copy up to
		   * but not including the delimiter.
		   */
		  e = dstart;
		  dsvec_add_ptr(dsv, strndup(s, e - s));
		  s = scanp = dstart;
		}
		else {
		  /*
		   * This element begins with a delimiter.  Just move past it and
		   * continue.
		   */
		  scanp = dstart + dlen;
		}
	  }
	  else if (inc_delimit == 0) {
		/*
		 * Destructive case: the element points into STR and the first
		 * character of the delimiter is overwritten to terminate it.
		 */
		dsvec_add_ptr(dsv, s);
		*dstart = '\0';
		s = scanp = dstart + dlen;
	  }
	}
	else {
	  /* Empty delimiter: each character becomes its own (copied) element. */
	  dsvec_add_ptr(dsv, strndup(s, 1));
	  s++;
	}
  }

  /*
   * If STR ends with a delimiter, there is an "empty" element on the end.
   * NOTE(review): dsvec_len() also counts any elements already present in
   * a caller-supplied ODSV, while NDEL only counts this call's passes.
   */
  if (dsvec_len(dsv) == ndel && inc_delimit == 0)
	dsvec_add_ptr(dsv, "");

  return(dsv);
}

/*
 * Split a private copy of STR on DELIMIT into at most LIMIT elements,
 * without keeping the delimiters; STR itself is not modified.
 */
Dsvec *
strsplit(const char *str, const char *delimit, int limit)
{

  /* The elements point into the strdup()'d copy. */
  return(strsplit_internal(NULL, strdup(str), delimit, limit, 0));
}

/*
 * Like strsplit(), but INC_DELIMIT selects whether and where the delimiter
 * is kept in each element.
 */
Dsvec *
strsplitd(const char *str, const char *delimit, int limit, int inc_delimit)
{

  /* Work on a strdup()'d copy so that STR is not modified. */
  return(strsplit_internal(NULL, strdup(str), delimit, limit, inc_delimit));
}

/*
 * Like strsplit(), but split STR in place rather than a copy: STR is
 * modified and the returned elements point into it.
 */
Dsvec *
strsplit_nocopy(char *str, const char *delimit, int limit)
{

  return(strsplit_internal(NULL, str, delimit, limit, 0));
}

/*
 * Split every string in DSV on DELIMIT, in place, and collect all of the
 * resulting elements, in order, in one new vector, which is returned.
 * LIMIT is passed through to each split.
 */
Dsvec *
strsplit_x(Dsvec *dsv, const char *delimit, int limit)
{
  int idx;
  char *entry;
  Dsvec *out;

  out = dsvec_init(NULL, sizeof(char *));
  idx = 0;
  while (idx < dsvec_len(dsv)) {
	entry = (char *) dsvec_ptr_index(dsv, idx);
	/* Each call appends to OUT; its return value is not needed. */
	strsplit_internal(out, entry, delimit, limit, 0);
	idx++;
  }

  return(out);
}

/*
 * Like strsplit(), except regular expression matching is used.
 * If an error occurs and ERRMSG is non-NULL, it is set to a descriptive
 * error message.
 */
Dsvec *
strsplit_re(const char *str, const char *regex, int limit, int inc_delimit,
			char **errmsg)
{
  int n, st;
  char errbuf[100], *s;
  regex_t preg;
  regmatch_t pmatch[1];
  Dsvec *dsv;

  if (*regex == '\0') {
	if (errmsg != NULL)
	  *errmsg = ds_xprintf("Invalid regular expression");
	return(NULL);
  }

  if ((st = regcomp(&preg, regex, REG_EXTENDED)) != 0) {
	if (errmsg != NULL) {
	  regerror(st, &preg, errbuf, sizeof(errbuf));
	  *errmsg = strdup(errbuf);
	}
	return(NULL);
  }

  /* Work on a private copy; elements may point into it. */
  s = strdup(str);
  dsv = dsvec_init(NULL, sizeof(char **));

  /* N is the ordinal of the element about to be produced. */
  n = 1;
  while (*s != '\0') {
	if (limit != 0 && n == limit) {
	  /* The limit has been reached: the remainder is the last element. */
	  dsvec_add_ptr(dsv, s);
	  break;
	}

	/* Advance to the next delimiter, if present. */
	st = regexec(&preg, s, 1, pmatch, 0);
	if (st != 0) {
	  if (st != REG_NOMATCH) {
		if (errmsg != NULL) {
		  regerror(st, &preg, errbuf, sizeof(errbuf));
		  *errmsg = strdup(errbuf);
		}
		regfree(&preg);
		return(NULL);
	  }
	  /* No more elements, this is the last one. */
	  dsvec_add_ptr(dsv, s);
	  break;
	}

	if (pmatch[0].rm_eo == 0) {
	  /*
	   * The delimiter matched the empty string right here, so S could
	   * never advance; fail rather than loop forever.
	   */
	  if (errmsg != NULL)
		*errmsg = ds_xprintf("Regular expression matches the empty string");
	  regfree(&preg);
	  return(NULL);
	}

	if (inc_delimit == 1) {
	  /* Keep the delimiter: copy up to and including the match. */
	  dsvec_add_ptr(dsv, strndup(s, pmatch[0].rm_eo));
	}
	else {
	  /* Drop the delimiter: terminate the element where the match begins. */
	  s[pmatch[0].rm_so] = '\0';
	  dsvec_add_ptr(dsv, s);
	}
	s += pmatch[0].rm_eo;

	n++;
  }

  regfree(&preg);

  return(dsv);
}

/*
 * Break pathname PATH into its slash-delimited components, any of which may
 * be empty.  Slashes cannot be escaped.
 * The slashes are *not* kept.  A trailing slash does not create an element.
 * Set IS_ABS to non-zero if PATH is absolute, or zero if it is relative.
 * Set TRAILING to non-zero if PATH is not "/" and ends with a slash.
 * Try strjoin() to reconstruct a path from components.
 *
 * Examples:
 *  PATH      Return                    IS_ABS TRAILING
 *  "/"        NULL                        1       0
 *  "//"       one element (null string)   1       1
 *  "a"        one element ("a")           0       0
 *  "a/"       one element ("a")           0       1
 *  "/a"       one element ("a")           1       0
 *  "/a/"      one element ("a")           1       1
 *  "/a/b"     two elements ("a", "b")     1       0
 *  "///"      two elements (null strings) 1       1
 */
Dsvec *
strpathparse(const char *path, int *is_abs, int *trailing)
{
  int is_root;
  const char *p, *s;
  Dsvec *dsv;

  s = path;
  is_root = 0;
  *trailing = 0;

  /* A leading slash marks an absolute path and is not itself a component. */
  if (*s == '/') {
	*is_abs = 1;
	s++;
	if (*s == '\0')
	  is_root = 1;
  }
  else
	*is_abs = 0;

  /* The vector is created lazily so that "/" (and "") yield NULL. */
  dsv = NULL;
  p = s;
  while (*p != '\0') {
	if (*p == '/') {
	  /* S..P is a (possibly empty) component ended by this slash. */
	  if (dsv == NULL)
		dsv = dsvec_init(NULL, sizeof(char *));
	  dsvec_add_ptr(dsv, strndup(s, p - s));
	  s = p + 1;
	  if (*(p + 1) == '\0' && !is_root)
		*trailing = 1;
	}
	p++;
  }

  /*
   * Whatever follows the last slash is the final component.  If PATH holds
   * no slash after its first character (e.g., "a" or "/a") the vector does
   * not exist yet and must be created before the component is added.
   */
  if (*s != '\0') {
	if (dsv == NULL)
	  dsv = dsvec_init(NULL, sizeof(char *));
	dsvec_add_ptr(dsv, strndup(s, p - s));
  }

  return(dsv);
}

/*
 * Convert the elementary types described by DESC into a string,
 * with elements separated by DELIMIT.
 * This is largely an experimental kludge that leverages existing functions
 * to externalize and internalize data structures but it will do until there's
 * a need for something better.
 *
 * XXX consider extending this to handle various string encodings; a catch is
 * that a resulting encoding must either not contain DELIMIT or must
 * automagically escape/unescape every occurrence.
 */
char *
strpack(Strpack *desc, const char *delimit)
{
  int n;
  Ds *ds;
  Strpack *dp;

  ds = ds_init(NULL);
  /* DESC is terminated by an entry whose pointer is NULL. */
  for (n = 0, dp = desc; dp->ptr != NULL; dp++, n++) {
	char *el;
	const char *sep;

	/* Every element but the first is preceded by the delimiter. */
	sep = (n == 0) ? "" : delimit;

	/*
	 * The system types below have platform-dependent widths, so each value
	 * is converted explicitly to the type that its format specifier expects.
	 */
	switch (dp->type) {
	case STRNUM_STR:
	  el = *(char **) dp->ptr;
	  ds_asprintf(ds, "%s%s", sep, non_null(el));
	  break;
	case STRNUM_I:
	  ds_asprintf(ds, "%s%d", sep, *(int *) dp->ptr);
	  break;
	case STRNUM_L:
	  ds_asprintf(ds, "%s%ld", sep, *(long *) dp->ptr);
	  break;
	case STRNUM_UL:
	  ds_asprintf(ds, "%s%lu", sep, *(unsigned long *) dp->ptr);
	  break;
	case STRNUM_TIME_T:
	  ds_asprintf(ds, "%s%lu", sep, (unsigned long) *(time_t *) dp->ptr);
	  break;
	case STRNUM_MODE_T:
	  ds_asprintf(ds, "%s%lu", sep, (unsigned long) *(mode_t *) dp->ptr);
	  break;
	case STRNUM_PID_T:
	  ds_asprintf(ds, "%s%lu", sep, (unsigned long) *(pid_t *) dp->ptr);
	  break;
	case STRNUM_IN_PORT_T:
	  ds_asprintf(ds, "%s%lu", sep, (unsigned long) *(in_port_t *) dp->ptr);
	  break;
	default:
	  /* Unrecognized element type. */
	  return(NULL);
	}
  }

  return(ds_buf(ds));
}

/*
 * Split STR into elements that are separated by DELIMIT (up to LIMIT of them)
 * and convert and assign each element according to DESC, a vector of
 * descriptions.
 *
 * The caller is responsible for ensuring that any string elements are
 * properly encoded so that they do not include DELIMIT.
 *
 * Return the number of unpacked elements, or -1 if an error occurs.
 */
int
strunpack(const char *str, const char *delimit, int limit, Strpack *desc)
{
  int count;
  char *field;
  Dsvec *parts;
  Strpack *slot;

  parts = strsplit(str, delimit, limit);
  if (parts == NULL)
	return(-1);

  slot = desc;
  for (count = 0; count < dsvec_len(parts); count++) {
	/* More elements than descriptions is an error. */
	if (slot->ptr == NULL)
	  return(-1);

	field = (char *) dsvec_ptr_index(parts, count);
	if (strnum(field, slot->type, slot->ptr) == -1)
	  return(-1);

	slot++;
  }

  /* So is having descriptions left over. */
  return((slot->ptr != NULL) ? -1 : count);
}

/*
 * Exercise strunpack() and strpack(): unpack a fixed string into a local
 * structure, print the strunpack() result to stderr, then pack the
 * structure again and print the resulting string to stderr.
 */
static MAYBE_UNUSED void
strpack_unpack_test()
{
  struct Strpack_test {
	int iii;
	char *str;
	time_t ttt;
	unsigned long ulul;
  } sample;
  Strpack layout[] = {
	{ STRNUM_I,      &sample.iii },
	{ STRNUM_STR,    &sample.str },
	{ STRNUM_TIME_T, &sample.ttt },
	{ STRNUM_UL,     &sample.ulul },
	{ STRNUM_ERR,    NULL }
  };
  int nfields;
  char *packed;

  nfields = strunpack("17,hello world,1234567,1010101010", ",", 0, layout);
  fprintf(stderr, "%d\n", nfields);

  packed = strpack(layout, ",");
  fprintf(stderr, "%s\n", packed);
}

/*
 * Join elements of DSV, beginning with index START, into a new string in
 * which elements are separated by SEPSTR (nothing if SEPSTR is NULL).
 * NULL elements are skipped.  A LEN of -1 means through the end of DSV.
 * NOTE(review): as written, LEN is compared against the element index (so it
 * acts as an exclusive end index rather than a count), and the separator is
 * omitted only before index 0; confirm that both are intended.
 */
char *
strjoin_subset(Dsvec *dsv, unsigned int start, int len, char *sepstr)
{
  unsigned int pos;
  char *glue, *item;
  Ds joined;

  glue = (sepstr != NULL) ? sepstr : "";

  if (len == -1)
	len = dsvec_len(dsv);

  ds_init(&joined);
  pos = start;
  while (pos < dsvec_len(dsv) && pos < len) {
	item = (char *) dsvec_ptr_index(dsv, pos);
	if (item != NULL)
	  ds_asprintf(&joined, "%s%s", (pos == 0) ? "" : glue, item);
	pos++;
  }

  return(ds_buf(&joined));
}

/*
 * Join together the elements of DSV, each separated from the next
 * by SEPSTR (unless it is NULL).  NULL elements are ignored, although
 * empty (nul) strings are not.
 * Return the new string.
 */
char *
strjoin(Dsvec *dsv, char *sepstr)
{
#ifdef NOTDEF
  /* Older stand-alone implementation, compiled only if NOTDEF is defined. */
  unsigned int ui;
  char *s, *sep;
  Ds ds;

  if (sepstr == NULL)
	sep = "";
  else
	sep = sepstr;

  ds_init(&ds);
  for (ui = 0; ui < dsvec_len(dsv); ui++) {
	s = (char *) dsvec_ptr_index(dsv, ui);
	if (s != NULL)
	  ds_asprintf(&ds, "%s%s", (ui == 0) ? "" : sep, s);
  }

  return(ds_buf(&ds));
#else

  /* Join everything: start at element 0, with -1 meaning "through the end". */
  return(strjoin_subset(dsv, 0, -1, sepstr));
#endif
}

/*
 * A sample substitution table for strsubst(): each entry pairs the text to
 * look for with its replacement.  The table ends with a NULL entry.
 */
static MAYBE_UNUSED Strsubst test_strsubst[] = {
  { "<",  "&lt;"  },
  { "&",  "&amp;" },
  { "hi", "hello" },
  { NULL, NULL    }
};

/*
 * Return a new string formed by replacing (exactly) matching substrings with
 * corresponding replacement text (without rescanning the replacement text).
 */
Ds *
strsubst(char *str, Strsubst *tab)
{
  char *after, *cursor;
  Ds *out;
  Strsubst *rule;

  out = ds_init(NULL);
  cursor = str;
  while (*cursor != '\0') {
	/* The first table entry that matches at CURSOR wins. */
	after = NULL;
	for (rule = tab; rule->src_text != NULL; rule++) {
	  if ((after = strprefix(cursor, rule->src_text)) != NULL)
		break;
	}

	if (rule->src_text == NULL) {
	  /* Nothing matched here; copy one character. */
	  ds_appendc(out, (int) *cursor);
	  cursor++;
	}
	else {
	  /* Emit the replacement and resume after the matched text. */
	  ds_append(out, rule->subst_text);
	  cursor = after;
	}
  }
  ds_appendc(out, (int) '\0');

  return(out);
}

/*
 * Scan STR, decoding/unescaping C-style character constants, and return
 * a new string or NULL if invalid encoding is found.
 * Note that the decoded string might be binary (e.g., "\001\002\003").
 */
Ds *
struncescape(char *str)
{
  /* Harbison & Steele 2.7.6: escape letters and the characters they denote. */
  static const char esc_names[] = "abfnrtv";
  static const char esc_values[] = "\a\b\f\n\r\t\v";
  int ndigits, value;
  const char *hit;
  char *in;
  Ds *out;

  out = ds_init(NULL);
  in = str;

  while (*in != '\0') {
	if (*in != '\\') {
	  /* An ordinary character is copied as is. */
	  ds_appendc(out, (int) *in++);
	  continue;
	}

	/* Handle C-style character constants. */
	in++;
	if (*in == '\0') {
	  /* Ends with a backslash? */
	  return(NULL);
	}

	if (is_octdigit((int) *in)) {
	  /* One to three octal digits, e.g., \012 */
	  value = 0;
	  for (ndigits = 0; ndigits < 3; ndigits++) {
		if (!is_octdigit((int) *in))
		  break;
		value = value * 8 + octdigit_val((int) *in);
		in++;
	  }
	  ds_appendc(out, value);
	  continue;
	}

	/* A recognized letter maps to its control character... */
	hit = strchr(esc_names, (int) *in);
	if (hit != NULL)
	  value = esc_values[hit - esc_names];
	else {
	  /* ...and any other escaped character stands for itself. */
	  value = *in;
	}
	ds_appendc(out, value);
	in++;
  }

  ds_appendc(out, (int) '\0');
  return(out);
}

/*
 * Convert the null-terminated string PTR (if SLEN == 0) or the
 * byte sequence PTR of length SLEN (SLEN != 0) into a C-style escaped
 * string.
 */
char *
strcescape(char *ptr, size_t slen)
{
  int ch, esc;
  const unsigned char *s;
  size_t i, len;
  Ds ds;

  if (slen == 0)
	len = strlen(ptr);
  else
	len = slen;

  /*
   * Examine the input as unsigned bytes.  Read through a plain (possibly
   * signed) char, a byte of 0200 or more becomes a negative int and would be
   * printed as a long octal number instead of a three digit escape.
   */
  s = (const unsigned char *) ptr;
  ds_init(&ds);
  for (i = 0; i < len; i++) {
	ch = s[i];
	switch (ch) {
	case '\a':
	  esc = 'a'; break;
	case '\b':
	  esc = 'b'; break;
	case '\f':
	  esc = 'f'; break;
	case '\n':
	  esc = 'n'; break;
	case '\r':
	  esc = 'r'; break;
	case '\t':
	  esc = 't'; break;
	case '\v':
	  esc = 'v'; break;

	default:
	  esc = 0;
	  if (ch == 0
		  && !(i + 1 < len && s[i + 1] >= '0' && s[i + 1] <= '7')) {
		/*
		 * A special case: the short form is used for a null byte, but only
		 * if the next byte is not an octal digit, which would otherwise be
		 * read as part of the escape.
		 */
		ds_append(&ds, "\\0");
	  }
	  else if (ch == 0177 || ch < 040 || ch >= 0200) {
		/* Other control characters and non-ASCII bytes: three octal digits. */
		ds_append(&ds, ds_xprintf("\\%.3o", ch));
	  }
	  else
		ds_appendc(&ds, ch);
	  break;
	}

	if (esc) {
	  ds_appendc(&ds, (int) '\\');
	  ds_appendc(&ds, esc);
	}
  }

  /*
   * NOTE(review): a backslash in the input is copied unescaped, so the
   * result does not always decode back to the original; confirm whether
   * callers need that.
   */
  ds_appendc(&ds, (int) '\0');
  return(ds_buf(&ds));
}

/*
 * Parse a string into fields.
 *
 * This is a quick & dirty hack to leverage mkargv() to extract fields.
 */
Strfields *
strpfields(const char *str, Strfields *fields)
{
  Mkargv *dflt;
  Strfields *sf;

  /* Use the caller's structure if given, otherwise allocate one. */
  sf = fields;
  if (sf == NULL) {
	sf = ALLOC(Strfields);
	sf->conf = NULL;
  }

  sf->argc = 0;
  sf->argv = NULL;

  /* With no configuration, fields are separated by spaces and tabs. */
  if (sf->conf == NULL) {
	dflt = ALLOC(Mkargv);
	dflt->keepws = 0;
	dflt->keepq = 0;
	dflt->endq = NULL;
	dflt->startq = NULL;
	dflt->ifs = " \t";
	sf->conf = dflt;
  }

  sf->argc = mkargv((char *) str, sf->conf, &sf->argv);
  if (sf->argc == -1)
	return(NULL);

  return(sf);
}

#include <limits.h>

/*
 * Format (join) selected fields into a single string.
 * Return that string, or NULL if an error is encountered.
 *
 * Format:
 *   %j - set field join separator to the string argument
 *   %nf, %:nf, %n:f, %n:mf, %:f - field selector, where n and m are integers
 *     o "%1f" means interpolate spec->argv[1], "%2:3f" means interpolate
 *       spec->argv[2], the join separator (unless NULL), and spec->argv[3])
 *     o a reference to a nonexistent field is an error
 *   %s - interpolate the string argument
 *   %% - interpolate a per cent character
 * Anything else is interpolated verbatim.
 */
char *
strffields(const char *fmt, Strfields *spec, ...)
{
  int first, i;
  const char *endp, *f;
  char *errmsg, *j, *range, *s;
  va_list ap;
  Ds ds;
  static const char *range_char = "0123456789-,:+";

  if (fmt == NULL || spec == NULL || spec->argc == 0 || spec->argv == NULL)
	return(NULL);

  va_start(ap, spec);
  ds_init(&ds);
  f = fmt;
  j = NULL;

  while (*f != '\0') {
	if (*f == '%') {
	  f++;

	  if (*f == '%')
		ds_appendc(&ds, '%');
	  else if (*f == 'j')
		j = va_arg(ap, char *);
	  else if (*f == 's') {
		s = va_arg(ap, char *);
		ds_append(&ds, s);
	  }
	  else if (strchr(range_char, (int) *f) != NULL) {
		endp = f;
		while (*endp != '\0' && strchr(range_char, (int) *endp) != NULL)
		  endp++;
		if (*endp != 'f') {
		  va_end(ap);
		  return(NULL);
		}
		range = strndup(f, endp - f);
		if (range_is_valid(range, NULL, &errmsg) == -1) {
		  va_end(ap);
		  return(NULL);
		}
		first = 1;
		for (i = 0; i < spec->argc; i++) {
		  if (range_test(i, range, NULL, NULL) == 1) {
			if (first)
			  first = 0;
			else if (j != NULL)
			  ds_append(&ds, j);
			ds_append(&ds, spec->argv[i]);
		  }
		}
		f = endp;
	  }
	  else
		ds_appendc(&ds, *f);
	}
	else
	  ds_appendc(&ds, *f);
	f++;
  }
  ds_appendc(&ds, '\0');

  va_end(ap);

  return(ds_buf(&ds));
}

#ifndef REG_BASIC
/*
 * REG_BASIC is a FreeBSD synonym for 0, provided as a counterpart to
 * REG_EXTENDED to improve readability.  Define it here when the system's
 * headers do not, so that it can be used unconditionally below.
 */
#define REG_BASIC		0
#endif

/*
 * Convert a string representation of a regex(3) flag expression into an
 * integer suitable for use as the CFLAGS argument of strregex(), etc.
 * Refer to regex(3) for the flag definitions; the "REG_" prefix may be
 * omitted, and "0" is recognized as zero.
 * Spaces are ignored in STR; elements are separated by a comma or '|'.
 * Return -1 if STR is invalid, which assumes that -1 is not a legitimate
 * value.  If STR cannot be split, any error message is printed to stderr.
 */
static int
strregexflags(char *str)
{
  int result, i;
  size_t k, nknown;
  char *errmsg, *squeezed;
  Dsvec *parts;
  struct {
	char *name;				/* Full flag name, including "REG_" */
	int value;
  } known[] = {
	{ "REG_ICASE",    REG_ICASE },
	{ "REG_EXTENDED", REG_EXTENDED },
	{ "REG_BASIC",    REG_BASIC },
#ifdef REG_NOSPEC
	{ "REG_NOSPEC",   REG_NOSPEC },
#endif
	{ "REG_NOSUB",    REG_NOSUB },
	{ "REG_NEWLINE",  REG_NEWLINE },
#ifdef REG_PEND
	{ "REG_PEND",     REG_PEND },
#endif
  };

  nknown = sizeof(known) / sizeof(known[0]);

  squeezed = strelide(str, " ");
  errmsg = NULL;
  parts = strsplit_re(squeezed, "\\||,", 0, 0, &errmsg);
  if (parts == NULL) {
	if (errmsg != NULL)
	  fprintf(stderr, "%s\n", errmsg);
	return(-1);
  }

  result = 0;
  for (i = 0; i < dsvec_len(parts); i++) {
	char *flag;

	flag = (char *) dsvec_ptr_index(parts, i);
	if (streq(flag, "0"))
	  continue;

	/* Accept each name with or without its four character "REG_" prefix. */
	for (k = 0; k < nknown; k++) {
	  if (streq(flag, known[k].name) || streq(flag, known[k].name + 4))
		break;
	}
	if (k == nknown)
	  return(-1);

	result |= known[k].value;
  }

  return(result);
}

#ifndef STRREGEX_MAX_MATCHES
#define STRREGEX_MAX_MATCHES	20
#endif

/*
 * Regular expression string matching
 * SRC is the string that is matched against.
 * If REGEX is non-NULL, it is the regular expression; otherwise, PREG_IN
 * is expected to be a compiled regular expression.  If REGEX is non-NULL
 * and PREG_IN is non-NULL, then the compiled REGEX is stored in PREG_IN.
 * CFLAGS are regex compilation flags, or zero if there are none.
 * If MATCHES is non-NULL, element 0 will be the substring of SRC that was
 * matched by the entire regular expression; element 1 will be the substring
 * matched by the first parenthesized subexpression (if any), and so on
 * for each subexpression up to the compile-time maximum number of substrings
 * (STRREGEX_MAX_MATCHES).  Collection stops at the first subexpression that
 * did not participate in the match.
 * If an error occurs and ERRMSG is non-NULL, it will be set to a descriptive
 * error string.
 *
 * Return 0 if there is no match, -1 if there is an error (including both
 * REGEX and PREG_IN being NULL); otherwise, return the number of
 * subexpression matches plus 1.  A successful match always returns at least
 * 1, even if no match offsets were reported (e.g., REG_NOSUB was used).
 *
 * See regcomp(3).
 */
int
strregex(char *src, char *regex, regex_t **preg_in, int cflags,
		 Dsvec *matches, char **errmsg)
{
  int compiled, failed, i, n, rc, st;
  Ds ds;
  regmatch_t m[STRREGEX_MAX_MATCHES];
  regex_t *preg;

  if (regex == NULL) {
	if (preg_in == NULL)
	  return(-1);
	preg = *preg_in;
  }
  else if (preg_in != NULL)
	preg = *preg_in = ALLOC(regex_t);
  else
	preg = ALLOC(regex_t);

  rc = -1;
  st = 0;
  failed = 0;
  compiled = 0;		/* Set only after regcomp() succeeds here */

  if (regex != NULL) {
	if ((st = regcomp(preg, regex, cflags)) != 0)
	  failed = 1;
	else
	  compiled = 1;
  }

  if (!failed) {
	/*
	 * Mark every slot unused first; regexec() does not fill the array when
	 * the expression was compiled with REG_NOSUB.
	 */
	for (i = 0; i < STRREGEX_MAX_MATCHES; i++)
	  m[i].rm_so = m[i].rm_eo = -1;

	st = regexec(preg, src, STRREGEX_MAX_MATCHES, m, 0);
	if (st == 0) {
	  rc = 0;
	  /* Test the index before the element so m[] is never read out of bounds. */
	  for (i = 0; i < STRREGEX_MAX_MATCHES && m[i].rm_so != -1; i++) {
		if (matches != NULL) {
		  n = m[i].rm_eo - m[i].rm_so;
		  ds_init(&ds);
		  ds_concatn(&ds, src + m[i].rm_so, n);
		  dsvec_add_ptr(matches, ds_buf(&ds));
		}
		rc++;
	  }
	  /* There was a match, so never report it as "no match". */
	  if (rc == 0)
		rc = 1;
	}
	else if (st == REG_NOMATCH)
	  rc = 0;
	else
	  failed = 1;
  }

  if (failed) {
	if (errmsg != NULL) {
	  char errbuf[100];

	  errbuf[0] = '\0';
	  regerror(st, preg, errbuf, sizeof(errbuf));
	  *errmsg = ds_xprintf("bad regular expression: \"%s\": %s",
						   (regex != NULL) ? regex : "(precompiled)",
						   errbuf);
	}
	rc = -1;
  }

  /*
   * Release the compiled expression only if it was compiled here and is not
   * being handed back to the caller through PREG_IN.
   */
  if (compiled && preg_in == NULL)
	regfree(preg);

  return(rc);
}

/*
 * Convenience wrapper around strregex(): match REGEX (compiled with no
 * flags) against SRC without collecting submatches.
 * Return -1 if there is a problem with REGEX, 0 if there is no match, or
 * something greater than 0 if there is a match.
 * Any error message produced by strregex() is discarded.
 */
int
strmatch(char *src, char *regex)
{
  char *ignored_errmsg;

  return(strregex(src, regex, NULL, 0, NULL, &ignored_errmsg));
}

/*
 * Do the hard part of regex-based substitution, applying REGEX to
 * SRC and replacing the matched part of SRC according to REPL.
 * CFLAGS are regex compilation flags (REG_NOSUB is ignored because match
 * offsets are required), REPEAT is non-zero if matching/replacement should
 * be repeated until unsuccessful (like the 'g' modifier in s/REGEX/REPL/g).
 *
 * Submatches of SRC specified in REGEX (e.g., "foo (.*) baz") are
 * referenced as variables ${1} through ${9}, with '&' and ${0} representing
 * the entire part of SRC that was matched.  A backslash in REPL causes the
 * following character to be copied literally; a backslash that ends REPL is
 * itself copied.
 *
 * Return the resulting string, or NULL upon error (REGEX is invalid, matching
 * fails, or REPL references a submatch that did not participate); if ERRMSG
 * is non-NULL it is set for the first two kinds of error.
 * If there's no match at all, the result is set to the empty string.
 *
 * NOTE(review): when REPEAT is set, each pass matches against the remaining
 * text as if it were a new string, so an anchor such as '^' can match again
 * after a replacement.
 */
char *
strregexsub(char *src, char *regex, char *repl, int cflags, int repeat,
			char **errmsg)
{
  int compiled, ind, matched, st;
  char *r, *text;
  regex_t preg;
  regoff_t start_off, next_off;
  regmatch_t pmatch[10];
  Ds value;

  compiled = 0;
  if ((st = regcomp(&preg, regex, cflags & ~REG_NOSUB)) != 0)
	goto fail;
  compiled = 1;

  ds_init(&value);

  matched = 0;
  text = src;
  while (1) {
	if ((st = regexec(&preg, text, 10, pmatch, 0)) != 0) {
	  if (st != REG_NOMATCH)
		goto fail;
	  if (ds_len(&value) == 0)
		ds_set(&value, "");
	  /*
	   * Keep the unmatched tail after earlier replacements, even if those
	   * replacements produced no output.
	   */
	  if (matched && *text != '\0')
		ds_concat(&value, text);
	  break;
	}
	matched = 1;

	start_off = pmatch[0].rm_so;
	next_off = pmatch[0].rm_eo;
	/* Copy the text that precedes the match unchanged. */
	if (start_off != 0)
	  ds_concatn(&value, text, start_off);

	for (r = repl; *r != '\0'; r++) {
	  if (*r == '&')
		ds_concatn(&value, text + start_off, next_off - start_off);
	  else if (*r == '$' && *(r + 1) == '{'
			   && isdigit((unsigned char) *(r + 2))
			   && *(r + 3) == '}') {
		r += 2;
		ind = *r - '0';
		if (pmatch[ind].rm_so == -1 || pmatch[ind].rm_eo == -1) {
		  regfree(&preg);
		  return(NULL);
		}
		ds_concatn(&value, text + pmatch[ind].rm_so,
				   pmatch[ind].rm_eo - pmatch[ind].rm_so);
		r++;
	  }		
	  else {
		/* Only skip the backslash if there is a character after it. */
		if (*r == '\\' && *(r + 1) != '\0')
		  r++;
		ds_concatn(&value, r, 1);
	  }
	}

	text += next_off;
	if (!repeat) {
	  if (*text != '\0')
		ds_concat(&value, text);
	  break;
	}

	if (next_off == start_off) {
	  /*
	   * An empty match consumes nothing, so matching again at the same place
	   * would never terminate.  Copy one character and move past it, or stop
	   * if the end of the text has been reached.
	   */
	  if (*text == '\0') {
		if (ds_len(&value) == 0)
		  ds_set(&value, "");
		break;
	  }
	  ds_concatn(&value, text, 1);
	  text++;
	}
  }

  regfree(&preg);
  return(ds_buf(&value));

 fail:
  if (errmsg != NULL) {
	char errbuf[1024];

	errbuf[0] = '\0';
	regerror(st, &preg, errbuf, sizeof(errbuf));
	*errmsg = ds_xprintf("bad regular expression: \"%s\": %s",
						 regex, errbuf);
  }

  /* Only a successfully compiled expression may be passed to regfree(). */
  if (compiled)
	regfree(&preg);

  return(NULL);
}

/*
 * Delete any prefix ending with the last slash character present in PATHNAME
 * (after first stripping trailing slashes), and a SUFFIX, if given.  The
 * suffix is not stripped if it is identical to the remaining
 * characters in the string.  The resulting string is returned.
 * A non-existent suffix is ignored.
 *
 * An empty PATHNAME yields "", and a PATHNAME consisting only of slashes
 * yields "/"; both are string constants.  Otherwise the result points into
 * a private copy of PATHNAME (not necessarily at its start), so the caller
 * must not modify the constants or pass the result to free().
 * Return NULL if the copy cannot be allocated.
 */
char *
strbasename(char *pathname, char *suffix)
{
  char *last, *p, *q, *s;
  size_t len;

  len = strlen(pathname);
  if (len == 0)
	return("");

  if ((s = strdup(pathname)) == NULL)
	return(NULL);
  p = s + len - 1;

  /* Delete trailing slashes first. */
  while (p > s && *p == '/')
	*p-- = '\0';

  /* Is the pathname equivalent to "/"? */
  if (*p == '/') {
	/* Here p == s, so the copy is no longer needed. */
	free(s);
	return("/");
  }

  if ((last = strrchr(s, (int) '/')) != NULL)
	*last++ = '\0';
  else
	last = s;

  /*
   * We've found the start of the last component.
   * If SUFFIX was given, check if it needs to be deleted from the end of
   * the string.
   */
  if (suffix != NULL && !streq(last, suffix)
	  && (q = strsuffix(last, strlen(last), suffix)) != NULL)
	*q = '\0';

  return(last);
}

/*
 * Delete the filename portion of the argument, beginning with the last
 * slash character to the end of string (after first stripping trailing
 * slashes) and return the result.  Slashes that separate the directory from
 * the filename are removed too, so dirname("a//b") is "a".
 * Note that dirname("/") is dirname("/foo") is "/".
 * Note that dirname("") is dirname("./") is dirname(".") is dirname("foo")
 * is ".".
 *
 * The "." and "/" results are string constants; any other result is a newly
 * allocated copy.  Return NULL if the copy cannot be allocated.
 */
char *
strdirname(char *pathname)
{
  char *end, *s, *slash;
  size_t len;

  len = strlen(pathname);
  if (len == 0)
	return(".");

  if ((s = strdup(pathname)) == NULL)
	return(NULL);

  /* Delete trailing slashes, but always keep the first character. */
  end = s + len;
  while (end > s + 1 && *(end - 1) == '/')
	end--;
  *end = '\0';

  if ((slash = strrchr(s, (int) '/')) == NULL) {
	/* No directory part at all: the file is in the current directory. */
	free(s);
	return(".");
  }

  /* Back up over a run of slashes between the directory and the filename. */
  while (slash > s && *(slash - 1) == '/')
	slash--;

  if (slash == s) {
	/* Everything before the filename is the root directory. */
	free(s);
	return("/");
  }

  *slash = '\0';

  return(s);
}

/*
 * Find the extension of the last component of PATHNAME: the text from the
 * final '.' to the end of the string, provided that no '/' follows that dot.
 * Return a newly allocated copy of the extension (dot included), or NULL if
 * there is none.
 */
char *
strextname(char *pathname)
{
  char *last_dot, *last_slash;

  last_dot = strrchr(pathname, (int) '.');
  if (last_dot == NULL)
	return(NULL);

  /* A dot that belongs to a directory name is not an extension. */
  last_slash = strrchr(pathname, (int) '/');
  if (last_slash != NULL && last_slash > last_dot)
	return(NULL);

  return(strdup(last_dot));
}

/*
 * XXX needs work
 * Create a copy of STR with some simple formatting.
 * If any line of STR has more than MAX_LEN characters, try to split it.
 * Any character in BRKSTR can be replaced with a line break; if BRKSTR
 * is NULL, a space is used as the break character.
 * Return the new string, or an unmodified copy of STR if a line that is too
 * long has no earlier break character at which it can be split.
 * The result is always newly allocated.
 */
char *
strfmt(const char *str, const char *brkstr, size_t max_len)
{
  char *lastp, *nstr, *p;
  const char *brks;
  size_t len;

  if (brkstr == NULL)
	brks = " ";
  else
	brks = brkstr;

  nstr = strdup(str);

  lastp = NULL;		/* Most recent break character seen, if any */
  len = 0;			/* Characters counted on the current line */
  for (p = nstr; *p != '\0'; p++) {
	len++;
	if (strchr(brks, (int) *p) == NULL)
	  continue;
	if (len > max_len) {
	  if (lastp == NULL) {
		/*
		 * XXX What to do?
		 * The copy is only ever modified through LASTP, so it is still
		 * identical to STR here and can be returned as is.
		 */
		return(nstr);
	  }
	  /* Break the line at the previous break character. */
	  *lastp++ = '\n';
	  len = p - lastp;
	}
	lastp = p;
  }

  /* The text after the final break character may also be too long. */
  if (len > max_len) {
	if (lastp == NULL) {
	  /* XXX What to do?  No break character was seen; see above. */
	  return(nstr);
	}
	*lastp = '\n';
  }

  return(nstr);
}

/*
 * Convert a pair of hex digit characters into a byte value
 * (0 <= value <= 255), or return -1 if the pair is invalid.
 * Upper and lower case digits are accepted.  PAIR[1] is examined only if
 * PAIR[0] is a hex digit, so an empty string is safely rejected.
 */
int
hexpair2int(char *pair)
{
  int byte, hi, lo;

  /* Convert through unsigned char, as the <ctype.h> functions require. */
  hi = (unsigned char) pair[0];
  if (!isxdigit(hi))
	return(-1);

  lo = (unsigned char) pair[1];
  if (!isxdigit(lo))
	return(-1);

  hi = tolower(hi);
  lo = tolower(lo);
  byte = (hi >= 'a') ? (hi - 'a' + 10) : (hi - '0');
  byte <<= 4;
  byte += ((lo >= 'a') ? (lo - 'a' + 10) : (lo - '0'));

  return(byte);
}

/*
 * Convert STR, a null-terminated string consisting of hex character pairs,
 * into binary.  Set LENP (if non-NULL) to the size of the binary vector, in
 * bytes; this is done as soon as the length of STR has been accepted.
 * Return the newly allocated buffer, or NULL if STR is empty, has an odd
 * length, contains an invalid character, or the buffer cannot be allocated.
 */
unsigned char *
strhextob(char *str, unsigned int *lenp)
{
  int bval;
  unsigned int blen, i;
  char *p;
  size_t slen;
  unsigned char *uc;

  slen = strlen(str);
  if (slen == 0 || (slen & 01))
	return(NULL);

  blen = slen / 2;
  if (lenp != NULL)
	*lenp = blen;
  if ((uc = (unsigned char *) malloc(blen)) == NULL)
	return(NULL);

  p = str;
  for (i = 0; i < blen; i++) {
	if ((bval = hexpair2int(p)) == -1) {
	  /* Do not leak the partially filled buffer. */
	  free(uc);
	  return(NULL);
	}
	uc[i] = bval;
	p += 2;
  }

  return(uc);
} 

/*
 * Encode the LEN bytes at BUF as a null-terminated string of ASCII hex
 * digits, two per byte.
 * FLAGS may be STRBTOHEX_DEFAULT (zero) or the OR of:
 *   STRBTOHEX_UPPER - use upper case hex digits instead of lower case
 *   STRBTOHEX_NOZ   - omit the high digit of the first byte if it is zero
 */
char *
strbtohex(unsigned char *buf, size_t len, int flags)
{
  int high, low, skip_high;
  size_t pos;
  const char *digits;
  Ds out;

  if (flags & STRBTOHEX_UPPER)
	digits = "0123456789ABCDEF";
  else
	digits = "0123456789abcdef";

  ds_init(&out);
  for (pos = 0; pos < len; pos++) {
	high = (buf[pos] >> 4) & 0x0f;
	low = buf[pos] & 0x0f;

	/* Only the very first digit of the output can be suppressed. */
	skip_high = (pos == 0 && high == 0 && (flags & STRBTOHEX_NOZ));
	if (!skip_high)
	  ds_appendc(&out, (int) digits[high]);
	ds_appendc(&out, (int) digits[low]);
  }
  ds_appendc(&out, (int) '\0');

  return(ds_buf(&out));
}

/*
 * Return 1 if STR is composed of characters from ALLOWABLE, which is a bit
 * mask formed from defined character classes, 0 otherwise.
 * An empty STR is acceptable only if ALLOWABLE includes STRCLASS_NUL.
 * Special control flags that flag arguments that follow ALLOWABLE are processed
 * in the following order:
 * 1) If ALLOWABLE includes STRCLASS_SPEC, then an argument is expected
 * that is a string specifying valid characters.
 * 2) If ALLOWABLE includes STRCLASS_STOPCH, then an argument is
 * expected; if 0 is returned, that argument will be set to point at the
 * first invalid character.  (It is not set when an empty STR is rejected.)
 *
 * Each character is accepted as soon as any one of the selected classes
 * admits it.
 *
 * This is an attempt at reducing the proliferation of functions that test
 * for various string syntaxes.
 */
int
is_strclass(char *str, int allowable, ...)
{
  int ch, is_first, is_last;
  char *p, *spec, **stopch;
  va_list ap;

  if (*str == '\0')
	return(IS_STRCLASS(allowable, STRCLASS_NUL));

  va_start(ap, allowable);
  if (allowable & STRCLASS_SPEC)
	spec = va_arg(ap, char *);
  else
	spec = NULL;
  if (allowable & STRCLASS_STOPCH)
	stopch = va_arg(ap, char **);
  else
	stopch = NULL;
  va_end(ap);

  for (p = str; *p != '\0'; p++) {
	/*
	 * The <ctype.h> functions require an argument representable as an
	 * unsigned char; a plain char may be negative.
	 */
	ch = (unsigned char) *p;

	if (IS_STRCLASS(allowable, STRCLASS_ALNUM) && isalnum(ch))
	  continue;
	if (IS_STRCLASS(allowable, STRCLASS_ALPHA) && isalpha(ch))
	  continue;
	if (IS_STRCLASS(allowable, STRCLASS_DECIMAL) && isdigit(ch))
	  continue;
	if (IS_STRCLASS(allowable, STRCLASS_LC) && islower(ch))
	  continue;
	if (IS_STRCLASS(allowable, STRCLASS_UC) && isupper(ch))
	  continue;
	if (IS_STRCLASS(allowable, STRCLASS_SPACE) && *p == ' ')
	  continue;
	if (IS_STRCLASS(allowable, STRCLASS_EOL) && (*p == '\n' || *p == '\r'))
	  continue;
	if (IS_STRCLASS(allowable, STRCLASS_PUNCT) && ispunct(ch))
	  continue;
	if (IS_STRCLASS(allowable, STRCLASS_OCTAL) && (*p >= '0' && *p <= '7'))
	  continue;
	if (IS_STRCLASS(allowable, STRCLASS_HEX_UC) && (*p >= 'A' && *p <= 'F'))
	  continue;
	if (IS_STRCLASS(allowable, STRCLASS_HEX_LC) && (*p >= 'a' && *p <= 'f'))
	  continue;

	/* The remaining classes depend on the position within STR. */
	is_first = (p == str);
	is_last = (*(p + 1) == '\0');

	/* A sign is allowed only as the first character. */
	if (IS_STRCLASS(allowable, STRCLASS_SIGNED_DIGITS)
		&& ((is_first && (*p == '+' || *p == '-' || isdigit(ch)))
			|| (!is_first && isdigit(ch))))
	  continue;

	/* A hostname must begin and end with an alphanumeric. */
	if (IS_STRCLASS(allowable, STRCLASS_HOSTNAME)
		&& ((is_first && isalnum(ch))
			|| (is_last && isalnum(ch))
			|| (!is_first && !is_last
				&& (isalnum(ch) || *p == '-' || *p == '.'))))
	  continue;

	if (IS_STRCLASS(allowable, STRCLASS_SPEC)
		&& spec != NULL
		&& strchr(spec, (int) *p) != NULL)
	  continue;

	/* No selected class admits this character. */
	if (stopch != NULL)
	  *stopch = p;

	return(0);
  }

  return(1);
}

/*
 * Widen the NBYTES bytes at BUF into 16-bit little-endian units: every input
 * byte is emitted followed by a zero byte.
 * Return a newly initialized Ds holding the 2 * NBYTES output bytes.
 * http://en.wikipedia.org/wiki/UTF-16
 */
Ds *
strbtoutf16le(unsigned char *buf, size_t nbytes)
{
  size_t left;
  unsigned char *bp;
  Ds *out;

  out = ds_init(NULL);
  bp = buf;
  for (left = nbytes; left > 0; left--) {
	/* Low-order byte first, then the (always zero) high-order byte. */
	ds_appendc(out, (int) *bp++);
	ds_appendc(out, (int) '\0');
  }

  return(out);
}

/*
 * Decide whether STR can be printed safely; return 1 if so and 0 if not.
 * Tabs, newlines, and carriage returns are acceptable only if ALLOW_SP_WS
 * is non-zero; every other byte must satisfy isprint().
 * If NBYTES is 0, STR is taken to be null-terminated and is examined up to
 * its first null.  Otherwise exactly NBYTES bytes are examined, and a null
 * byte among them makes STR unprintable.
 */
int
strprintable(char *str, size_t nbytes, int allow_sp_ws)
{
  const unsigned char *cur, *stop;
  size_t count;

  count = (nbytes != 0) ? nbytes : strlen(str);

  cur = (const unsigned char *) str;
  stop = cur + count;
  for (; cur < stop; cur++) {
	switch (*cur) {
	case '\t':
	case '\n':
	case '\r':
	  if (!allow_sp_ws)
		return(0);
	  break;

	default:
	  if (!isprint((int) *cur))
		return(0);
	  break;
	}
  }

  return(1);
}

/*
 * Return a string consisting of unique characters in STR.
 * The returned string, which is null-terminated, will not contain any duplicate
 * characters; characters appear in the order of their first occurrence in
 * STR.  STR may contain any byte values (except a null byte).
 */
char *
strunique(char *str)
{
  char *p;
  unsigned char seen[UCHAR_MAX + 1];
  Ds ds;

  /* One flag per possible byte value, so STR is scanned only once. */
  memset(seen, 0, sizeof(seen));

  ds_init(&ds);
  for (p = str; *p != '\0'; p++) {
	unsigned char ix;

	ix = (unsigned char) *p;
	if (seen[ix]) {
	  /* A repeated character appeared here. */
	  continue;
	}
	seen[ix] = 1;
	ds_appendc(&ds, (int) *p);
  }
  ds_appendc(&ds, (int) '\0');

  return(ds_buf(&ds));
}

/*
 * Build a new null-terminated string from STR, omitting every character
 * that occurs in DEL.  STR itself is not modified.
 */
char *
strelide(char *str, char *del)
{
  char *cur;
  Ds out;

  ds_init(&out);
  cur = str;
  while (*cur != '\0') {
	/* Keep only characters that are absent from DEL. */
	if (strchr(del, (int) *cur) == NULL)
	  ds_appendc(&out, (int) *cur);
	cur++;
  }
  ds_appendc(&out, (int) '\0');

  return(ds_buf(&out));
}

/*
 * Return a new string with any contiguous sequence of characters from DEL
 * deleted from the end of STR; e.g.,
 *   strchop("foo4.859", ".56789") -> "foo4"
 *   strchop("foo7.859", ".5679")  -> "foo7.8"
 *   strchop("/a/b/c///", "/")     -> "/a/b/c"
 *   strchop("hello    ", " ")     -> "hello"
 * Particularly useful for deleting whitespace or newlines from the end of STR.
 * If DEL is NULL or empty, a plain copy of STR is returned.
 * XXX should probably be a regex version
 */
char *
strchop(char *str, char *del)
{
  size_t keep;
  Ds ds;

  if (del == NULL || *del == '\0')
	return(strdup(str));

  /*
   * Count how many leading characters survive.  Working with a length rather
   * than a pointer avoids stepping to before the start of STR when STR is
   * empty or consists entirely of characters from DEL.
   */
  keep = strlen(str);
  while (keep > 0 && strchr(del, (int) str[keep - 1]) != NULL)
	keep--;

  ds_init(&ds);
  ds_setn(&ds, (unsigned char *) str, keep);
  ds_appendc(&ds, (int) '\0');

  return(ds_buf(&ds));
}

/*
 * Simple tabular formatting for strings.
 * NCOLS is the number of columns in the table, COLUMN_SEP is the minimum
 * amount of blank space between columns, and if ALIGN is non-zero, the
 * lhs of each column is aligned (left justified).
 * Originally written to pretty print program usage displays (command flags).
 */
Strtable *
strtable_init(int ncols, int column_sep, int align)
{
  int i;
  Strtable *stab;

  stab = ALLOC(Strtable);
  stab->ncols = ncols;
  stab->nrows = 0;
  stab->column_sep = column_sep;
  stab->align = align;
  stab->columns = ALLOC_N(Dsvec *, ncols);
  for (i = 0; i < ncols; i++)
	stab->columns[i] = dsvec_init(NULL, sizeof(char *));

  return(stab);
}

/*
 * Append one row to table STAB.
 * The variable arguments are the row's cells, one char * per column, in
 * column order; the caller must supply exactly STAB->ncols of them.
 * The string pointers are stored as given.
 */
void
strtable_add_row(Strtable *stab, ...)
{
  int c;
  va_list cells;

  va_start(cells, stab);
  c = 0;
  while (c < stab->ncols) {
	char *cell;

	cell = va_arg(cells, char *);
	dsvec_add_ptr(stab->columns[c], cell);
	c++;
  }
  va_end(cells);

  stab->nrows++;
}

/*
 * Default comparison function for sorting a column (case sensitive).
 */
static int
strtable_column_compar(const void *ap, const void *bp)
{
  Strtable_row_sort *a, *b;

  a = *(Strtable_row_sort **) ap;
  b = *(Strtable_row_sort **) bp;

  return(strcmp(a->colval, b->colval));
}

/*
 * Sort the rows of table STAB by the values in column COL, using COMPAR.
 * Column numbers start at zero.
 * If COMPAR is NULL, use the default (case sensitive) collation.
 * The table itself is not reordered; the result is a vector of
 * Strtable_row_sort pointers, in sorted order, each recording an original
 * row number and that row's value in column COL.
 */
Dsvec *
strtable_sort(Strtable *stab, int col,
			  int (*compar)(const void *, const void *))
{
  int r;
  int (*cmp)(const void *, const void *);
  Dsvec *order;

  cmp = (compar != NULL) ? compar : strtable_column_compar;

  order = dsvec_init(NULL, sizeof(Strtable_row_sort *));
  r = 0;
  while (r < stab->nrows) {
	Strtable_row_sort *entry;

	/* Pair each row number with its sort key. */
	entry = ALLOC(Strtable_row_sort);
	entry->colval = (char *) dsvec_ptr_index(stab->columns[col], r);
	entry->rownum = r;
	dsvec_add_ptr(order, entry);
	r++;
  }

  dsvec_sort(order, cmp);

  return(order);
}

/*
 * Format and output table STAB to FP_OUT, or to stdout if FP_OUT is NULL.
 * If DSV_SORTED is non-NULL, use the row permutation that it describes (as
 * produced by strtable_sort()); otherwise rows appear in insertion order.
 * Every cell but the last in a row is padded to the width of the widest
 * cell in its column plus the table's column separation.
 * Nothing is output if STAB is NULL or has no rows.
 */
void
strtable_format(FILE *fp_out, Strtable *stab, Dsvec *dsv_sorted)
{
  int c, last_col, r, src_row, width, written, *widths;
  char *cell;
  FILE *out;

  if (stab == NULL || stab->nrows == 0)
	return;

  out = (fp_out != NULL) ? fp_out : stdout;

  /* First pass: find the widest cell in each column. */
  widths = ALLOC_N(int, stab->ncols);
  for (c = 0; c < stab->ncols; c++) {
	widths[c] = 0;
	for (r = 0; r < stab->nrows; r++) {
	  cell = dsvec_ptr_index(stab->columns[c], r);
	  width = strlen(cell);
	  if (width > widths[c])
		widths[c] = width;
	}
  }

  /* Second pass: emit the rows. */
  last_col = stab->ncols - 1;
  for (r = 0; r < stab->nrows; r++) {
	Strtable_row_sort *perm;

	perm = NULL;
	if (dsv_sorted != NULL)
	  perm = (Strtable_row_sort *) dsvec_ptr_index(dsv_sorted, r);

	for (c = 0; c < stab->ncols; c++) {
	  /* Map the output position to the stored row when sorted. */
	  src_row = (perm != NULL) ? perm->rownum : r;
	  cell = dsvec_ptr_index(stab->columns[c], src_row);

	  written = fprintf(out, "%s", cell);
	  if (c == last_col)
		fprintf(out, "\n");
	  else
		fprintf(out, "%*s", stab->column_sep + widths[c] - written, " ");
	}
  }

  free(widths);
}

/*
 * Release table STAB.
 * XXX This is currently a stub with an empty body: nothing is released.
 */
void
strtable_free(Strtable *stab)
{

}

/* A 60 character ruler, used to check column positions by eye. */
#define STRTABLE_RULER	\
  "123456789a123456789b123456789c123456789d123456789e123456789f"

/*
 * Exercise the table functions: build a small three column table, print it
 * unsorted, then sort it on the last column and print the row permutation
 * followed by the sorted table.  All output goes to stderr.
 */
static void
strtable_test(void)
{
  int align, column_sep, i, ncols, sort_col;
  Dsvec *dsv_sorted;
  Strtable *stab;

  ncols = 3;
  align = 1;
  column_sep = 3;
  sort_col = 2;
  fprintf(stderr, "columns=%d, column_sep=%d, align=%d\n",
		  ncols, column_sep, align);

  stab = strtable_init(ncols, column_sep, align);
  strtable_add_row(stab, "First",    "hello",     "zippy");
  strtable_add_row(stab, "Second",   "alphabeta", "zardoz");
  strtable_add_row(stab, "Third",    "xx",        "zinc");
  strtable_add_row(stab, "4th",      "flibber",   "zero");
  strtable_add_row(stab, "Careless", "frogs",     "zoltan");

  fprintf(stderr, "%s (unsorted)\n", STRTABLE_RULER);
  strtable_format(stderr, stab, NULL);
  fprintf(stderr, "\n");

  dsv_sorted = strtable_sort(stab, sort_col, NULL);
  fprintf(stderr, "Sorted on column %d:", sort_col);
  for (i = 0; i < dsvec_len(dsv_sorted); i++) {
	Strtable_row_sort *cs;

	cs = (Strtable_row_sort *) dsvec_ptr_index(dsv_sorted, i);
	/*
	 * Write to stderr like the rest of the report so that the row numbers
	 * stay on the same line as their label.
	 */
	fprintf(stderr, "%s%d", (i == 0) ? " " : ", ", cs->rownum);
  }
  fprintf(stderr, "\n");

  fprintf(stderr, "%s (sorted)\n", STRTABLE_RULER);
  strtable_format(stderr, stab, dsv_sorted);
  fprintf(stderr, "%s\n", STRTABLE_RULER);

  strtable_free(stab);
}

/*
 * "Fast Hashing of Variable-Length Text Strings", Peter K. Pearson,
 * Computing Practices, CACM, Vol. 33, No. 6, (June, 1990), pp. 677-680.
 * See also RFC 3074
 */

/*
 * The 256 entry table currently used by the Pearson hash functions, or NULL
 * if none has been installed yet.
 */
static unsigned char *hash_pearson_T = NULL;

/*
 * A fixed table for testing; it is installed as the active table by
 * hash_pearson_test_init().
 * As a test, the following permutation table should yield these hash values:
 * "a"   = 1, "for"  =  9, "in"   = 17, "the"   = 25
 * "and" = 2, "from" = 10, "is"   = 18, "this"  = 26
 * "are" = 3, "had"  = 11, "it"   = 19, "to"    = 27
 * "as"  = 4, "have" = 12, "not"  = 20, "was"   = 28
 * "at"  = 5, "he"   = 13, "of"   = 21, "which" = 29
 * "be"  = 6, "her"  = 14, "on"   = 22, "with"  = 30
 * "but" = 7, "his"  = 15, "or"   = 23, "you"   = 31
 * "by"  = 8, "i"    = 16, "that" = 24
 */
static unsigned char hash_pearson_test_T[256] = {
 39, 159, 180, 252,  71,   6,  13, 164, 232,  35, 226, 155,  98, 120, 154,  69,
157,  24, 137,  29, 147,  78, 121,  85, 112,   8, 248, 130,  55, 117, 190, 160,
176, 131, 228,  64, 211, 106,  38,  27, 140,  30,  88, 210, 227, 104,  84,  77,
 75, 107, 169, 138, 195, 184,  70,  90,  61, 166,   7, 244, 165, 108, 219,  51,
  9, 139, 209,  40,  31, 202,  58, 179, 116,  33, 207, 146,  76,  60, 242, 124,
254, 197,  80, 167, 153, 145, 129, 233, 132,  48, 246,  86, 156, 177,  36, 187,
 45,   1,  96,  18,  19,  62, 185, 234,  99,  16, 218,  95, 128, 224, 123, 253,
 42, 109,   4, 247,  72,   5, 151, 136,   0, 152, 148, 127, 204, 133,  17,  14,
182, 217,  54, 199, 119, 174,  82,  57, 215,  41, 114, 208, 206, 110, 239,  23,
189,  15,   3,  22, 188,  79, 113, 172,  28,   2, 222,  21, 251, 225, 237, 105,
102,  32,  56, 181, 126,  83, 230,  53, 158,  52,  59, 213, 118, 100,  67, 142,
220, 170, 144, 115, 205,  26, 125, 168, 249,  66, 175,  97, 255,  92, 229,  91,
214, 236, 178, 243,  46,  44, 201, 250, 135, 186, 150, 221, 163, 216, 162,  43,
 11, 101,  34,  37, 194,  25,  50,  12,  87, 198, 173, 240, 193, 171, 143, 231,
111, 141, 191, 103,  74, 245, 223,  20, 161, 235, 122,  63,  89, 149,  73, 238,
134,  68,  93, 183, 241,  81, 196,  49, 192,  65, 212,  94, 203,  10, 200,  47
};

/*
 * Return the Pearson table currently in use, or NULL if none has been
 * installed yet.
 */
static unsigned char *
hash_pearson_get(void)
{
  unsigned char *current;

  current = hash_pearson_T;
  return(current);
}

/*
 * Make the fixed test table the active Pearson table.
 */
static void
hash_pearson_test_init(void)
{
  unsigned char *fixed;

  fixed = hash_pearson_test_T;
  hash_pearson_T = fixed;
}

/*
 * Install the table used by the Pearson hash functions and return it.
 * If NEW_T is non-NULL it becomes the table as is.  Otherwise a new 256
 * entry table is allocated, filled with the identity permutation, and
 * partially shuffled: each of the first 64 entries is swapped with the
 * entry selected by one byte of a 64 byte buffer.  That buffer comes from
 * crypto_make_random_buffer() when the crypto header is available; if not,
 * it is derived from the process id and the current time.
 * Return NULL, leaving the current table unchanged, if the table cannot be
 * allocated.
 */
static unsigned char *
hash_pearson_set(unsigned char *new_T)
{
  int i;
  unsigned char b, *r, *T;
#ifndef _DACS_CRYPTO_H_
  unsigned char seed[64];
#endif
  extern unsigned char *crypto_make_random_buffer(unsigned int len);

  if (new_T != NULL) {
	hash_pearson_T = new_T;
	return(new_T);
  }
  
  if ((T = (unsigned char *) malloc(256)) == NULL)
	return(NULL);

  /* The only bad T is supposedly the unpermuted one. */
  for (i = 0; i < 256; i++)
	T[i] = i;

#ifdef _DACS_CRYPTO_H_
  r = crypto_make_random_buffer(64);
#else
  /* The fallback bytes are only needed during the shuffle below. */
  r = seed;
  r[0] = getpid() & 0377;
  for (i = 1; i < 64; i++) {
	time_t now = time(NULL);

	r[i] = r[i - 1] ^ ((now + 1) & 0377);
  }	
#endif

  /* Shuffle */
  for (i = 0; i < 64; i++) {
	b = T[i];
	T[i] = T[r[i]];
	T[r[i]] = b;
  }

  hash_pearson_T = T;

  return(T);
}

/*
 * Compute the 8-bit Pearson hash of STR.
 * If SLEN is zero, STR is taken to be null-terminated and its length is
 * computed; otherwise SLEN bytes are hashed.  An empty string hashes to 0.
 * If FIRST_CH is non-zero it is used in place of the first byte of STR.
 * A table is created on first use if none has been installed.
 */
static DACS_ui8
hash_pearson8(unsigned char *str, size_t slen, unsigned char first_ch)
{
  unsigned char lead, *cur, *end, *table;
  DACS_ui8 acc;
  size_t nbytes;

  table = hash_pearson_T;
  if (table == NULL)
	table = hash_pearson_set(NULL);

  nbytes = (slen != 0) ? slen : strlen((char *) str);
  if (nbytes == 0)
	return(0);

  lead = (first_ch != 0) ? first_ch : str[0];

  /* Each remaining byte selects the next table entry. */
  acc = table[lead];
  end = str + nbytes;
  for (cur = str + 1; cur < end; cur++)
	acc = table[acc ^ *cur];

  return(acc);
}

static DACS_ui16
hash_pearson16(unsigned char *str, size_t slen)
{
  unsigned char ch, h1, h2;
  DACS_ui16 h;

  h1 = hash_pearson8(str, slen, 0);

  ch = str[0] + 1;
  h2 = hash_pearson8(str, slen, ch);

  h = (h1 << 8) | h2;

  return(h);
}

/*
 * Bernstein hash (2nd version w/ xor), starting from zero.
 * If SLEN is zero, STR is taken to be null-terminated and its length is
 * computed; otherwise SLEN bytes are hashed.
 */
DACS_ui32
hash_djb2(unsigned char *str, size_t slen)
{
  size_t i, nbytes;
  DACS_ui32 hash;

  nbytes = (slen != 0) ? slen : strlen((char *) str);

  hash = 0;
  for (i = 0; i < nbytes; i++) {
	/* hash = hash * 33 ^ c */
	hash = ((hash << 5) + hash) ^ str[i];
  }

  return(hash);
}

/*
 * From sdbm, an ndbm work-alike hashed database library
 * Author: Ozan Yigit <oz@nexus.yorku.ca>
 * Status: public domain.
 *
 * polynomial conversion ignoring overflows
 * [this seems to work remarkably well, in fact better
 * then the ndbm hash function. Replace at your own risk]
 * use: 65599   nice.
 *      65587   even better.
 *
 * [In one experiment, this function hashed 84165 symbols (English words
 * plus symbol table values) with no collisions. -bjb]
 * Do NOT use for cryptographic purposes.
 *
 * Hash STR, starting from the value LEVEL.  If SLEN is zero, STR is taken to
 * be null-terminated and its length is computed; otherwise SLEN bytes are
 * hashed.  An empty string yields LEVEL unchanged.
 */
static DACS_ui32
hash_sdbm32(unsigned char *str, size_t slen, DACS_ui32 level)
{
  unsigned char *s;
  size_t len;
  DACS_ui32 n;

  s = str;
  if (slen == 0)
	len = strlen((char *) str);
  else
	len = slen;
  n = level;

  /*
   * One hashing step: consume the next byte.  Note that this macro is not
   * undefined afterwards, so it stays visible in the rest of the file.
   */
#define HASHC   n = *s++ + 65587 * n

#ifdef NODUFF
  /* The straightforward loop: one step per byte. */
  while (len--)
	n = HASHC;
#else

  /*
   * Duff's device: the loop body is unrolled eight times and the switch
   * jumps into the middle of the first pass so that the leftover
   * (len mod 8) bytes are consumed there; every later pass consumes eight.
   */
  if (len > 0) {
	DACS_ui32 loop;

	/* Number of passes through the unrolled body, rounding up. */
	loop = (len + 8 - 1) >> 3;
	switch (len & (8 - 1)) {
	case 0:
	  do {
		HASHC;
	  case 7: HASHC;
	  case 6: HASHC;
	  case 5: HASHC;
	  case 4: HASHC;
	  case 3: HASHC;
	  case 2: HASHC;
	  case 1: HASHC;
	  } while (--loop);
	}
  }
#endif
  return(n);
}

/*
 * lookup8.c, by Bob Jenkins, January 4 1997
 * You can use this free for any purpose.  It has no warranty.
 */

/*
 * hashsize(n) is 2 to the power N as a 64-bit value, and hashmask(n) is a
 * mask with the N low-order bits set.
 */
#define hashsize(n) ((DACS_ui64) 1 << (n))
#define hashmask(n) (hashsize(n) - 1)

/*
 * mix -- mix 3 64-bit values reversibly.
 * mix() takes 48 machine instructions, but only 24 cycles on a superscalar
 *   machine (like Intel's new MMX architecture).  It requires 4 64-bit
 *   registers for 4::2 parallelism.
 * All 1-bit deltas, all 2-bit deltas, all deltas composed of top bits of
 *   (a,b,c), and all deltas of bottom bits were tested.  All deltas were
 *   tested both on random keys and on keys that were nearly all zero.
 *   These deltas all cause every bit of c to change between 1/3 and 2/3
 *   of the time (well, only 113/400 to 287/400 of the time for some
 *   2-bit delta).  These deltas all cause at least 80 bits to change
 *   among (a,b,c) when the mix is run either forward or backward (yes it
 *   is reversible).
 * This implies that a hash using mix64 has no funnels.  There may be
 *   characteristics with 3-bit deltas or bigger, I didn't test for
 *   those.
 *
 * Because this is a macro, each of A, B, and C is both read and assigned
 * many times; the arguments must be modifiable variables without side
 * effects.
 */

#define mix64(a, b, c) { \
  a -= b; a -= c; a ^= (c >> 43); \
  b -= c; b -= a; b ^= (a <<  9); \
  c -= a; c -= b; c ^= (b >>  8); \
  a -= b; a -= c; a ^= (c >> 38); \
  b -= c; b -= a; b ^= (a << 23); \
  c -= a; c -= b; c ^= (b >>  5); \
  a -= b; a -= c; a ^= (c >> 35); \
  b -= c; b -= a; b ^= (a << 49); \
  c -= a; c -= b; c ^= (b >> 11); \
  a -= b; a -= c; a ^= (c >> 12); \
  b -= c; b -= a; b ^= (a << 18); \
  c -= a; c -= b; c ^= (b >> 22); \
}

/*
 * hash() -- hash a variable-length key into a 64-bit value
 *   k     : the key (the unaligned variable-length array of bytes)
 *   len   : the length of the key, counting by bytes
 *   level : can be any 8-byte value
 * Returns a 64-bit value.  Every bit of the key affects every bit of
 * the return value.  No funnels.  Every 1-bit and 2-bit delta achieves
 * avalanche.  About 41+5len instructions.
 * 
 * The best hash table sizes are powers of 2.  There is no need to do
 * mod a prime (mod is sooo slow!).  If you need less than 64 bits,
 * use a bitmask.  For example, if you need only 10 bits, do
 *   h = (h & hashmask(10));
 * In which case, the hash table should have hashsize(10) elements.
 * 
 * If you are hashing n strings (u_int8 **)k, do it like this:
 *   for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
 * 
 * By Bob Jenkins, Jan 4 1997.  bob_jenkins@compuserve.com.  You may
 * use this code any way you wish, private, educational, or commercial,
 * as long as this whole comment accompanies it.
 * 
 * See http://ourworld.compuserve.com/homepages/bob_jenkins/evahash.htm
 * Use for hash table lookup, or anything where one collision in 2^^64
 * is acceptable.  Do NOT use for cryptographic purposes.
 */

/*
 * Hash a byte string into a 64-bit value (Bob Jenkins' 64-bit hash).
 *   str   : the key
 *   slen  : the length of the key in bytes; if zero, STR is taken to be a
 *           NUL-terminated string and strlen() supplies the length
 *   level : the previous hash value, or any arbitrary value
 * Returns the final value of the third state word.
 */
static DACS_ui64
hash_bj64(unsigned char *str, size_t slen, DACS_ui64 level)
{
  unsigned char *p;
  size_t i, remaining;
  DACS_ui64 a, b, c;

  /* Set up the internal state */
  remaining = (slen != 0) ? slen : strlen((char *) str);
  a = level;
  b = level;
  /* The golden ratio; an arbitrary value */
  c = 0x9e3779b97f4a7c13LL;

  /*
   * Consume the key 24 bytes at a time: each 8-byte group is read as a
   * little-endian 64-bit integer and added to one of the state words.
   */
  for (p = str; remaining >= 24; p += 24, remaining -= 24) {
	for (i = 0; i < 8; i++) {
	  a += ((DACS_ui64) p[i]) << (8 * i);
	  b += ((DACS_ui64) p[i + 8]) << (8 * i);
	  c += ((DACS_ui64) p[i + 16]) << (8 * i);
	}
	mix64(a, b, c);
  }

  /*
   * Handle the last 23 (or fewer) bytes.
   * The low byte of c is reserved for the length, so tail bytes 16..22
   * are placed one byte higher in c than they would be in a or b.
   * Only the residual byte count is added here.
   * NOTE(review): Jenkins' published lookup8.c is believed to add the
   * full key length at this point -- confirm before changing, because
   * doing so alters the hash of every key of 24 bytes or more.
   */
  c += remaining;
  for (i = 0; i < remaining; i++) {
	DACS_ui64 byte;

	byte = (DACS_ui64) p[i];
	if (i < 8)
	  a += byte << (8 * i);
	else if (i < 16)
	  b += byte << (8 * (i - 8));
	else
	  c += byte << (8 * (i - 15));
  }

  mix64(a, b, c);

  return(c);
}

/*
 * Return a DACS_ui8 hash of STR.
 * The work is done by hash_pearson8(), which receives STR and SLEN
 * unchanged and 0 as its third argument.
 */
DACS_ui8
strhash8(unsigned char *str, size_t slen)
{
  DACS_ui8 h;

  h = hash_pearson8(str, slen, 0);

  return(h);
}

/*
 * Return a DACS_ui16 hash of STR.
 * The work is done by hash_pearson16(), which receives STR and SLEN
 * unchanged.
 */
DACS_ui16
strhash16(unsigned char *str, size_t slen)
{
  DACS_ui16 h;

  h = hash_pearson16(str, slen);

  return(h);
}

/*
 * Return a DACS_ui32 hash of STR.
 * The work is done by hash_sdbm32(), which receives STR and SLEN
 * unchanged and 0 as its third argument.
 */
DACS_ui32
strhash32(unsigned char *str, size_t slen)
{
  DACS_ui32 h;

  h = hash_sdbm32(str, slen, 0);

  return(h);
}

/*
 * Return a DACS_ui64 hash of STR.
 * The work is done by hash_bj64() with an initial level of 0; if SLEN is
 * zero, hash_bj64() uses strlen(STR) as the key length.
 */
DACS_ui64
strhash64(unsigned char *str, size_t slen)
{
  DACS_ui64 h;

  h = hash_bj64(str, slen, 0);

  return(h);
}

#ifdef NOTDEF
/*
 * Hash STR (SLEN bytes) with the algorithm named by ALG (compared using
 * strcaseeq()) and store the result through VALUE.
 * VALUE must point to an object of the selected algorithm's result type:
 *   "bj", "bj_64"        -> DACS_ui64
 *   "sdbm", "sdbm_32"    -> DACS_ui32
 *   "djb2", "djb2_32"    -> DACS_ui32
 *   "pearson_16"         -> DACS_ui16
 *   "pearson", "pearson_8" -> DACS_ui8
 * Return 0 on success, or -1 if ALG is not recognized (VALUE is then left
 * untouched).
 * This code is disabled (NOTDEF).
 */
int
strhash(char *alg, unsigned char *str, size_t slen, void *value)
{

  if (strcaseeq(alg, "bj") || strcaseeq(alg, "bj_64")) {
	DACS_ui64 val;

	val = hash_bj64(str, slen, 0);
	*(DACS_ui64 *) value = val;
  }
  else if (strcaseeq(alg, "sdbm") || strcaseeq(alg, "sdbm_32")) {
	DACS_ui32 val;

	val = hash_sdbm32(str, slen, 0);
	*(DACS_ui32 *) value = val;
  }
  else if (strcaseeq(alg, "djb2") || strcaseeq(alg, "djb2_32")) {
	DACS_ui32 val;

	/* NOTE(review): hash_djb2()'s parameter list is not visible here. */
	val = hash_djb2(str, slen);
	*(DACS_ui32 *) value = val;
  }
  else if (strcaseeq(alg, "pearson_16")) {
	DACS_ui16 val;

	hash_pearson_test_init();
	val = hash_pearson16(str, slen);
	*(DACS_ui16 *) value = val;
  }
  else if (strcaseeq(alg, "pearson") || strcaseeq(alg, "pearson_8")) {
	DACS_ui8 val;

	hash_pearson_test_init();
	val = hash_pearson8(str, slen, 0);
	*(DACS_ui8 *) value = val;
  }
  else
	return(-1);

  return(0);
}
#endif

#ifdef PROG

/*
 * One search word for the substring-search self test.
 */
typedef struct Strstr_test_word {
  char *word;			/* The word to search for; NULL ends the table */
  int count;			/* Matches found by the most recent search pass */
  int expected_count;	/* Reference match count; -1 means none recorded */
} Strstr_test_word;

/*
 * The words searched for by test_strstr(), terminated by an entry whose
 * word is NULL.
 */
static Strstr_test_word strstr_words[] = {
  { "car",          0, -1 },
  { "dog",          0, -1 },
  { "cat",          0, -1 },
  { "mond",         0, -1 },
  { "cing",         0, -1 },
  { "diate",        0, -1 },
  { "strange",      0, -1 },
  { "astrocylonic", 0, -1 },
  { NULL,           0,  0 }
};

/*
 * Print the match count currently recorded for every entry of strstr_words
 * to OUT and cross-check it against the reference count.
 * If an entry has no reference count yet (expected_count is -1), its current
 * count becomes the reference; otherwise the two must be equal.
 * Return 0 if every entry is consistent, or -1 at the first mismatch.
 */
static int
test_strstr_report(FILE *out)
{
  int i;

  for (i = 0; strstr_words[i].word != NULL; i++) {
	fprintf(out, "Word %d: \"%s\": %d\n",
			i, strstr_words[i].word, strstr_words[i].count);
	if (strstr_words[i].expected_count == -1)
	  strstr_words[i].expected_count = strstr_words[i].count;
	else if (strstr_words[i].expected_count != strstr_words[i].count) {
	  fprintf(out, "Inconsistent result!\n");
	  return(-1);
	}
  }

  return(0);
}

/*
 * Compare the substring search implementations against one another.
 * SEARCH_PATH is a colon-separated list of file names; the first one that
 * load_file() can load is used as the text to search.  Every word in
 * strstr_words is counted (overlapping matches included) using strstr()
 * (only if HAVE_STRSTR is defined), dacs_strstr(), and strqsx(), and the
 * counts produced by each implementation must agree.
 * Progress is written to FP, or to stderr if FP is NULL.
 * Return 1 if all implementations agree, 0 if the test was skipped (no
 * path, unusable path, or no loadable file), and -1 on a disagreement.
 * NOTE(review): the loaded buffer and the tables returned by strqs_init()
 * are not released here; who owns them is not visible from this function.
 */
static int
test_strstr(FILE *fp, char *search_path)
{
  int i;
  char *buf, *path, *w;
  size_t size;
  Dsvec *dsv;
  FILE *out;

  if (fp == NULL)
	out = stderr;
  else
	out = fp;

  if (search_path == NULL || *search_path == '\0') {
	fprintf(out, "No path specified, skipping this test.\n");
	return(0);
  }

  /* Always write through OUT; FP may be NULL. */
  fprintf(out, "Testing strstr() functions using search path \"%s\"\n",
		  search_path);
  if ((dsv = strsplit(search_path, ":", 0)) == NULL) {
	fprintf(out, "Invalid path specified, skipping this test.\n");
	return(0);
  }

  /* Use the first file in the list that can be loaded. */
  for (i = 0; i < dsvec_len(dsv); i++) {
	path = (char *) dsvec_ptr_index(dsv, i);
	if (load_file(path, &buf, &size) != -1)
	  break;
  }

  if (i == dsvec_len(dsv)) {
	fprintf(out, "Cannot load a file, skipping this test.\n");
	return(0);
  }
  fprintf(out, "Using \"%s\"\n", path);

  /* Forget any reference counts left over from a previous run. */
  for (i = 0; strstr_words[i].word != NULL; i++)
	strstr_words[i].expected_count = -1;

#ifdef HAVE_STRSTR
  for (i = 0; strstr_words[i].word != NULL; i++) {
	w = buf;
	strstr_words[i].count = 0;
	while ((w = strstr(w, strstr_words[i].word)) != NULL) {
	  strstr_words[i].count++;
	  /* Resume one character past the match so overlaps are counted. */
	  w++;
	}
  }

  fprintf(out, "strstr() [system implementation]:\n");
  if (test_strstr_report(out) == -1)
	return(-1);
  fprintf(out, "\n");
#endif

  for (i = 0; strstr_words[i].word != NULL; i++) {
	w = buf;
	strstr_words[i].count = 0;
	while ((w = dacs_strstr(w, strstr_words[i].word)) != NULL) {
	  strstr_words[i].count++;
	  w++;
	}
  }

  fprintf(out, "dacs_strstr [DACS implementation]:\n");
  if (test_strstr_report(out) == -1)
	return(-1);
  fprintf(out, "\n");

  for (i = 0; strstr_words[i].word != NULL; i++) {
	unsigned int *td;
	size_t plen;

	w = buf;
	strstr_words[i].count = 0;
	plen = strlen(strstr_words[i].word);
	td = strqs_init(NULL, strstr_words[i].word, plen, 0);

	while ((w = strqsx(w, buf + size, strstr_words[i].word, td, plen, 0))
		   != NULL) {
	  strstr_words[i].count++;
	  w++;
	}
  }

  fprintf(out, "strqs [DACS Quick Search implementation]:\n");
  if (test_strstr_report(out) == -1)
	return(-1);

  return(1);
}

/*
 * The search path handed to test_strstr(): a colon-separated list of files.
 * The files are tried in order and the first one that can be loaded is used
 * as the text to search, so any large document or list of words should be ok.
 * Set to the empty string to disable the test.
 */
#define WORD_LIST_FILE_PATH	"/usr/share/dict/words:/usr/dict/words"

/*
 * Self-test driver; it lies between "#ifdef PROG" and "#endif", so it is
 * compiled only when PROG is defined.
 * Runs the substring search test and then the string parsing test, writing
 * all messages to stderr.  Exits with status 1 as soon as a test returns -1,
 * otherwise with status 0.  The command line arguments are not used.
 */
int
main(int argc, char **argv)
{

  fprintf(stderr, "Testing string functions...\n");
  if (test_strstr(stderr, WORD_LIST_FILE_PATH) == -1) {
	fprintf(stderr, "strstr() failed!\n");
	exit(1);
  }
  if (test_parse(stderr) == -1) {
	fprintf(stderr, "String parsing test failed!\n");
	exit(1);
  }
  fprintf(stderr, "All string tests succeeded!\n");

  exit(0);
}
#endif
