/* $Id: packlist.c,v 1.16 2002/02/13 21:14:01 richdawe Exp $ */

/*
 *  packlist.c - Package list construction
 *  Copyright (C) 1999-2001 by Richard Dawe
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "common.h"

#include <stdlib.h>
#include <string.h>
#include <search.h>
#include <ctype.h>
#include <errno.h>
#include <sys/types.h>
#include <regex.h>

/* libzippo includes */
#include <libzippo/dsm.h>
#include <libzippo/mft.h>
#include <libzippo/package.h>
#include <libzippo/packlist.h>
#include <libzippo/user.h>
#include <libzippo/util.h>

/* Data structure for passing context to packlist_find()'s package list
 * browse functions. */
typedef struct {
  /* These fields should be updated, before a packlist_browse() is run
   * with a find browser. */
  int            err;  /* 0 if no error, else error code */
  void          *data; /* Data in whatever format the browse function
			* requires. */

  /* These fields should only be touched by packlist_find()'s set-up code
   * and find_add(). */
  PACKAGE_INFO **list;        /* Unlinked list of matched packages */
  size_t         entries;     /* Current number of entries in list */
  size_t         max_entries; /* Maximum number of entries in list */  
} pl_find_context_t;

/* Data structure to pass into pl_find_user(). */
typedef struct {
  regex_t               *re;
  const PACKAGE_VERSION *version;
} pl_find_user_data_t;

/* ----------------
 * - packlist_add -
 * ---------------- */

int
packlist_add (PACKAGE_INFO *list, PACKAGE_INFO *p)
{
  PACKAGE_INFO *np = NULL;
  PACKAGE_INFO *lp = NULL;
	
  /* Er, no list or package to add? */
  if ((list == NULL) || (p == NULL)) return(0);
	
  /* Find the end of the package list. */
  for (np = list; np != NULL; np = (PACKAGE_INFO *) np->q_forw) {
    lp = np;
  }

  /* Add the package to the end of the list. */
  insque((struct qelem *) p, (struct qelem *) lp);

  return(1);
}

/* -------------------
 * - packlist_free -
 * ------------------- */

/* This should only be used on lists of dynamically-allocated
 * PACKAGE_INFO structs. */

int
packlist_free (PACKAGE_INFO *list)
{
  PACKAGE_INFO *p = NULL;
  PACKAGE_INFO *hp = NULL;
  PACKAGE_INFO *np = NULL;

  /* No list to delete! */
  if (list == NULL) return(1);

  /* Find the first element. */
  for (np = list; np != NULL; np = (PACKAGE_INFO *) np->q_back) {
    hp = np;
  }
	
  /* Now clear out the list. */
  for (p = hp; p != NULL; p = np) {		
    np = (PACKAGE_INFO *) p->q_forw; /* Store next package ptr. */
    remque((struct qelem *) p);      /* Remove pack. from list. */
    p->q_forw = p->q_back = NULL;
    package_free(p);                 /* Free package data */
    free(p), p = NULL;               /* Free package struct */
  }
	
  return(1);
}

/* -------------------
 * - packlist_remove -
 * ------------------- */

/* This just removes the package from the list - it doesn't free up its
 * memory. */

int
packlist_remove (PACKAGE_INFO *p)
{
  if (p == NULL) return(1);
  remque((struct qelem *) p);
  p->q_forw = p->q_back = NULL;
  return(1);
}
	
/* -------------------
 * - packlist_dedupe -
 * ------------------- */

/* This removes duplicates from the package list. It checks for package names
 * and versions for multiple entries for the same package. If a package has
 * more than one entry, then the following courses of action are taken:
 *
 * - If one is a manifest file, the other a DSM, then the DSM is preferred,
 *   i.e. the other package is removed from the list.
 *
 * - If both are manifests or DSMs, then the found duplicate is removed.
 */

int
packlist_dedupe (PACKAGE_INFO *packages)
{
  PACKAGE_INFO *hp       = NULL;
  PACKAGE_INFO *np_i     = NULL;
  PACKAGE_INFO *np_j     = NULL;
  PACKAGE_INFO *dupelist = NULL;	/* List of duplicate package infos */
	
  if (packages == NULL) return(1);
	
  /* Find the head of the package list. */
  for (hp = packages; hp->q_back != NULL; hp = (PACKAGE_INFO *) hp->q_back) {
    ;
  }

  /* Now dedupe */
  for (np_i = hp; np_i != NULL; np_i = (PACKAGE_INFO *) np_i->q_forw) {
    if (strlen(np_i->name) == 0) continue;

    for (np_j = hp; np_j != NULL; np_j = (PACKAGE_INFO *) np_j->q_forw) {
      if (strlen(np_j->name) == 0) continue;
      if (np_i == np_j) continue;

      /* Remove packages that have been added via a manifest
       * in addition to their DSM. */
      if (strcmp(np_i->manifest, np_j->manifest) == 0) {
	/* i = DSM + manifest, j = manifest alone */
	if (np_i->has_dsm && !np_j->has_dsm) {
	  /* Remove from the package list to the duplicate list. */
	  packlist_remove(np_j);

	  if (dupelist != NULL)
	    packlist_add(dupelist, np_j);
	  else
	    dupelist = np_j;
					
	  continue;
	}

	/* i = manifest alone, j = DSM + manifest */
	if (!np_i->has_dsm && np_j->has_dsm) {
	  packlist_remove(np_i);
					
	  if (dupelist != NULL)
	    packlist_add(dupelist, np_i);
	  else
	    dupelist = np_i;
					
	  continue;
	}

	/* Resort to normal dedupe procedure now. */
      }
			
      /* String-sensitive comparison on name */
      if (strcmp(np_i->name, np_j->name) != 0)
	continue;

      /* Version comparison */
      if (package_vercmp(&np_i->version, &np_j->version) != 0)
	continue;

      /* Type comparison */
      if (np_i->version.type != np_j->version.type)
	continue;

      /* Remove package from list - move it to the duplicate list. */
      packlist_remove(np_j);
			
      if (dupelist != NULL)
	packlist_add(dupelist, np_j);
      else
	dupelist = np_j;
    }
  }

  /* Free the duplicate list. */
  packlist_free(dupelist);
	
  return(1);
}

/* -------------------
 * - packlist_browse -
 * ------------------- */

int
packlist_browse (PACKAGE_INFO *list, package_browse_fn * browser,
		 void * context)
{
  PACKAGE_INFO * ptr = list;
  while (ptr)
    {
      if ((*browser)(ptr, context))
	return 0;
      ptr = ptr->q_forw;
    }
  return 1;
}

/* -------------------
 * - packtree_browse -
 * ------------------- */

PACKAGE_INFO *
packtree_browse (PACKAGE_INFO *p, package_browse_fn * browser, void * context)
{
  PACKAGE_DEP ** deps;
  PACKAGE_INFO * return_ptr;
	
  /* Mark current package as visited, and if we come there again, */
  /* we are in trouble.                                           */
  if (p->visited)
    return p;
	
  p->visited = 1;
  deps = p->deps;
	
  if (deps) {
    while (*deps) {
      /* TODO: what about 'NULL' dependencies ? */
      if ((*deps)->dep) {
	return_ptr = packtree_browse((*deps)->dep, browser, context);
	if (return_ptr)
	  return return_ptr;
      }
      deps++;
    }
  }
	
  if ((*browser)(p, context))
    return 0;
	
  p->visited = 0;
  return NULL;
}

/* ---------------
 * - pl_find_add -
 * --------------- */

/* This function adds a package to the list held in 'context'. On success 1
 * is returned. On failure 0 is returned and an error code is set in
 * the context. */

static int
pl_find_add (PACKAGE_INFO *p, pl_find_context_t *context)
{
  size_t i;
  PACKAGE_INFO **list = NULL;

  /* No list => can't add */
  if (context->list == NULL) {
    context->err = ENOENT;
    return(0);
  }

  /* Is there enough room in the package list? If not, double its size. */
  if (context->entries == context->max_entries) {
    context->max_entries *= 2;
    list = realloc(context->list,
		   sizeof(*context->list) * (context->max_entries + 1));

    /* This is serious - no memory! */
    if (list == NULL) {
      context->err = ENOMEM;
      return(0);
    } else {
      /* Use the new list */
      context->list = list;
    }

    /* Zero new entries */
    for (i = context->entries; i <= context->max_entries; i++) {
      context->list[i] = NULL;
    }
  }

  /* Add package to end of (unlinked) list. */
  context->list[context->entries] = p;
  context->entries++;

  /* OK */
  return(1);
}

/* ------------------
 * - pl_find_dedupe -
 * ------------------ */

static int
pl_find_dedupe (pl_find_context_t *context)
{
  PACKAGE_INFO **new_list = NULL;
  size_t i, j, new_entries;
  int is_dup;

  /* No entries => nothing to do. */
  if (context->list[0] == NULL)
    return(1);

  /* Allocate a new list to dedupe to. */
  new_list = malloc(sizeof(*new_list) * (context->max_entries + 1));
  if (new_list == NULL) {
    context->err = ENOMEM;
    return(0);
  }

  for (i = 0; i <= context->max_entries; i++) { new_list[i] = NULL; }

  /* Dedupe - since the packages can be identified by pointers, we will
   * dedupe by comparing pointers. */
  for (i = 0; i < context->max_entries; i++) {
    for (is_dup = 0, j = 0; j < i; j++) {
      if (new_list[j] == context->list[i]) {
	is_dup = 1;
	break;
      }
    }

    if (!is_dup)
      new_list[i] = context->list[i];
  }

  /* Count the number of entries */
  for (new_entries = 0; new_list[new_entries] != NULL; new_entries++) {;}

  free(context->list);
  context->list = new_list;
  context->max_entries = context->entries = new_entries;

  /* OK */
  return(1);
}

/* ------------------
 * - pl_find_simple -
 * ------------------ */

/* A simple search browser function for packlist_find(). This does an exact
 * match on the data. */

static int
pl_find_simple (PACKAGE_INFO *p, void *context_in)
{
  pl_find_context_t *context = (pl_find_context_t *) context_in;
  const char        *str     = (const char *) context->data;
  int                ret     = 0;

  if (strcmp(str, p->name) == 0) {
    if (!pl_find_add(p, context)) {
      /* Abort the browsing */
      ret = 1;
    }
  }

  return(ret);
}

/* ----------------
 * - pl_find_stem -
 * ---------------- */

/*
 * This matches on the stem of the package name, i.e. part without any
 * numerical part (version numbering). E.g. for grep20b, the '20b' would
 * be dropped to give the stem 'grep'. This could cause confusion with
 * packages that have a version number as part of the name, but the other
 * search types should match those.
 */

/* TODO: How can the confusion on packages with version numbers be avoided?
 * Is it possible? */

static int
pl_find_stem (PACKAGE_INFO *p, void *context_in)
{
  pl_find_context_t   *context = (pl_find_context_t *) context_in;
  const char          *stem    = (const char *) context->data;
  char                 p_stem[PACKAGE_MAXNAMELEN];
  char                *q       = NULL;
  int                  ret     = 0;  

  /* Construct stem from package name */
  strncpy(p_stem, p->name, sizeof(p_stem));
  p->name[sizeof(p_stem) - 1] = '\0';

  for (q = p_stem; !isdigit(*q) && (*q != '\0'); q++) {;}
  *q = '\0';

  /* Do we have a match? */
  if (strcmp(stem, p_stem) == 0) {
    if (!pl_find_add(p, context)) {
      /* Abort the browsing */
      ret = 1;
    }
  }

  return(ret);
}

/* ----------------
 * - pl_find_user -
 * ---------------- */

static int
pl_find_user (PACKAGE_INFO *p, void *context_in)
{
  pl_find_context_t   *context   = (pl_find_context_t *) context_in;
  pl_find_user_data_t *user_data = (pl_find_user_data_t *) context->data;
  int                  ret       = 0;

  /* Do we have a match? */
  if (   (regexec(user_data->re, p->name, 0, NULL, 0) == 0)
      && (package_vercmp_partial(user_data->version, &p->version) == 0) ){
    if (!pl_find_add(p, context)) {
      /* Abort the browsing */
      ret = 1;
    }
  }

  return(ret);
}

/* ------------------
 * - pl_find_regexp -
 * ------------------ */

static int
pl_find_regexp (PACKAGE_INFO *p, void *context_in)
{
  pl_find_context_t *context = (pl_find_context_t *) context_in;
  const regex_t     *re      = (const regex_t *) context->data;
  int                ret     = 0;

  if (regexec(re, p->name, 0, NULL, 0) == 0) {
    if (!pl_find_add(p, context)) {
      /* Abort the browsing */
      ret = 1;
    }
  }

  return(ret);
}

/* ----------------
 * - shell2regexp -
 * ---------------- */

/* Convert a shell-style wildcard to a regular expression. */

static int
shell2regexp (char *shell, char **re)
{
  char   *buf    = NULL;
  size_t  buflen = 0;
  char   *p      = NULL;
  size_t  i, j; /* Position in shell buffer, output buffer respectively. */

  /* Find all the '*' wildcards in the shell buffer, so we can add some
   * padding for its expansion into regexp. This could be a little more
   * intelligent (e.g. don't count '\*' sequences). */
  buflen = strlen(shell) + 1 + 2 /* for ^ and $ */;
  for (p = buf; (p = strchr(p, '*')) != NULL; p++) { buflen++; }

  buf = calloc(1, buflen);
  if (buf == NULL)
    return(0);

  /* Now perform conversion */
  buf[0] = '^', j = 1;

  for (i = 0; shell[i] != '\0'; i++) {
    /* Escaped character? */
    if (shell[i] == '\\') {      
      buf[j] = shell[i + 1], j++;
      i++;
      continue;
    }

    /* '*' => match 0 or more characters */
    if (shell[i] == '*') {
      strcat(buf, ".*"), j += 2;
      continue;
    }

    /* '?' matches one character */
    if (shell[i] == '?') {
      buf[j] = '.', j++;
      continue;
    }

    /* Character group? */
    if (shell[i] == '[') {
      buf[j] = shell[i], j++;
      i++;

      while ((shell[i] != ']') && (shell[i] != '\0')) {
	buf[j] = shell[i], j++;
	i++;
      }

      if (shell[i] == ']') {
	buf[j] = shell[i], j++;
      }

      continue;
    }

    /* Plain old character */
    buf[j] = shell[i], j++;
  }

  /* Put end of string indicator, then null terminate */
  buf[j] = '$', j++;
  buf[j] = '\0';

  *re = buf;
  return(1);
}

/* -----------------
 * - packlist_find -
 * ----------------- */

/*
 * This function looks for a package using a string. This string could be
 * just its name or a fully qualified version or a regular expression.
 * The actual matching is down using package browser functions.
 *
 * On success a pointer to an unlinked list (an array) of package pointers
 * is returned. This list is NULL terminated. On failure NULL is returned.
 * It is the duty of the caller to free the memory returned via the pointer.
 */

PACKAGE_INFO **
packlist_find (PACKAGE_INFO *packages, const char *str, const int how)
{
  regex_t            re;
  pl_find_context_t  context;
  char              *q = NULL;
  size_t             i;

  /* Vars to pass to pl_find_user() */
  pl_find_user_data_t user_data;
  char name[PACKAGE_MAXNAMELEN];
  PACKAGE_VERSION version;

  /* Vars to pass to pl_find_stem() */
  char stem[PACKAGE_MAXNAMELEN];  
  
  /* Start off with a list capable of holding 4 matched packages. This is
   * an estimate of the maximum number of installed versions & types
   * of a package. */
  context.entries     = 0;
  context.max_entries = 4;

  context.list = malloc(sizeof(*context.list) * (context.max_entries + 1));
  
  if (context.list == NULL)
    return(NULL);

  for (i = 0; i <= context.max_entries; i++) { context.list[i] = NULL; }

  /* Use the appropriate search method(s) to add packages to the list.
   * Some pre-processing of the data may be necessary before calling
   * the browser function. */

  /* TODO: Handle errors! */

  if (how & PACKLIST_FIND_SIMPLE) {
    /* Exact text match */
    context.err  = 0;
    context.data = (void *) str;
    packlist_browse(packages, pl_find_simple, &context);
  }

  if (how & PACKLIST_FIND_STEM) {
    /* Build the stem for the package name. */
    strncpy(stem, str, sizeof(stem));
    stem[sizeof(stem) - 1] = '\0';

    for (q = stem; !isdigit(*q) && (*q != '\0'); q++) {;}
    *q = '\0';

    /* Stem-based text match */
    context.err  = 0;
    context.data = (void *) stem;
    packlist_browse(packages, pl_find_stem, &context);
  }

  if (how & PACKLIST_FIND_USER) {
    if (user_parse_package_string(str, name, sizeof(name), &version)) {
      char *re_str = NULL;
      int   re_ok  = 0;

      re_ok = shell2regexp(name, &re_str);

      if (!re_ok)
	warnf("Could not parse wildcards in '%s'", name);

      if (re_ok) {
	if (regcomp(&re, re_str, REG_EXTENDED | REG_NOSUB) != 0) {
	  warnf("Could not compile regular expression '%s'", re_str);
	  re_ok = 0;
	}
      }

      if (re_ok) {
	/* User package spec-based text match with regexps on name */
	user_data.re      = &re;
	user_data.version = &version;

	context.err  = 0;
	context.data = (void *) &user_data;

	packlist_browse(packages, pl_find_user, &context);	
      }

      /* Tidy up */
      if (re_str != NULL)
	free(re_str), re_str = NULL;
    } else {
      /* TODO: Return parsing error! */
    }
  }

  if (how & PACKLIST_FIND_REGEXP) {
    /* Compile the regular expression */
    if (regcomp(&re, str, REG_EXTENDED /* | REG_ICASE */ | REG_NOSUB) == 0) {
      /* TODO: How to signal error? */
      /* Regular expression-based text match */
      context.err  = 0;
      context.data = (void *) &re;
      packlist_browse(packages, pl_find_regexp, &context);

      /* Tidy up */
      regfree(&re);
    } else {
      warnf("Could not compile regular expression '%s'", str);
    }
  }

  /* Remove duplicates from the find results */
  pl_find_dedupe(&context);

  /* No entries => destroy list */
  if (context.entries == 0) {
    free(context.list);
    context.list = NULL;
  }

  return(context.list);
}
