/*
  This source is part of PCOak, an electronic mailer for DOS based on PCElm.

  PCElm is Copyright (c) 1988-1993 Martin Freiss and Wolfgang Siebeck
           Copyright (c) 1992-1999 Demon Internet
  PCOak is Copyright (c) 2000-2002 Simon Turner, Pete Disdale and dispc members

  Thanks to an agreement between the original PCElm authors and Demon Internet
  made in late 1999:

	This program is free software; you can redistribute it and/or modify
	it under the terms of the GNU General Public License, version 1, as
	published by the Free Software Foundation.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	See the file COPYING, which contains a copy of the GNU General
	Public License.
*/

/*
 * weedout.c -- header weeding code
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "pcoak.h"
#include "header.h"
#include "macros.h"
#include "ustring.h"
#include "chars.h"

/*
 * Functions defined and used only in this module.  The three weedtestN()
 * functions are the interchangeable header-matching routines that <ckfn>
 * can point at; weedconfig() picks one according to <weedstyle>.
 */
static int weedtest0(const char *line, const char **list);	/* style 0: case-sensitive prefix */
static int weedtest1(const char *line, const char **list);	/* style 1: case-insensitive prefix */
static int weedtest2(const char *line, const char **list);	/* style 2: wildmat() patterns */
static void wildcheck(char **list);

/*
 * Global variables local to this module
 *
 * <status> starts out negative so that weedit() returns immediately (it
 * treats any negative status as "header already finished") until weedset()
 * has been called to reset the state for a new message.
 */
static int level = 0;			/* Weeding level in force; set by weedset() */
static int status = -2;			/* Status of current/last line */
static char last_eol = 0;		/* eoltest() result for the last line seen */
static long last_eolpos = -1L;		/* File pos after <last_eol> line */
static BOOL line_isnew = FALSE;		/* Was last line seen a new one? */
static int (*ckfn)(const char *, const char **) = weedtest0;  /* Check fn */

/*
 * External global variables used in this module
 */
extern BOOL mmdf;
extern char **weedlist1;	/* Headers to weedout for weedlevel 1 */
extern char **weedlist2;	/* Headers to display for weedlevel 2 */
extern int weedstyle;		/* Style of weeding to use: 0/1/2 */
extern int weedlevel;		/* Level of weedout to apply (0-3); "Chgable" in
				   the original note presumably means it can
				   be changed at run time -- confirm */
extern int printweed;		/* Level of weedout for printing (0-3) */

/*
 * Global functions defined and used in this module
 */
BOOL weedlevelok(int lvl);

/*
 * Functions defined elsewhere and used in this module
 */
char eoltest(const char *s);
BOOL htype_match(const char *p, HEADERTYPE type);
int wildmat(const char *text, const char *p, int ignore_case);


/*
 * set_weedlist()
 *
 * Build the weedout list <*list> (<list> points at either <weedlist1> or
 * <weedlist2>) from the space-separated patterns in <line>.
 *
 * Returns:
 *	 0	list built successfully
 *	 1	<*list> was already allocated; nothing is changed
 *	-1	out of memory allocating the pointer array; <*list> stays NULL
 *	-2	out of memory allocating an entry; everything allocated so far
 *		is released again and <*list> is reset to NULL, so the caller
 *		never sees a half-built list (and a later retry is possible)
 *
 * We do two passes through <line>, the first one to count the number of
 * entries and the second one to allocate and store the individual strings.
 * Only the space character separates entries.
 *
 * Lists destined for wildcard-style matching need to have a '*' at the end of
 * each item to make them work properly; we can't guarantee to know what style
 * of matching the user wants at the moment, so for any entry that doesn't end
 * with a '*', we allocate one extra byte to leave room for a '*' to be
 * appended later (wildcheck() does this when weedconfig() is called).
 *
 * Another interesting problem with wildcards is that order may matter if the
 * user has any exclusion patterns ("!..."); for e.g. "X-* !X-RBL-Warning:*",
 * where the intent is to lose all "X-*" except for "X-RBL-Warning:*", the
 * inclusive pattern would be tested (and matched) first, so the exclusive one
 * would never have a chance to take effect.  To save the user from having to
 * order the list carefully, the first pass counts the entries that start
 * with '!'; the first <nx> slots in the list are then reserved for these
 * exclusions, with the non-exclusions being stored after that point.  This
 * ensures that all exclusions are at the front of the list.
 */
int set_weedlist(char ***list, const char *line)
{
    char buf[LLINELEN];         /* Scratch copy of <line> for strtok() */
    char *s;
    size_t nx, ni, n, l, total, i;

    if (*list != NULL)          /* Already allocated! */
        return 1;

    /*
     * Pass 1: count exclusion (<nx>) and normal (<ni>) entries so we know
     * how big to make the <*list> array.  strtok() modifies its input, hence
     * the scratch copy.
     * NOTE(review): AUSTRCPY presumably bounds the copy to sizeof(buf); if
     * so, an over-long <line> is silently cut short -- confirm.
     */
    AUSTRCPY(buf, line);
    for (s = strtok(buf, " "), nx = ni = 0; s != NULL; s = strtok(NULL, " "))
    {
        if (s[0] == '!')        /* May be exclusive wildcard pattern */
            nx++;
        else                    /* Normal pattern */
            ni++;
    }
    total = nx + ni;

    /*
     * Allocate the pointer array with calloc() so every slot starts off
     * NULL; one slot more than needed guarantees a terminating NULL.
     */
    if ((*list = calloc(total + 1, sizeof **list)) == NULL)
        return -1;

    /*
     * Pass 2: store the entries.  Exclusion patterns fill slots 0..nx-1,
     * normal patterns start at slot <nx>; after the swap below <nx> and <ni>
     * are the next free index of each region.
     */
    AUSTRCPY(buf, line);
    for (s = strtok(buf, " "), ni = nx, nx = 0; s != NULL;
         s = strtok(NULL, " "))
    {
        n = (s[0] == '!') ? (nx++) : (ni++);    /* Index for this pattern */
        if ((l = strlen(s)) > 0 && s[l - 1] != '*')
            l++;                /* Add room to append a '*' later */
        if (((*list)[n] = malloc(l + 1)) == NULL)       /* out of memory */
        {
            /*
             * Undo everything: slots never filled are still NULL from
             * calloc(), and free(NULL) is harmless.
             */
            for (i = 0; i < total; i++)
                free((*list)[i]);
            free(*list);
            *list = NULL;
            return -2;
        }
        memcpy((*list)[n], s, l);       /* May include <s> terminator */
        (*list)[n][l] = '\0';           /* Terminate the new string */
    }

    /* The final pointer is already NULL; calloc() did that */
    return 0;
}


/*
 * weedconfig()
 *
 * One-off configuration of the weeding system, intended for program startup:
 *
 *  - <weedlevel> is moved DOWN (one step at a time, modulo NUM_WEEDLEVEL)
 *    until weedlevelok() accepts it.  Stepping down rather than up keeps
 *    backwards compatibility: with an empty "weedout" list, weedlevel 1
 *    falls back to 0 instead of jumping to 2 or 3.
 *  - <printweed> is moved UP in the same way (it defaults to 0, HW_FULL).
 *  - <ckfn> is pointed at the matching function for <weedstyle>: 0 and 1
 *    select weedtest0()/weedtest1(), anything else selects weedtest2().
 *  - For weedstyle 2 only, every entry of both weedlists is run through
 *    wildcheck() so that it ends with a '*'.
 *
 * weedlevelok() only ever rejects levels 1 and 2, so each adjustment loop
 * stops after at most two steps.
 */
void weedconfig(void)
{
    /* Screen weeding level: step downwards until usable */
    while (!weedlevelok(weedlevel))
    {
        weedlevel += NUM_WEEDLEVEL - 1;
        weedlevel %= NUM_WEEDLEVEL;
    }

    /* Print weeding level: step upwards until usable */
    while (!weedlevelok(printweed))
    {
        printweed += 1;
        printweed %= NUM_WEEDLEVEL;
    }

    /* Choose the header-matching function for the configured style */
    switch (weedstyle)
    {
    case 0:
        ckfn = weedtest0;
        break;
    case 1:
        ckfn = weedtest1;
        break;
    default:
        ckfn = weedtest2;
        break;
    }

    /* Only the wildcard style needs its lists patched up */
    if (weedstyle != 2)
        return;
    if (weedlist1 != NULL)
        wildcheck(weedlist1);
    if (weedlist2 != NULL)
        wildcheck(weedlist2);
}


/*
 * weedlevelok()
 *
 * Say whether weedlevel <lvl> can be used.  Levels 1 and 2 each depend on a
 * list (<weedlist1> and <weedlist2> respectively) and are rejected with
 * FALSE while that list is NULL; every other value of <lvl> is accepted
 * with TRUE.
 */
BOOL weedlevelok(int lvl)
{
    if (lvl == 1 && weedlist1 == NULL)  /* Needs a weedout list */
        return FALSE;
    if (lvl == 2 && weedlist2 == NULL)  /* Needs a display list */
        return FALSE;
    return TRUE;
}


/*
 * weedset()
 *
 * Reset the module's per-message state before weedit() is used on a new
 * message.  weedit() needs to know (a) which weeding level applies, (b)
 * whether the previous line was weeded out (so a continuation line can share
 * its fate) and (c) whether the previous line ended with a linefeed, i.e.
 * whether the next line starts a new one.  That information lives in
 * <level>, <status>, <last_eol> and <last_eolpos>.
 *
 * The state is set to "next line is a new one" and the weeding level to
 * <lvl>, with two substitutions for levels whose list is missing: level 1
 * with a NULL <weedlist1> becomes 0 (nothing to weed out), and level 2 with
 * a NULL <weedlist2> becomes 3 (nothing to display).
 */
void weedset(int lvl)
{
    switch (lvl)
    {
    case 1:                     /* Weed out entries in <weedlist1> */
        level = (weedlist1 != NULL) ? 1 : 0;
        break;
    case 2:                     /* Show only entries in <weedlist2> */
        level = (weedlist2 != NULL) ? 2 : 3;
        break;
    default:                    /* All-or-nothing levels need no list */
        level = lvl;
        break;
    }

    line_isnew = FALSE;         /* Nothing has been examined yet */
    last_eolpos = -1L;          /* Every <fpos> >= 0 compares above this */
    last_eol = 1;               /* Pretend the previous line ended with LF */
    status = 0;                 /* Previous line counts as "displayed" */
}


/*
 * weedit()
 *
 * Decide whether message header line <line> should be weeded out under the
 * settings put in force by weedset().  Returns one of:
 *	-1	End-of-header blank line (display it)
 *	 0	Regular line (display it)
 *	 1	Weeded line (don't display it)
 * Once a negative status has been recorded, every further call returns it
 * unchanged until weedset() is called again.
 *
 * This may be called from doviewmsg(), which always does an fgets() at the
 * start of the line it's about to display.  With a big buffer, it may read
 * considerably more bytes than can be displayed on a single line; this means
 * that, say, a 120-char line would be passed to us first in all its 120-char
 * glory, then (after print_string() wraps it at 80 chars) we will see it
 * again, but this time only the last 40 chars of it.  In this second case it
 * is effectively a continuation of the last line we saw, so we don't
 * re-examine it (we don't even look to see if it is just [CR]LF, which would
 * normally indicate the end of the header).
 *
 * <fpos>, if >= 0, is the file position of the start of this line; if it is
 * less than <last_eolpos>, this is the same line we saw last time, so it is
 * not treated as a new line.  <last_eol> is updated on every call, and
 * <last_eolpos> whenever <fpos> >= 0, so that we always know whether the
 * last line we saw ended with LF.
 */
int weedit(const char *line, long fpos)
{
    /* Header already finished: keep reporting that */
    if (status < 0)
        return status;

    /*
     * A line is "new" only if the previous one ended with LF and this one
     * doesn't start before the point where the previous one finished (a
     * negative <fpos> means the position is unknown, so it can't veto).
     */
    if (last_eol && (fpos < 0L || fpos >= last_eolpos))
        line_isnew = TRUE;
    else
        line_isnew = FALSE;

    if (mmdf && htype_match(line, HT_SOH))
    {
        /* MMDF SOH separator lines are always hidden */
        status = 1;
    }
    else if (line_isnew && ISBLANKLINE(line))
    {
        /* Blank line at the start of a line: end of header */
        status = -1;
    }
    else
    {
        switch (level)
        {
        case 1:                 /* Don't show if in <weedlist1> */
            /*
             * Only a new header field (new line, no leading whitespace)
             * is tested; a continuation keeps the current <status>.
             */
            if (line_isnew && !islwsp(line[0]))
                status = (*ckfn)(line, weedlist1);
            break;

        case 2:                 /* Don't show UNLESS in <weedlist2> */
            /* Same rule as level 1, with the sense of the match flipped */
            if (line_isnew && !islwsp(line[0]))
                status = ((*ckfn)(line, weedlist2) == 0) ? 1 : 0;
            break;

        case 3:                 /* Don't show any header fields at all */
            status = 1;
            break;

        default:                /* Show all header fields regardless */
            status = 0;
            break;
        }
    }

    /* Remember how (and, if known, where) this line ended */
    last_eol = eoltest(line);
    if (fpos >= 0L)
        last_eolpos = fpos + strlen(line);

    return status;
}


/*
 * weedlineisnew()
 *
 * Report whether the line most recently handed to weedit() was judged to be
 * the start of a new line (TRUE) or not (FALSE).  This is simply the current
 * value of <line_isnew>, which is FALSE until weedit() has examined a line.
 */
BOOL weedlineisnew(void)
{
    return line_isnew;
}


/*
 * weedtest0()
 *
 * Weedout check function, style 0: the original, case-sensitive comparison.
 * Returns 1 if <line> begins with any entry of the NULL-terminated <list>
 * (each entry is compared over its own full length), or 0 if none matches.
 */
static int weedtest0(const char *line, const char **list)
{
    size_t idx = 0;

    while (list[idx] != NULL)
    {
        const char *prefix = list[idx];

        if (strncmp(line, prefix, strlen(prefix)) == 0)         /* Match */
            return 1;
        idx++;
    }
    return 0;
}


/*
 * weedtest1()
 *
 * Weedout check function, style 1: as style 0 but ignoring case (the
 * comparison is done by strnicmp()).  Returns 1 if <line> begins with any
 * entry of the NULL-terminated <list>, or 0 if none matches.
 */
static int weedtest1(const char *line, const char **list)
{
    size_t idx = 0;

    while (list[idx] != NULL)
    {
        const char *prefix = list[idx];

        if (strnicmp(line, prefix, strlen(prefix)) == 0)        /* Match */
            return 1;
        idx++;
    }
    return 0;
}


/*
 * weedtest2()
 *
 * Weedout check function, style 2: wildmat() pattern comparison, called with
 * its ignore-case argument set.  The NULL-terminated <list> is scanned in
 * order and the first pattern that matches <line> decides the result:
 *
 *  - an entry starting with '!' and followed by at least one more character
 *    is an exclusion; if the part after the '!' matches, return 0 at once;
 *  - any other entry (including a lone "!") is a normal pattern; if it
 *    matches, return 1.
 *
 * Returns 0 if nothing in the list matches.
 */
static int weedtest2(const char *line, const char **list)
{
    const char *pat;
    size_t idx;

    for (idx = 0; (pat = list[idx]) != NULL; idx++)
    {
        if (pat[0] == '!' && pat[1] != '\0')
        {
            /* "don't match" pattern: a hit here is a definite no */
            if (wildmat(line, pat + 1, 1))
                return 0;
            continue;
        }

        if (wildmat(line, pat, 1))              /* Definite match */
            return 1;
    }
    return 0;
}


/*
 * wildcheck()
 *
 * Make the NULL-terminated weedlist <list> suitable for wildcard matching:
 * every non-empty entry that doesn't already end with '*' gets one appended
 * in place.  This writes one byte past the entry's current terminator, so
 * each such entry must have that spare byte available; set_weedlist()
 * allocates it for exactly this purpose.
 */
static void wildcheck(char **list)
{
    char *entry;
    size_t idx, len;

    for (idx = 0; (entry = list[idx]) != NULL; idx++)
    {
        len = strlen(entry);
        if (len == 0 || entry[len - 1] == '*')
            continue;           /* Empty, or already a wildcard */

        entry[len] = '*';       /* Overwrites the old terminator ... */
        entry[len + 1] = '\0';  /* ... so re-terminate in the spare byte */
    }
}
