/*
 * Pan - A Newsreader for X
 * Copyright (C) 1999, 2000  Pan Development Team (pan@superpimp.org)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 * 
 */

#include <config.h>

#include <glib.h>
#include <libgnome/gnome-defs.h>
#include <libgnome/gnome-i18n.h>

#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#include "article.h"
#include "article-thread.h"
#include "util.h"

/**
 * Skip the leading "Re: " of a subject header, if any.
 *
 * The match accepts "Re" in either case and requires the colon and one
 * following space.  This is a macro: the argument is evaluated more than
 * once, so it must not have side effects.  Every use of the argument is
 * parenthesized so that expressions such as `s + 1' expand correctly.
 *
 * @param a the subject string; may be NULL
 * @return a pointer four characters into a when it starts with the reply
 *         leader, otherwise a itself (which may be NULL)
 */
#define skip_reply_leader(a) \
	((((a)!=NULL) && \
	  ((a)[0]=='R' || (a)[0]=='r') && \
	  ((a)[1]=='E' || (a)[1]=='e') && \
	  ((a)[2]==':') && \
	  ((a)[3]==' ')) ? (a)+4 : (a))

/**
 * Normalized Article
 *
 * Pairs an article with a precomputed subject key so that the comparison
 * function does not have to normalize the subject on every call.
 */
typedef struct
{
	/* normalized subject as produced by normalize_subject();
	   heap-allocated and released with g_free() by whoever built the Norm */
	gchar * subject;

	/* nonzero when the article's raw subject starts with a "Re: " leader */
	gboolean is_reply;

	/* the article this record describes; the threading code also points
	   this at a temporary search key when doing lookups */
	Article * a;
}
Norm;

/**
 * Normalizing a subject header involves tearing out the multipart
 * substrings ("(21/42)" or "[12|213]") and converting it all to
 * one case so that we can use strcmp instead of g_strcasecmp.
 *
 * When we're threading articles, it's a big speedup to normalize the
 * subjects at the outset instead of normalizing them in each comparison.
 */
static gchar*
normalize_subject (const Article * a)
{
	register const gchar * in = skip_reply_leader (a->subject);
	register gchar * out = g_new (char, strlen(in)+1);
	gchar * retval = out;
	const gboolean multipart = a->parts!=0;

	/* skip the leading noise */
	while (*in && !isalnum((int)*in) && !isdigit((int)*in))
		++in;

	while (*in)
	{
		/* strip multipart information */
		if (multipart && (*in=='('||*in=='[') && isdigit((int)in[1])) {
			const char ch = *in=='(' ? ')' : ']';
			while (*in && *in!=ch)
				++in;
			continue;
		}

		/* strip out junk that breaks sorting  */
		if (isalnum((int)*in) || isdigit((int)*in) || isspace((int)*in))
			*out++ = ('A'<=*in && *in<='Z') ? tolower(*in) : *in;

		++in;
	}

	*out = '\0';
	return retval;
}


/**
 * Fill in the Norm record used for efficient sorting of an article.
 *
 * @param a the article to describe
 * @param setme the record to populate; its subject field receives a
 *              newly-allocated string that the caller must g_free()
 */
static void
normalize_article (Article * a, Norm * setme)
{
	/* the article is a reply when skipping the leader moves the pointer */
	const gboolean has_leader = skip_reply_leader(a->subject) != a->subject;

	setme->a = a;
	setme->is_reply = has_leader ? 1 : 0;
	setme->subject = normalize_subject (a);
}

/**
 * Compare two Norm records.
 *
 * Keys, in order: normalized subject, reply flag (non-replies first),
 * part number, then date (oldest first).
 *
 * @param va pointer to the first Norm
 * @param vb pointer to the second Norm
 * @return negative, zero or positive as va sorts before, equal to, or
 *         after vb
 */
static int
compare_pN_to_pN_by_subject (const void * va, const void * vb)
{
	const Norm * a = (const Norm *)va;
	const Norm * b = (const Norm *)vb;
	int value;
	double age;

	/* subject is the primary key, of course...
	   the first-character test is a cheap way out before the strcmp */
	if ((value = *a->subject - *b->subject))
		return value;
	if ((value = strcmp (a->subject, b->subject)))
		return value;

	/* if one but not both is a reply, the reply goes second */
	if (a->is_reply != b->is_reply)
		return a->is_reply ? 1 : -1;

	/* check multipart */
	if ((value = a->a->part - b->a->part))
		return value;

	/* oldest goes first...
	   Only the sign is returned: casting the difference itself to int is
	   undefined once it no longer fits, and lookups that use a search key
	   dated 0 produce differences as large as the date itself. */
	age = difftime (a->a->date, b->a->date);
	return (age > 0) - (age < 0);
}


/**
 * Line count used when sorting by size: the article's own linecount and,
 * when its parts field is nonzero, the linecount of every article in its
 * threads list as well.
 */
static long
linecount_including_threads (const Article * article)
{
	long total = article->linecount;

	if (article->parts != 0) {
		GSList * node;
		for (node=article->threads; node!=NULL; node=node->next)
			total += ARTICLE(node->data)->linecount;
	}

	return total;
}

/**
 * Compare two Article pointers by line count.
 *
 * @param va pointer to the first Article*
 * @param vb pointer to the second Article*
 * @return the difference between the two line counts
 */
static int
compare_ppA_to_ppA_by_linecount (const void* va, const void* vb)
{
	const long a_loc = linecount_including_threads (*(const Article**)va);
	const long b_loc = linecount_including_threads (*(const Article**)vb);

	return a_loc - b_loc;
}

static int
compare_ppA_to_ppA_by_date (const void* va, const void* vb)
{
	return (int) difftime ((**(const Article**)va).date,
	                       (**(const Article**)vb).date);
}

static int
compare_ppA_to_ppA_by_unread_children (const void* a, const void* b)
{
	return (**(const Article**)a).unread_children -
	       (**(const Article**)b).unread_children;
}

static int
compare_ppA_to_ppA_by_message_id (const void* a, const void* b)
{
	const gchar * msg_id_a = (**(const Article **)a).message_id;
	const gchar * msg_id_b = (**(const Article **)b).message_id;
	return strcmp (msg_id_a, msg_id_b);
}

/**
 * Pairs an article with a string sort key.
 * sort_articles() uses this for sorting by author.
 */
typedef struct
{
	/* the string to sort on; sort_articles() stores a lowercased,
	   heap-allocated copy of the author here and frees it after sorting */
	gchar * data;

	/* the article that the key belongs to */
	Article * article;
}
ArticleStruct;

static int
compare_pAS_to_pAS_by_data (const void * va, const void * vb)
{
	const ArticleStruct * a = (const ArticleStruct*)va;
	const ArticleStruct * b = (const ArticleStruct*)vb;
	return strcmp (a->data, b->data);
}

/**
 * Sort an array of article pointers in place.
 *
 * @param buf the array of Article pointers to reorder
 * @param article_qty the number of pointers in buf
 * @param sort_type one of the ARTICLE_SORT_* keys; anything unrecognized
 *                  sorts by subject
 * @param ascending if FALSE, the sorted array is reversed afterwards
 */
void
sort_articles (Article      ** buf,
               size_t          article_qty,
               int             sort_type,
               gboolean        ascending)
{
	size_t i;

	/* zero or one articles are already in order */
	if (article_qty < 2)
		return;

	g_return_if_fail (buf!=NULL);

	switch (sort_type)
	{
		case ARTICLE_SORT_AUTHOR:
		{
			/* sort on a lowercased copy of the author so that the
			   comparison can be a plain strcmp */
			ArticleStruct * as = g_new (ArticleStruct, article_qty);
			for (i=0; i<article_qty; ++i)
			{
				/* real name, else address, else a placeholder */
				const gchar * author = buf[i]->author_real;
				if (!is_nonempty_string(author))
					author = buf[i]->author_addr;
				if (!is_nonempty_string(author))
					author = _("?");
				as[i].data = g_strdup(author);
				g_strdown (as[i].data);
				as[i].article = buf[i];
			}
			msort (as,
			       article_qty,
			       sizeof(ArticleStruct),
			       compare_pAS_to_pAS_by_data);
			for (i=0; i<article_qty; ++i) {
				buf[i] = as[i].article;
				g_free (as[i].data);
			}
			g_free (as);
			break;
		}
		case ARTICLE_SORT_LINES:
		{
			msort (buf, article_qty, sizeof(Article*), compare_ppA_to_ppA_by_linecount);
			break;
		}
		case ARTICLE_SORT_DATE:
		{
			msort (buf, article_qty, sizeof(Article*), compare_ppA_to_ppA_by_date);
			break;
		}
		case ARTICLE_SORT_UNREAD_CHILDREN:
		{
			msort (buf, article_qty, sizeof(Article*), compare_ppA_to_ppA_by_unread_children);
			break;
		}
		case ARTICLE_SORT_SUBJECT:
		default:
		{
			/* normalize each subject once up front rather than
			   inside every comparison */
			Norm * norm = g_new (Norm, article_qty);
			for (i=0; i<article_qty; ++i)
				normalize_article (buf[i], &norm[i]);
			msort (norm, article_qty, sizeof(Norm), compare_pN_to_pN_by_subject);
			for (i=0; i<article_qty; ++i) {
				g_free (norm[i].subject);
				buf[i] = ARTICLE(norm[i].a);
			}
			g_free (norm);
			break;
		}
	}

	/* if not ascending, reverse the order */
	if (!ascending) {
		const size_t mid = article_qty/2;
		for (i=0; i!=mid; ++i) { /* swap */
			Article * tmp = buf[i];
			buf[i] = buf[article_qty-1-i];
			buf[article_qty-1-i] = tmp;
		}
	}
}


/**
 * Test whether parent is child itself or one of child's ancestors,
 * found by walking up the parent links.
 *
 * @param child the article to start from; must not be NULL
 * @param parent the article to look for; must not be NULL
 * @return TRUE if parent was reached, FALSE if the chain ended first
 *         or an argument was NULL
 */
static gboolean
is_child_of (const Article * child,
             const Article * parent)
{
	const Article * node;

	g_return_val_if_fail (child!=NULL, FALSE);
	g_return_val_if_fail (parent!=NULL, FALSE);

	for (node=child; node!=NULL; node=node->parent)
		if (node == parent)
			return TRUE;

	return FALSE;
}

/**
 * Thread the articles specified in list
 *
 * Any existing threading is discarded first.  Each article is then given
 * a parent by the first of these that finds one:
 *  - by reference: the last message-id in its References header
 *    (skipped for multiparts, which are top-level);
 *  - by multipart: for part 2 and up, the article with the same
 *    normalized subject and the same number of parts that sorts first
 *    for part 1;
 *  - by subject: for "Re: " subjects, the first article in sort order
 *    with the same normalized subject.
 * Finally each ancestor's unread_children count is rebuilt.
 *
 * @param articles array of Article pointers to thread
 * @param status if non-NULL, receives progress messages and steps
 */
void
thread_articles (GPtrArray    * articles,
                 StatusItem   * status)
{
	guint i;
	guint qty;
	Article ** refs;
	Article search_a;
	Article * p_search_a=&search_a;
	Norm * norm;
	Norm * sorted_norm;

	/* entry assertions */
	g_return_if_fail (articles!=NULL);

	/* if nothing to do, do nothing.
	   (the length is only read after the assertion above) */
	qty = articles->len;
	if (qty<1)
		return;

	/* let the user know what we're doing */
	if (status != NULL)
		status_item_emit_status_va (status, _("Threading %u articles"), qty);

	/* unthread the articles, just in case they were threaded before */
	for (i=0; i!=qty; ++i) {
		Article * a = ARTICLE(g_ptr_array_index(articles,i));
		a->parent = NULL;
		a->unread_children = 0;
		g_slist_free (a->threads);
		a->threads = NULL;
	}

	/* make a message-id-sorted array of the articles */
	refs = g_memdup (articles->pdata, sizeof(gpointer)*qty);
	qsort (refs, qty, sizeof(Article*), compare_ppA_to_ppA_by_message_id);

	/* normalize the articles; norm[i] stays parallel to articles[i] */
	norm = g_new (Norm, qty);
	for (i=0; i!=qty; ++i)
		normalize_article (ARTICLE(g_ptr_array_index(articles,i)), &norm[i]);

	/* sort the normalized articles.
	   this is a shallow copy: the subject strings are still owned by norm */
	sorted_norm = g_memdup (norm, sizeof(Norm)*qty);
	qsort (sorted_norm, qty, sizeof(Norm), compare_pN_to_pN_by_subject);

	/* thread the articles */
	for (i=0; i!=qty; ++i)
	{
		const gchar * pch;
		Article * parent = NULL;
		Article * a = ARTICLE(g_ptr_array_index(articles,i));
		gint index = -1;

		/* let the user know what we're doing */
		if (status != NULL) {
			status_item_emit_next_step (status);
			if (i==qty-1 || !(i % 256))
				status_item_emit_status_va (status,
					_("Threaded %u of %u articles"), i, qty);
		}

		/* thread by reference
		   (except for multiparts, which are top-level) */
		if (a->parts==0 && ((pch=article_get_header(a,HEADER_REFERENCES)))!=NULL && *pch=='<')
		{
			gboolean exact = FALSE;

			/* the last message-id listed is the one we look up */
			search_a.message_id = strrchr (pch, '<' );

			index = lower_bound (
				&p_search_a,
				refs,
				qty,
				sizeof(Article*),
				compare_ppA_to_ppA_by_message_id,
				&exact);

			if (exact && !is_child_of(refs[index],a))
			{
				parent = refs[index];
			}
		}

		/* thread by multipart */
		if (!parent && a->parts>1 && a->part>1)
		{
			/* search key: this article's subject and reply flag,
			   but part 1 and the earliest possible date */
			Norm n = norm[i];
			search_a.part = 1;
			search_a.date = 0; /* unlikely to get an exact match.. :) */
			n.a = &search_a;

			index = lower_bound (
				&n,
				sorted_norm,
				qty,
				sizeof(Norm),
				compare_pN_to_pN_by_subject,
				NULL);

			if (0<=index && (guint)index<qty)
			{
				Norm * match = &sorted_norm[index];
				if ((match->a != a)
					&& (match->a->parts == a->parts)
					&& (!strcmp(match->subject,n.subject))
					&& (!is_child_of(match->a,a)))
				{
					parent = match->a;
				}
			}
		}

		/* thread by subject */
		if (!parent && skip_reply_leader(a->subject)!=a->subject)
		{
			/* search key: this article's subject with the lowest
			   possible reply flag, part and date, so that the lookup
			   lands on the first article carrying this subject.
			   The reply flag has to be cleared: non-replies sort ahead
			   of replies, so a key that kept it set would always skip
			   past the original post. */
			Norm n = norm[i];
			search_a.part = 0;
			search_a.date = 0; /* unlikely to get an exact match.. :) */
			n.a = &search_a;
			n.is_reply = FALSE;

			index = lower_bound (
				&n,
				sorted_norm,
				qty,
				sizeof(Norm),
				compare_pN_to_pN_by_subject,
				NULL);

			/* is_child_of() also rejects a match that is a itself */
			if (0<=index && (guint)index<qty && !is_child_of(sorted_norm[index].a,a))
			{
				Norm * match = &sorted_norm[index];

				/* the match is the original if there is one
				   (1 original, 1 reply); otherwise it is the reply
				   that sorts first (2 replies, no top -- oldest on
				   top) */
				if (!strcmp(match->subject,n.subject))
				{
					parent = match->a;
				}
			}
		}

		if (parent != NULL) /* this article has a parent */
		{
			g_assert (!is_child_of(parent,a));

			/* link the two articles.
			   the list holds Norm pointers for now so that it can be
			   kept sorted with the Norm comparison function */
			a->parent = parent;
			parent->threads = g_slist_insert_sorted (
				parent->threads,
				&norm[i],
				compare_pN_to_pN_by_subject);
		}
	}

	/* right now all the children are normalized; point to articles */
	for (i=0; i!=qty; ++i) {
		GSList * l;
		Article * a = ARTICLE(g_ptr_array_index(articles,i));
		for (l=a->threads; l!=NULL; l=l->next)
			l->data = ((Norm*)l->data)->a;
	}

	/* calculate unread child counts:
	   every unread article with a parent bumps each of its ancestors */
	for (i=0; i!=qty; ++i) {
		Article * a = ARTICLE(g_ptr_array_index(articles,i));
		if (a->parent!=NULL && !article_is_read(a)) {
			for (a=a->parent; a!=NULL; a=a->parent)
				++a->unread_children;
		}
	}

	/* cleanup */
	g_free (refs);
	for (i=0; i!=qty; ++i)
		g_free (norm[i].subject);
	g_free (norm);
	g_free (sorted_norm);
}


/**
 * Set the multipart state flags of each article in the array.
 *
 * Both STATE_MULTIPART_ALL and STATE_MULTIPART_SOME are cleared on every
 * article.  An article with a nonzero parts count that is part 1 and has
 * no parent then gets STATE_MULTIPART_ALL if parts 2..parts all appear in
 * order in its threads list (or if it is a one-part post), and
 * STATE_MULTIPART_SOME otherwise.
 *
 * @param articles array of Article pointers to update
 */
void
check_multipart_articles (GPtrArray * articles)
{
	guint i;

	/* entry assertions */
	g_return_if_fail (articles!=NULL);

	/* set the multipart state (all/partial) */
	for (i=0; i!=articles->len; ++i)
	{
		GSList * p;
		gint j;
		Article * a = ARTICLE(g_ptr_array_index(articles,i));

		/* clear old state.
		   the two flags must be OR'ed to build the mask; AND'ing two
		   different flag bits yields an empty mask that clears nothing */
		a->state &= ~(STATE_MULTIPART_ALL|STATE_MULTIPART_SOME);

		/* not a multipart because it has no parts */
		if (!a->parts)
			continue;

		/* not a multipart because it's not the first of the set */
		if (a->part!=1 || a->parent!=NULL)
			continue;

		/* handle the single-part attachment message */
		if (a->parts==1) {
			a->state |= STATE_MULTIPART_ALL;
			continue;
		}

		/* make sure we have each multipart.
		   j is the next part number we expect to find in the list */
		for (j=a->part+1, p=a->threads;
		     j<=a->parts && p!=NULL;
		     p=p->next)
		{
			Article* b = ARTICLE(p->data);
			if (b->part > j)
				break; /* some */
			else if (b->part == j)
				++j; /* okay so far */
			else
				; /* a repost of a multipart section? */
		}

		/* j only gets past the last part if every part was found */
		if (j==a->parts+1) {
			a->state |= STATE_MULTIPART_ALL;
		} else {
			a->state |= STATE_MULTIPART_SOME;
		}
	}
}
