/*
 *  Copyright (C) 1999,2001,2004,2005,2007  Anders Gavare.  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions are met:
 *
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *     derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 *  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 *  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 *  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 *  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 *  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 *  SUCH DAMAGE.
 *
 *
 *  $Id: scan_c.c,v 1.4 2005/03/03 20:40:15 debug Exp $
 *
 *  File comment:
 *	This program ("functionfinder") scans C source files for function
 *	definitions and preprocessor defines, and outputs one line
 *	for each function or define found, compatible with "sb_db_builder"
 *	(sourcebrowser database builder).
 *
 *	I KNOW that it really sucks, I should do preprocessing etc... but
 *	it works on many files, and that's enough for my tests with
 *	sourcebrowser. :)
 *
 *  File revision history:
 *	? Mar 1999	0.0.0	buggy
 *	2 Apr 1999	-	Finds functions of any type.
 *				Allows preprocessor directives to continue
 *				on the next line.
 *	3 Apr 1999	0.0.5	Adding stringhash stuff
 *	6 Apr 1999	0.0.7	Fixed comment stuff
 *	16 Apr 1999	0.0.12	Attempt to fix some #else #endif bugs by a
 *				really cheap hack.
 *	21 Jan 2004	revitalizing
 */


#include "global.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/*  Capacity, in bytes, of the buffer that holds one extracted identifier
    (function or macro name) while a file is scanned, including the
    terminating NUL.  */
#define	MAXLEN	256


/*  Number of valid bytes in the file buffer that is currently being
    scanned.  It is set by scanfile() and read by skip_quote(), skip_box()
    and skip_parenthesis() as the upper bound for their scan position.  */
int size;


/*
 *  stringhash():
 *
 *  Computes a hash value for the NUL-terminated string s by mixing every
 *  character (and its successor) into two unsigned accumulators.
 *
 *  s        string to hash; may be NULL.
 *
 *  Returns 0 if s is NULL, otherwise the final accumulator converted to
 *  int.  The result may be negative; callers in this file mask it with
 *  HASHMASK before use.
 */
int stringhash(char *s)
{
	unsigned int c = 0x918fa7b, d = 0x19f8a37;
	int pos;

	if (s == NULL)
		return 0;

	for (pos = 0; s[pos] != '\0'; pos++) {
		int ch = s[pos];

		/*  s[pos+1] is at worst the terminating NUL, so this read
		    never leaves the string.  */
		c ^= (d - s[pos+1]);
		d -= (c << (ch & 15));
		d ^= (c - ch*11);

		/*  If plain char is signed, a byte with the high bit set makes
		    (ch >> 3) negative, and shifting by a negative count is
		    undefined.  Masking keeps the count in 0..31 and does not
		    change the result for 7-bit ASCII characters.  */
		d += (c >> ((ch >> 3) & 31));
		d += 0xc0debabe;
	}

	return d;
}


/*
 *  skip_quote():
 *
 *  Skips the body of a string or character literal.
 *
 *  buf        buffer being scanned; the global 'size' is the number of
 *             valid bytes in it.
 *  pos        in:  index of the opening quote.
 *             out: index of the matching closing quote, or 'size' if the
 *                  literal is not terminated inside the buffer.
 *  quotetype  the quote character that ends the literal.
 *
 *  A backslash escapes the character that follows it, so an escaped quote
 *  does not end the literal.
 */
void skip_quote(char *buf, int *pos, char quotetype)
{
	int p = *pos + 1;

	/*  The bound is tested before buf[p] is read, so a literal that is
	    cut off at the end of the buffer cannot cause a read past it.  */
	while (p < size && buf[p] != quotetype) {
		if (buf[p] == '\\')
			p++;		/*  skip the escaped character too  */
		p++;
	}

	/*  A trailing backslash may have stepped one past the end.  */
	if (p > size)
		p = size;

	*pos = p;
}


/*
 *  skip_box():
 *
 *  Skips a brace-delimited block, including nested blocks, comments and
 *  string/character literals found inside it.
 *
 *  buf   buffer being scanned; the global 'size' is the number of valid
 *        bytes in it.  The code looks one byte ahead and uses strncmp(),
 *        so buf[size] must be readable and hold a NUL, which is how
 *        scanfile() allocates the buffer.
 *  pos   in:  index of the opening '{'.
 *        out: index of the matching '}', or 'size' if the block is not
 *             closed inside the buffer.
 */
void skip_box(char *buf, int *pos)
{
	int p = *pos + 1;

	/*  The bound is always tested before buf[p] is read.  */
	while (p < size && buf[p] != '}') {
		char c = buf[p];

		if (c == '#') {
			/*  Cheap hack: when an #else is found, skip ahead to
			    the next "#end" so that braces in the alternative
			    branch are not counted twice.
			    FIX:  Nested conditionals are not handled; maybe
			    this should be recursive.  */
			if (!strncmp(buf + p + 1, "else", 4)) {
				p++;
				while (p < size && buf[p] != '\0' &&
				    strncmp(buf + p, "#end", 4))
					p++;
			}
		} else if (c == '/' && buf[p + 1] == '/') {
			/*  Line comment: skip until end of line.
			    FIX:  a backslash-continued comment may occupy
			    several lines.  */
			p += 2;
			while (p < size && buf[p] != '\0' && buf[p] != '\n')
				p++;
		} else if (c == '/' && buf[p + 1] == '*') {
			/*  Block comment: stop on the '*' of the closing
			    marker, then step onto its '/'.  */
			p += 2;
			while (p < size && buf[p] != '\0' &&
			    !(buf[p] == '*' && buf[p + 1] == '/'))
				p++;
			p++;
		} else if (c == '"' || c == '\'') {
			skip_quote(buf, &p, c);
		} else if (c == '{') {
			/*  Nested block; returns on its closing brace.  */
			skip_box(buf, &p);
		}

		p++;
	}

	/*  The skippers above may have stepped past the end of an
	    unterminated construct; never report a position beyond 'size'.  */
	if (p > size)
		p = size;

	*pos = p;
}


/*
 *  skip_parenthesis():
 *
 *  Skips a parenthesized expression, including nested parentheses and
 *  string/character literals found inside it.
 *
 *  buf   buffer being scanned; the global 'size' is the number of valid
 *        bytes in it.
 *  pos   in:  index of the opening '('.
 *        out: index of the matching ')', or the index where scanning
 *             stopped (a NUL byte, or 'size') if there is none.
 */
void skip_parenthesis(char *buf, int *pos)
{
	int p = *pos + 1;

	/*  The bound is tested before buf[p] is read.  */
	while (p < size && buf[p] != ')' && buf[p] != '\0') {
		if (buf[p] == '"' || buf[p] == '\'')
			skip_quote(buf, &p, buf[p]);
		else if (buf[p] == '(')
			skip_parenthesis(buf, &p);

		/*  Step over the closing quote/parenthesis, or the plain
		    character that was just examined.  */
		p++;
	}

	/*  A nested skip may have ended at 'size' before the increment.  */
	if (p > size)
		p = size;

	*pos = p;
}


int scanfile(FILE *fout, char *fname)
{
	FILE *f;
	char *buf;
	int p, oldp, p2;
	char c;
	char funcname[MAXLEN];
	int in_preproc;
	long len2;
	int skipped_a_box = 1;
		/*  Must be 1 to be able to add a function name  */

	if (strlen(fname) < 3)
		return -1;

	if ((strncmp(fname+strlen(fname)-2,".c",2)) &&
	    (strncmp(fname+strlen(fname)-2,".h",2)) )
		return -1;

	f = fopen(fname, "r");
	if (f == NULL) {
		fprintf(stderr, "(error opening %s)\n", fname);
		return -1;
	}

	/*  Allocate buffer for the entire file and read it:  */

	fseek(f, 0, SEEK_END);
	size = ftell(f);
	fseek(f, 0, SEEK_SET);

	buf = (char *) malloc(size + 1);
	if (buf == NULL) {
		fprintf(stderr, "out of memory mallocing %i bytes "
		    "for %s in scanfile()\n", size, fname);
		exit(1);
	}

	buf[size] = '\0';

	len2 = fread(buf, 1, size, f);
	fclose(f);

	printf("fname = %s\n", fname);

	size = len2;
	if (size < 1)
		return 0;

	/*  Scan buf:  */
	p = 0;
    while (p<size)
      {
	c = buf[p];
//printf ("%c'", c);

	if (c=='{')
	  {
	    skip_box (buf, &p);
	    skipped_a_box = 1;
	  }
	else
	if (c=='"')
	  skip_quote (buf, &p, '"');
	else
	if (c=='\'')
	  skip_quote (buf, &p, '\'');
	else
	if ( (c=='/') && (buf[p+1]=='/') )
	  {		//  Skip until EOL
		//  FIX:  these comments may occupy several lines!!!
	    p += 2;
	    while ( (buf[p]!='\0') && (buf[p]!='\n') )
		p++;
	  }
	else
	if ( (c=='/') && (buf[p+1]=='*') )
	  {
	    //  Skip the comment:
	    p += 2;
	    while ( (buf[p]!='\0') && !( (buf[p]=='*') && (buf[p+1]=='/') ) )
		p++;
	    p++;
	  }
	else
	if (c=='#')
	  {
	    //  A #define thing?  Only scan for those in .h files so far...
	    if ( (!strncasecmp(buf+p, "#define", 7))
		&& (!strncmp(fname+strlen(fname)-2,".h",2)) )
	      {
		//  Treat the define as a function definition...
		//  (this is quite common for example in the OpenBSD kernel)
		p2 = p+7;
		while ( (p2<size) && ( (buf[p2]==' ') || (buf[p2]=='\t') ||
		    (buf[p2]=='\\') || (buf[p2]=='\n') || (buf[p2]=='\r') ) )
		  p2++;
		//  here, p2 points to the first char of the name
		oldp = p2;
		while ( (p2<size) && ( (buf[p2]=='_') || (isalnum(buf[p2])) ) )
			p2++;
		p2--;
		if ( (isdigit(buf[oldp])) || (p2-oldp<1) )
			goto no_good_define;
		strncpy (funcname, buf+oldp, p2-oldp+1);
		funcname[p2-oldp+1]=0;

		//  UGLY HACK:
		if (!strcmp(funcname, "__P") || is_reserved(funcname) )
			goto no_good_define;

		fprintf (fout, "%07i %s %s\n", stringhash(funcname)&HASHMASK,
		    funcname, fname);
no_good_define:
		;
	      }

	    in_preproc = 1;
	    while ( (in_preproc) && (p<size) )
	      {
		//  Skip the rest of the line:
		while ( (buf[p]!='\0') && (buf[p]!='\n') && (buf[p]!='\r') )
			p++;
		in_preproc=0;
		if ( (buf[p]=='\n') || (buf[p]=='\r') )
		  if (buf[p-1]=='\\')
		    {
		      in_preproc=1;
		      while ( (buf[p]=='\n') || (buf[p]=='\r') )
			p++;
		    }
	      }
	  }
	else
	//  Function definition?  <type> <name> (<parameters>) <NOT_A_SEMICOLON>
	if (buf[p]=='(')
	  {
	    //  If this is a function _definition_, we should NOT have a semicolon
	    //  after the parentheses-expression...
	    oldp = p;
	    skip_parenthesis(buf, &p);  p++;

	    //  Skip any "empty" characters before looking for a semicolon:
	    while ( (p<size) && ( (buf[p]==' ') || (buf[p]=='\t') || (buf[p]=='\n')
		|| (buf[p]=='\r') || (buf[p]=='\\') ) )
	      p++;

	    if ( (buf[p]!=';') && (skipped_a_box) )
	      {
		//  Let's get the function name. It's the thing before the
		//  first parenthesis at position 'oldp'.
		funcname[0]=0;
		p2 = oldp-1;
		while ( (p2>0) && ( (buf[p2]==' ') || (buf[p2]=='\t') || (buf[p2]=='\n')
			|| (buf[p2]=='\r') || (buf[p2]=='\\') ) )
		  p2--;
		//  here: p2 points to the last character of the function name...
		oldp = p2;
		//  ... hopefully. It has to be underscore or alphanumeric though.
		if ( (buf[p2]!='_') && (!isalnum(buf[p2])) )
			goto no_function;
		//  Find the beginning of the function name:
		while ( (p2>0) && ( (buf[p2]=='_') || (isalnum(buf[p2])) ) )
			p2--;
		p2++;
		//  A true function name doesn't have a digit as the first char,
		//  and it has at least one character (len>=1)
		if ( (isdigit(buf[p2])) || (oldp-p2<1) )
			goto no_function;
		//  Copy the function name:
		strncpy (funcname, buf+p2, oldp-p2+1);
		funcname [oldp-p2+1] = 0;

		//  UGLY HACK:
		if (!strcmp(funcname, "__P") || is_reserved(funcname) )
			goto no_function;

		//  Print it:
		fprintf (fout, "%07i %s %s\n", stringhash(funcname)&HASHMASK, funcname,
			fname);
no_function:

		/*  This is set to zero, and re-set to 1 when we've skipped a box.
		    This is to prevent function names _between_ the function header
		    and the actual code to be interpreted as a function...  */
		skipped_a_box = 0;
	      }
	    p--;
	  }
//	else
//	printf ("%c", c);

	//  Next byte:
	p ++;
      }

	free(buf);
	return 0;
}

