// XMIInspect path expression handling

/* warning, horrible parsing code down here! (see findtoken() and the two buildExprInfo() methods..) */

#include "stdafx.h"

#ifndef WIN32
/* return the larger of the two ints; b wins only when a is strictly smaller */
int max(int a, int b)
{
   return (a < b) ? b : a;
}
#endif

/* ExprInfo (de)constructors & methods */
/* create an empty node: unknown type, no subs, no enum index, no argument text */
ExprInfo::ExprInfo()
{
	/* container type and count are not known yet */
	containertype = XMIINS_SC_UNKNOWN;
	numcontainers = -1;

	/* no sub nodes allocated or in use */
	subs = NULL;
	numsubs = 0;
	usedsubs = 0;

	/* enum indexes have not been calculated */
	eindex = -1;
	eindextotal = -1;

	/* no constant / parameter text stored */
	constant_or_param = NULL;
}

/* release the sub nodes, the array holding them and the stored argument text */
ExprInfo::~ExprInfo()
{
	int idx = 0;

	/* the sub nodes go first, then the pointer array itself */
	while (idx < numsubs) {
		trydel (subs[idx]);
		idx++;
	}
	trydela (subs);
	trydela (constant_or_param);
}

/* store the container count; with takemax set, the stored count is never lowered */
void ExprInfo::setNumContainers(int n, bool takemax)
{
	if (takemax) {
		numcontainers = max(n, numcontainers);
	} else {
		numcontainers = n;
	}
}

int ExprInfo::getNumContainers()
{
	return numcontainers;
}

/* return sub node number 'sub', or NULL when there is no subs array or the
   index lies outside [0, numsubs) */
ExprInfo* ExprInfo::getSubContainer(int sub)
{
	/* a negative index must be rejected as well, it would read before the array */
	if (!subs || sub < 0 || sub >= numsubs) {
		return NULL;
	}
	return subs[sub];
}

int ExprInfo::getSubContainerType(int sub)
{
	ExprInfo *expr = getSubContainer(sub);
	if (expr) {
		return expr->getContainerType();
	} else {
		return XMIINS_SC_UNKNOWN;
	}
}

/* forward a type to sub node 'container'; silently ignored when the index is
   outside [0, numsubs) or the slot holds no node */
void ExprInfo::setSubContainerType(int container, int type)
{
	if (!subs || container < 0 || container >= numsubs) {
		return;
	}
	if (subs[container]) {
		subs[container]->setContainerType(type);
	}
}

int ExprInfo::getContainerType()
{
	return containertype;
}

/* assign the container type; only the first assignment sticks, any later call
   (even with the same type) turns the node into XMIINS_SC_MIXED */
void ExprInfo::setContainerType(int type)
{
	if (containertype != XMIINS_SC_UNKNOWN) {
		/* a type was stored before, so this node now counts as mixed */
		containertype = XMIINS_SC_MIXED;
		return;
	}
	containertype = type;
}

/* true for the compressor types or, seq, rep and seqcomb */
bool ExprInfo::isCombining(int type)
{
	switch (type) {
		case XMIINS_SC_OR:
		case XMIINS_SC_SEQ:
		case XMIINS_SC_REP:
		case XMIINS_SC_SEQCOMB:
			return true;
		default:
			return false;
	}
}

/* number of containers a compressor type uses by itself:
   0 for p, seq, constant and seqcomb, -1 for unknown/mixed, 1 for everything else */
int ExprInfo::numContainers(int type)
{
	switch (type) {
		case XMIINS_SC_P:
		case XMIINS_SC_SEQ:
		case XMIINS_SC_C:
		case XMIINS_SC_SEQCOMB:
			/* no data of their own */
			return 0;
		case XMIINS_SC_MIXED:
			/* cannot be told */
			return -1;
		default:
			break;
	}

	/* unknown cannot be told either; every remaining type uses one container */
	return (type == XMIINS_SC_UNKNOWN) ? -1 : 1;
}

/* translate a compressor name into its XMIINS_SC_* type;
   XMIINS_SC_UNKNOWN when the name matches none of the known names */
int ExprInfo::token2type(char *dum)
{
	/* name/type pairs, compared in this order with an exact strcmp */
	static const struct {
		const char *text;
		int type;
	} known[] = {
		{ XMIINS_SC_T_STR,       XMIINS_SC_T },
		{ XMIINS_SC_E_STR,       XMIINS_SC_E },
		{ XMIINS_SC_U_STR,       XMIINS_SC_U },
		{ XMIINS_SC_I_STR,       XMIINS_SC_I },
		{ XMIINS_SC_RL_STR,      XMIINS_SC_RL },
		{ XMIINS_SC_U8_STR,      XMIINS_SC_U8 },
		{ XMIINS_SC_DI_STR,      XMIINS_SC_DI },
		{ XMIINS_SC_BASEX_STR,   XMIINS_SC_BASEX },
		{ XMIINS_SC_BASE2_STR,   XMIINS_SC_BASE2 },
		{ XMIINS_SC_BASE16_STR,  XMIINS_SC_BASE16 },
		{ XMIINS_SC_BASE22_STR,  XMIINS_SC_BASE22 },
		{ XMIINS_SC_BASE64_STR,  XMIINS_SC_BASE64 },
		{ XMIINS_SC_P_STR,       XMIINS_SC_P },
		{ XMIINS_SC_OR_STR,      XMIINS_SC_OR },
		{ XMIINS_SC_SEQ_STR,     XMIINS_SC_SEQ },
		{ XMIINS_SC_REP_STR,     XMIINS_SC_REP },
		{ XMIINS_SC_SEQCOMB_STR, XMIINS_SC_SEQCOMB }
	};
	const int count = (int)(sizeof(known) / sizeof(known[0]));

	for (int k = 0; k < count; k++) {
		if (strcmp(dum, known[k].text) == 0) {
			return known[k].type;
		}
	}
	return XMIINS_SC_UNKNOWN;
}

/* translate an XMIINS_SC_* type into its printable name
 *
 * Types that have an XMIINS_SC_*_STR name return that name; constant and
 * mixed have fixed names and every other value yields "unknown".
 * The returned text is not allocated here; callers in this file only print
 * it. The return type stays char* for the existing callers, so the constness
 * is cast away explicitly in one place instead of relying on the implicit
 * string-literal to char* conversion that C++11 removed.
 */
char* ExprInfo::type2text(int type)
{
	const char *text = "unknown";

	switch (type) {
		case XMIINS_SC_T:
			text = XMIINS_SC_T_STR;
			break;
		case XMIINS_SC_E:
			text = XMIINS_SC_E_STR;
			break;
		case XMIINS_SC_U:
			text = XMIINS_SC_U_STR;
			break;
		case XMIINS_SC_I:
			text = XMIINS_SC_I_STR;
			break;
		case XMIINS_SC_RL:
			text = XMIINS_SC_RL_STR;
			break;
		case XMIINS_SC_U8:
			text = XMIINS_SC_U8_STR;
			break;
		case XMIINS_SC_DI:
			text = XMIINS_SC_DI_STR;
			break;
		case XMIINS_SC_BASEX:
			text = XMIINS_SC_BASEX_STR;
			break;
		case XMIINS_SC_BASE2:
			text = XMIINS_SC_BASE2_STR;
			break;
		case XMIINS_SC_BASE16:
			text = XMIINS_SC_BASE16_STR;
			break;
		case XMIINS_SC_BASE22:
			text = XMIINS_SC_BASE22_STR;
			break;
		case XMIINS_SC_BASE64:
			text = XMIINS_SC_BASE64_STR;
			break;
		case XMIINS_SC_P:
			text = XMIINS_SC_P_STR;
			break;
		case XMIINS_SC_OR:
			text = XMIINS_SC_OR_STR;
			break;
		case XMIINS_SC_SEQ:
			text = XMIINS_SC_SEQ_STR;
			break;
		case XMIINS_SC_REP:
			text = XMIINS_SC_REP_STR;
			break;
		case XMIINS_SC_SEQCOMB:
			text = XMIINS_SC_SEQCOMB_STR;
			break;
		case XMIINS_SC_C:
			text = "constant";
			break;
		case XMIINS_SC_MIXED:
			text = "mixed";
			break;
		default:
			/* keep "unknown" */
			break;
	}

	/* callers must treat the result as read-only */
	return const_cast<char*>(text);
}

/* allocate MAXSUBS empty sub nodes and return the array that holds them */
ExprInfo** ExprInfo::createSubs()
{
	int slot = 0;

	numsubs = MAXSUBS;
	subs = new ExprInfo*[numsubs];

	/* every slot gets its own fresh node */
	while (slot < numsubs) {
		subs[slot] = new ExprInfo();
		slot++;
	}
	return subs;
}

/* address of slot 'offset' inside the subs array; the offset is not range checked */
ExprInfo** ExprInfo::getSubs(int offset)
{
	return subs + offset;
}

/* some static functions */
/* find the first occurrence of 'skip' in str and return the position right after it
 *
 * str    : NUL terminated string to scan, may be NULL
 * skip   : character to search for
 * returns: pointer to the char following the match, or NULL when str is NULL
 *          or the string ends before 'skip' is seen
 *
 * When searching for a double quote, a backslash makes the following char
 * part of the text, so an escaped quote does not end the search.
 */
static char* skipTo (char *str, char skip)
{
	if (!str) {
		return NULL;
	}

	for (;;) {
		char c = str[0];

		if (c == skip) {
			return str+1;
		}
		if (c == '\0') {
			/* end of string reached without a match; never scan past the terminator */
			return NULL;
		}
		if (skip == '"' && c == '\\' && str[1] != '\0') {
			/* step over the escaped char, but not over the terminator */
			str++;
		}
		str++;
	}
}

/* cut the next token out of str
 *
 * str      : text to scan, may be NULL
 * tokenstr : set of delimiter characters
 * tokensep : receives the delimiter that ended the token; '\0' when the
 *            string ended first
 * nextstr  : receives the position where scanning stopped
 * setnull  : when true the delimiter is overwritten with '\0' so that the
 *            returned token is a terminated string
 * returns  : start of the token (str with leading white space skipped)
 *
 * A double quote stops the scan right at the quote: the quote is reported in
 * tokensep, nothing is overwritten and nextstr points at the quote itself.
 * After any other delimiter nextstr points at the char behind it; when that
 * char is a parenthesis it replaces the reported delimiter. Only that single
 * char is looked at, a run of delimiters is not skipped.
 */
static char* findtoken(char *str, char *tokenstr, char *tokensep, char **nextstr, bool setnull = true)
{
	char *start;

	*tokensep = '\0';

	/* skip leading white space */
	if (str) {
		while (str[0] && isspace(str[0])) {
			str++;
		}
	}
	start = str;

	/* look for the char that ends the token */
	if (str) {
		for (; str[0]; str++) {
			char cur = str[0];

			if (cur == '"') {
				/* start of a string constant, leave it to the caller */
				*tokensep = cur;
				break;
			}
			if (strchr(tokenstr, cur)) {
				/* delimiter found: cut the token here */
				if (setnull) {
					str[0] = '\0';
				}
				*tokensep = cur;
				str++;
				/* parentheses have preference over other token separators */
				if (str[0] == '(' || str[0] == ')') {
					*tokensep = str[0];
				}
				break;
			}
		}
	}

	*nextstr = str;
	return start;
}

int ExprInfo::getUsedSubs()
{
	return usedsubs;
}

/* get the used # subs in a subcompressor
 *
 * subnum : index of the sub node to ask
 * returns: that sub's used-subs count, or -1 when subnum is outside
 *          [0, used subs) or the sub node is not available
 */
int ExprInfo::getUsedSubs(int subnum)
{
	/* reject negative indexes and missing nodes too, not just indexes that are too high */
	if (!subs || subnum < 0 || subnum >= getUsedSubs() || !subs[subnum]) {
		return -1;
	}
	return subs[subnum]->getUsedSubs();
}

void ExprInfo::setUsedSubs(int num)
{
	usedsubs = num;
}

/* collect the node(s) that back container number contnum
 *
 * contnum  : container number to look up (the caller in this file counts from 1)
 * exprs    : output array that receives the matching nodes
 * maxexprs : capacity of exprs; never more than this many entries are written
 * returns  : number of entries stored in exprs
 *
 * The top node and then the used subs are visited in order while the
 * numContainers() value of each type is summed, until the sum reaches
 * contnum. When a seqcomb node is hit, the subs that follow it and that use
 * containers of their own are collected instead of a single node.
 * NOTE(review): the walk uses this node's flat subs list for the seqcomb
 * members as well - confirm that matches how the subs are laid out.
 */
int ExprInfo::getTranslatedContainers(int contnum, ExprInfo **exprs, int maxexprs)
{
	/* initialize with top container */
	int contfound = numContainers(containertype);
	int subnum = 0, numexprs = 0;
	ExprInfo *expr = this;

	while (contnum > contfound && subnum < usedsubs) {
		/* not found yet */
		expr = subs[subnum++];
		contfound += numContainers(expr->containertype);
		if (expr->containertype == XMIINS_SC_SEQCOMB && contfound+1 == contnum) {
			/* add all subs */
			while (contnum > contfound && subnum < usedsubs && numexprs < maxexprs) {
				/* todo: add only the first one if the current is a combining container */
				expr = subs[subnum++];
				if (numContainers(expr->containertype) > 0) {
					exprs[numexprs++] = expr;
				}
			}
		}
	}

	if (numexprs == 0) {
		if (expr->containertype == XMIINS_SC_SEQCOMB) {
			/* add all subs */
			while (contnum > contfound && subnum < usedsubs && numexprs < maxexprs) {
				/* todo: add only the first one if the current is a combining container */
				expr = subs[subnum++];
				if (numContainers(expr->containertype) > 0) {
					exprs[numexprs++] = expr;
				}
			}
		} else if (numexprs < maxexprs) {
			/* add this one, but only when the caller's array has room for it */
			exprs[numexprs++] = expr;
		}
	}

	/* container(s) found, now return the number */
	return numexprs;
}

#define MAXEXPRS	1024

int ExprInfo::getTranslatedContainerType(int contnum)
{
	int maxexprs = MAXEXPRS;
	ExprInfo *exprs[MAXEXPRS];
	int numexprs = getTranslatedContainers(contnum, exprs, maxexprs);
	int type;

	if (numexprs == 1) {
		type = exprs[0]->containertype;
	} else {
		type = XMIINS_SC_MIXED;
	}

	/* container found, now return the type */
	return type;
}

/* walk the used subs until more than contnum containers have been counted
 *
 * contnum : container number to look for
 * subnump : when not NULL, receives the number of subs that were visited
 * returns : the node visited last (this node when no sub was visited)
 *
 * Subs that follow a seqcomb node are counted as overlapping: for as many
 * subs as the seqcomb node has in use, only the largest container count seen
 * so far is added on top of the count at the seqcomb's start.
 */
ExprInfo* ExprInfo::getTranslatedExpr(int contnum, int *subnump)
{
	/* start out with the top container */
	ExprInfo *node = this;
	int seen = numContainers(containertype);
	int next_sub = 0;
	int seq_base = 0, seq_left = 0, seq_widest = 0;
	bool in_seq = false;

	while (contnum >= seen && next_sub < usedsubs) {
		/* not there yet, take the next sub */
		node = subs[next_sub];
		next_sub++;

		if (node->containertype == XMIINS_SC_SEQCOMB) {
			/* a seqcomb starts here: remember the count so far and how many subs belong to it */
			seq_base = seen;
			in_seq = true;
			seq_widest = 0;
			seq_left = node->usedsubs;
		}

		const int own = numContainers(node->containertype);

		if (seq_left == 0) {
			/* all seqcomb subs done, leave the seqcomb state */
			in_seq = false;
		}

		if (!in_seq) {
			/* normal handling, just increase the container count */
			seen += own;
			continue;
		}

		/* seqcomb handling: count the largest # containers encountered so far */
		seq_widest = max(seq_widest, own);
		seen = seq_base + seq_widest;
		seq_left--;
	}

	if (subnump) {
		*subnump = next_sub;
	}
	return node;
}

/* retrieve normalized sub count for the 'or' compressor */
int ExprInfo::getTranslatedUsedSubs(int contnum)
{
	int subnum = 0, totalnumsubs = 0;
	ExprInfo *expr = getTranslatedExpr(contnum, &subnum);
	int numsubs = expr->getUsedSubs();

	/* container found, now subtract all used subs for second and deeper nested combining compressors */
	totalnumsubs = numsubs;
	while (subnum < totalnumsubs) {
		/* not found yet */
		expr = subs[subnum++];
		numsubs -= expr->getUsedSubs();
	}

	/* container found, now return the # of used subs */
	return numsubs;
}

/* count the blocks whose pathindex entry equals pathexprnum */
int XMIInspect::numReferringBlocks(int pathexprnum)
{
	int hits = 0;
	int blk = 0;

	while (blk < numblocks) {
		if (pathindex[blk] == pathexprnum) {
			hits++;
		}
		blk++;
	}

	return hits;
}

/* count the blocks in front of block blocknum whose pathindex entry equals
   pathexprnum; the scan never runs past the last block */
int XMIInspect::numPrecedingBlocks(int blocknum, int pathexprnum)
{
	int hits = 0;
	int blk = 0;

	while (blk < blocknum && blk < numblocks) {
		if (pathindex[blk] == pathexprnum) {
			hits++;
		}
		blk++;
	}

	return hits;
}

/* hand out global enum indexes to the 'e' compressors of this path expression
 *
 * inspect     : used to ask how many blocks refer to this path expression
 * pathexprnum : number of this path expression
 * cureindex   : in: first free global index; out: advanced by the number of
 *               'e' compressors found times the number of referring blocks
 *
 * The top node (when it is an 'e') and every used sub of type 'e' get
 * consecutive indexes starting at cureindex; the number handed out is kept
 * in eindextotal.
 */
void ExprInfo::calcEnumIndexes(XMIInspect *inspect, int pathexprnum, int &cureindex)
{
	const int referring = inspect->numReferringBlocks(pathexprnum);
	int claimed = 0;

	/* the top container comes first */
	if (containertype == XMIINS_SC_E) {
		eindex = cureindex + claimed;
		claimed++;
	}

	/* then every used sub that is an 'e' compressor claims a global block */
	for (int k = 0; k < usedsubs; k++) {
		ExprInfo *sub = subs[k];
		if (sub->containertype == XMIINS_SC_E) {
			sub->eindex = cureindex + claimed;
			claimed++;
		}
	}

	/* this amount of global blocks is claimed once for each referring block */
	eindextotal = claimed;
	cureindex += claimed * referring;
}

/* calculate the enum indexes of all path expressions and check the total
 *
 * Runs calcEnumIndexes() on exprinfo[1] .. exprinfo[numexprs-1] and compares
 * the number of global indexes handed out with numglobals.
 * returns: 0 when both agree, XMIINS_ERR_INTERNAL otherwise (a message is
 *          printed when the report level includes errors)
 */
int XMIInspect::matchEnumIndexes()
{
	int stat = 0;
	int cureindex = 0;

	/* entry 0 is not visited */
	for (int i=1; i<numexprs; i++) {
		exprinfo[i]->calcEnumIndexes(this, i, cureindex);
	}

	if (cureindex != numglobals) {
		if (level >= XMIINS_REPORT_ERRORS) {
			/* %ld needs long arguments; passing a plain int is undefined where long is wider */
			printf("global data could not be matched with the path expressions and containers. %ld != %ld!\n", 
					(long)cureindex, (long)numglobals);
		}
		stat = XMIINS_ERR_INTERNAL;
	}

	return stat;
}

/* get enum index for the 'e' compressor 
   eindexes should be pre-calulated with CalcEnumIndexes 
 */
int ExprInfo::getEnumIndex(int blocknum /* not used */, int contnum, int numprec)
{
	ExprInfo *expr = getTranslatedExpr(contnum, NULL);
	return expr->eindex + numprec * eindextotal;
}

/* make an unescaped copy of the first len chars of s
 *
 * s      : source text
 * len    : number of chars of s to process
 * returns: new[]-allocated, NUL terminated copy; the caller has to delete[] it
 *
 * \t, \n and \r become tab, newline and carriage return; any other escaped
 * char is copied as-is without its backslash. A backslash that is the very
 * last of the len chars has nothing to escape and is dropped, so no char
 * beyond len is ever read.
 */
static char *unescape(char *s, int len)
{
	char *t = new char[len+1], c;
	int i = 0, j = 0;

	while (i<len) {
		if ((c = s[i++]) == '\\') {
			if (i >= len) {
				/* dangling backslash at the end of the input */
				break;
			}
			/* unescape */
			switch ((c=s[i++])) {
				case 't':
					c = '\t';
					break;
				case 'n':
					c = '\n';
					break;
				case 'r':
					c = '\r';
					break;
				default:
					/* leave char as-is */
					break;
			}
		}
		t[j++] = c;
	}
	t[j] = '\0';

	return t;
}

/* store the constant or parameter text of this node, replacing any earlier one
 *
 * s          : source text
 * len        : number of chars to take; 0 means "up to the terminating NUL"
 * isconstant : constants are stored unescaped, parameters are copied as-is
 */
void ExprInfo::setCorP(char *s, int len, bool isconstant)
{
	/* a zero length asks for the whole string */
	const int count = (len == 0) ? (int)strlen(s) : len;

	/* drop what was stored before */
	trydela(constant_or_param);

	if (!isconstant) {
		/* parameter: plain terminated copy */
		char *copy = new char[count+1];
		strncpy(copy, s, count);
		copy[count] = '\0';
		constant_or_param = copy;
		return;
	}

	/* constant: resolve the escape sequences */
	constant_or_param = unescape(s, count);
}

char* ExprInfo::getCorP()
{
	return constant_or_param;
}

/* the path expression parsing */

/* build container info for a path expression
 *
 * info        : node that receives the type / container count of the
 *               compressor parsed at this level
 * expr        : writable expression text; it is cut into tokens in place
 * infos       : sub node list that nested compressors are stored in
 * numsubsused : in/out index of the next free entry of infos
 *               NOTE(review): infos is indexed without a range check here;
 *               its capacity is not known inside this function
 * charsused   : when not NULL this call is a recursion and the number of
 *               chars of expr that were processed is reported here
 * pretnumcont : when not NULL, receives the container count of this level
 * overlayed   : true while parsing the arguments of a seqcomb compressor;
 *               the container counts of the arguments are then combined
 *               with max() instead of being summed
 * returns     : 0 on success, XMIINS_ERR_SEMCOMPUNKNOWN for an unknown
 *               compressor name, XMIINS_ERR_PATHEXPR_PARSE for malformed input
 *
 * An empty expression defaults to the text container.
 */
int XMIInspect::buildExprInfo(ExprInfo *info, char *expr, ExprInfo **infos, int *numsubsused, int *charsused, int *pretnumcont, bool overlayed)
{
	int stat = 0;
	char *token = NULL, tokensep = '\0';
	/* start at expr, so the chars-used report is well defined even when nothing gets parsed */
	char *nextstr = expr;
	int type = XMIINS_SC_UNKNOWN, numcont = 0;
	bool iscomb = false, alreadyset = false, isrecurse = (charsused != NULL);
	char *str = expr;
	int usedchars = 0, retnumcont = 0, prevretnumcont = 0, prevnumcont = 0, curnum = 0;
	ExprInfo *curinfo = NULL;
	int curused = 0;
	char *constant_or_param = NULL;
	int c_or_p_len = 0;
	bool isconstant = false;
	/* token delimiters, kept in a writable array because findtoken() takes a char* */
	char delimiters[] = "() \t\n\r\"";

	if (strlen(str)==0) {
		/* no path expr; default to the text container */
		type = XMIINS_SC_T;
		info->setNumContainers(ExprInfo::numContainers(type));
		info->setContainerType (type);
	} else {
		/* parse a new token */
		while ((token = findtoken(str, delimiters, &tokensep, &nextstr)) && token[0]) {
			/* no argument seen yet for this token; a length of 0 makes setCorP() take
			   the whole terminated string, so a length left over from an earlier
			   token must not survive */
			constant_or_param = NULL;
			c_or_p_len = 0;
			if (tokensep == '"') {
				/* we've just parsed a constant compressor */
				type = XMIINS_SC_C;
				constant_or_param = nextstr+1;
				nextstr = skipTo(constant_or_param, tokensep);
				if (!nextstr) {
					/* closing quote is missing */
					if (level >= XMIINS_REPORT_ERRORS) {
						printf("Unterminated constant found in %s\n", expr);
					}
					stat = XMIINS_ERR_PATHEXPR_PARSE;
					goto cleanup;
				}
				nextstr[-1] = '\0';
				isconstant = true;
			} else if ((type = ExprInfo::token2type(token)) == XMIINS_SC_UNKNOWN) {
				if (level >= XMIINS_REPORT_ERRORS) {
					printf("Semantic compressor unknown: %s\n", token);
				}
				stat = XMIINS_ERR_SEMCOMPUNKNOWN;
				goto cleanup;
			}
			if (alreadyset) {
				if (level >= XMIINS_REPORT_ERRORS) {
					printf("Too many compressors found in %s at %s\n", expr, str);
				}
				stat = XMIINS_ERR_PATHEXPR_PARSE;
				goto cleanup;
			}
			/* get info about this compressor */
			iscomb = ExprInfo::isCombining(type);
			if (overlayed) {
				/* seqcomb handling: replace the previous contribution by the largest one so far */
				numcont -= prevnumcont;
				curnum = max(prevnumcont, ExprInfo::numContainers(type));
				numcont += curnum;
				prevnumcont = curnum;
			} else {
				numcont += ExprInfo::numContainers(type);
			}
			if (iscomb) {
				/* combining compressor, we must recurse now */
				if (tokensep != '(') {
					/* can't recurse; no opening parenthesis found! */
					if (level >= XMIINS_REPORT_ERRORS) {
						printf("No opening parenthesis found in %s at %s\n", expr, str);
					}
					stat = XMIINS_ERR_PATHEXPR_PARSE;
					goto cleanup;
				}
				/* set type */
				if (!isrecurse) {
					info->setContainerType (type);
				} else {
					/* allocate space for this combining compressor in the subs list & store the type */
					curused = *numsubsused;
					(*numsubsused)++;
					curinfo = infos[curused++];
					curinfo->setContainerType (type);
				}
				/* recurse */
				if ((stat = buildExprInfo(infos[*numsubsused], nextstr, infos, numsubsused, &usedchars, &retnumcont, type == XMIINS_SC_SEQCOMB)) != 0) {
					goto cleanup;
				}
				/* set # subs used in the subs of this combining compressor */
				if (isrecurse) {
					curinfo->setUsedSubs(*numsubsused - curused);
				}
				/* calculate # containers */
				if (type == XMIINS_SC_SEQCOMB) {
					retnumcont = max(prevretnumcont, retnumcont);
				}
				numcont += retnumcont;
				prevretnumcont = retnumcont;
				info->setNumContainers(numcont);
				/* advance pointer; skip the chars that the recursion processed */
				nextstr = &nextstr[usedchars];
			} else {
				/* basic compressor */
				info->setNumContainers(numcont, overlayed);
				info->setContainerType (type);
				/* save arguments of the basic compressor */
				if (tokensep == '(') {
					constant_or_param = nextstr;
					nextstr = skipTo(nextstr, ')');
					if (!nextstr) {
						/* closing parenthesis is missing */
						if (level >= XMIINS_REPORT_ERRORS) {
							printf("No closing parenthesis found in %s\n", expr);
						}
						stat = XMIINS_ERR_PATHEXPR_PARSE;
						goto cleanup;
					}
					c_or_p_len = (int)(nextstr - constant_or_param - 1);
					/* terminate the argument at its ')': an empty "()" has length 0, which
					   setCorP() reads as "take the whole string" */
					nextstr[-1] = '\0';
				}
				if (constant_or_param) {
					info->setCorP(constant_or_param, c_or_p_len, isconstant);
					isconstant = false;
				}
				if (!isrecurse) {
					/* we're not recursing, so we should not hit another compressor token */
					alreadyset = true;
				} else /*if (!overlayed)*/ {
					/* advance to next subcompressor ExprInfo */
					info = infos[++(*numsubsused)];
				}
			}
			if (tokensep == '\0' || tokensep == ')') {
				/* end of (sub)string reached */
				goto cleanup;
			}
			/* advance to next token */
			str = nextstr;
		}
	}

cleanup:
	/* report progress data back to recursion caller */
	if (charsused) {
		*charsused = nextstr ? (int)(nextstr - expr) : 0;
	}
	if (pretnumcont) {
		*pretnumcont = numcont;
	}

	return stat;
}

/* build container info for the path expressions */
int XMIInspect::buildExprInfo(char **exprs, int numexprs)
{
	int stat = 0, numsubs = 0, type = 0, numcontainers = 0, num = 0, j;
	ExprInfo **subs = NULL, *expr = NULL, *subexprs[2], *subexpr = NULL;
	char *origstr = NULL;

	/* exprinfo[0] is left empty; it's a dummy for the special container block 0 */
	exprinfo = new ExprInfo*[numexprs+1];
	exprinfo[0] = new ExprInfo();
	for (int i=0; i<numexprs; i++) {
		/* create mem */
		expr = exprinfo[i+1] = new ExprInfo();
		subs = expr->createSubs();
		origstr = new char[strlen(exprs[i])+1];
		strcpy (origstr, exprs[i]);
		numsubs = 0;
		/* parse expression */
		if ((stat = buildExprInfo(expr, origstr, subs, &numsubs)) != 0) {
			goto cleanup;
		}
		/* set the right number of subs */
		expr->setUsedSubs(numsubs);
		/* drop temp string */
		trydela (origstr);
		/* check parsing result */
		if (expr->getNumContainers() < 0) {
			if (level >= XMIINS_REPORT_ERRORS) {
				printf("path expression %ld/%ld: '%s' resulted in %ld containers. Can't be good! (internal error)\n", 
					i+1, numexprs, exprs[i], expr->getNumContainers());
			}
			stat = XMIINS_ERR_PATHEXPR_PARSE;
			goto cleanup;
		}
		/* print some info */
		if (level >= XMIINS_REPORT_MOREINFO) {
			numcontainers = expr->getNumContainers();
			printf("path expression %ld/%ld: '%s' resulted in %ld containers and %ld subs.\n top container type: %s", 
				i+1, numexprs, exprs[i], numcontainers, numsubs, ExprInfo::type2text(expr->getContainerType()));
			/* print top arguments or parameters */
			if (expr->getCorP()) {
				printf (" \"%s\"", expr->getCorP());
			}
			printf ("\n");
			/* print per sub */
			for (j=0; j < numsubs; j++) {
				subexpr = expr->getSubContainer(j);
				type = subexpr->getContainerType();
				printf(" subcompressor %ld/%ld type: %s", j+1, numsubs, ExprInfo::type2text(type));
				if (ExprInfo::isCombining(type)) {
					printf (", # subs: %ld", expr->getUsedSubs(j));
				}
				if (subexpr && subexpr->getCorP()) {
					printf (" \"%s\"", subexpr->getCorP());
				}
				printf("\n");
			}
			/* print per container (translation from sub to container is done inside ExprInfo) */
			for (j=1; j<numcontainers+1; j++) {
				type = expr->getTranslatedContainerType(j);
				printf(" container %ld/%ld type: %s", j, numcontainers, ExprInfo::type2text(type));
				num = expr->getTranslatedContainers(j, subexprs, 2);
				if (num == 1 && subexprs[0] && subexprs[0]->getCorP()) {
					printf (" \"%s\"", subexprs[0]->getCorP());
				}
				printf("\n");
			}
		}
	}

cleanup:
	/* drop temp string */
	trydela (origstr);

	return stat;
}

