/*
This product contains certain software code or other information
("AT&T Software") proprietary to AT&T Corp. ("AT&T").  The AT&T
Software is provided to you "AS IS".  YOU ASSUME TOTAL RESPONSIBILITY
AND RISK FOR USE OF THE AT&T SOFTWARE.  AT&T DOES NOT MAKE, AND
EXPRESSLY DISCLAIMS, ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND
WHATSOEVER, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, WARRANTIES OF
TITLE OR NON-INFRINGEMENT OF ANY INTELLECTUAL PROPERTY RIGHTS, ANY
WARRANTIES ARISING BY USAGE OF TRADE, COURSE OF DEALING OR COURSE OF
PERFORMANCE, OR ANY WARRANTY THAT THE AT&T SOFTWARE IS "ERROR FREE" OR
WILL MEET YOUR REQUIREMENTS.

Unless you accept a license to use the AT&T Software, you shall not
reverse compile, disassemble or otherwise reverse engineer this
product to ascertain the source code for any AT&T Software.

(c) AT&T Corp. All rights reserved.  AT&T is a registered trademark of AT&T Corp.

***********************************************************************

History:

      24/11/99  - initial release by Hartmut Liefke, liefke@seas.upenn.edu
                                     Dan Suciu,      suciu@research.att.com
*/

//**************************************************************************
//**************************************************************************

// This module implements the management of container path expressions

#include "stdafx.h"

#ifdef FULL_PATHEXPR
#include "VRegExpr.hpp"
#endif

/* class VPathExpr implementation */
// Placement allocation: the storage for the object is requested from the
// given MemStreamer via GetByteBlock().
void *VPathExpr::operator new(size_t size, MemStreamer *mem)
{
   void *block = mem->GetByteBlock(size);
   return block;
}
// Deallocation does nothing: the storage was obtained from a MemStreamer
// (see operator new) and is not handed back here.
void VPathExpr::operator delete(void * /*ptr*/)
{
}
#ifdef SPECIAL_DELETE
// Placement form matching operator new(size_t, MemStreamer *); also a no-op.
void VPathExpr::operator delete(void * /*ptr*/, MemStreamer * /*mem*/)
{
}
#endif

// Remembers the session this path expression belongs to.
// No other member is initialized here.
VPathExpr::VPathExpr(Session *s)
{
   this->session = s;
}

unsigned long VPathExpr::GetIdx()  {  return idx; }
// Returns the successor of this path expression in the linked list.
VPathExpr *VPathExpr::GetNext()
{
   return this->next;
}

void VPathExpr::PrintRegExpr()
   // Writes the path expression text [regexprstr, regexprendptr) to stdout.
   // Nothing is written if no expression text is attached (regexprstr is
   // NULL, which is the state left behind by DecompVPathExpr::Load) or if
   // the range is empty.
{
   if((regexprstr==NULL)||(regexprendptr<=regexprstr))
      return;

   fwrite(regexprstr,1,regexprendptr-regexprstr,stdout);
}

void VPathExpr::PathParseError(char *errmsg,char *errptr)
   // Prints an error message, if the parsing of some path expression failed.
{
   XMillException *e = new XMillException(XMILL_ERR_PARSE, "Error while parsing path expression:\n\n   ");
   e->ErrorCont(regexprstr,regexprendptr-regexprstr);
   e->ErrorCont("\n");
   for(int i=0;i<errptr-regexprstr+3;i++)
      e->ErrorCont(" ",1);
   e->ErrorCont("^\n");
   e->ErrorCont(errmsg);
   throw e;
}

inline void VPathExpr::HandlePathExprOption(char * &str,char *endptr)
   // Parses one single option at 'str' (options are separated by ':').
   //
   // Two-character options l(i|g|t) and r(i|g|t) set the handling of
   // left/right white spaces for this path expression; 'str' is then
   // advanced by two characters.
   // Anything else is handed to CreateCompressorInstance(), i.e. it is
   // interpreted as a compressor specification.
   // (A third option family 'w', setting 'fullwhitespacescompress', exists
   // only as disabled code in the original source and is not handled.)
{
   if(str+2<=endptr)
   {
      const char side=str[0];
      const char mode=str[1];

      if(side=='l')        // Left white space option?
      {
         if(mode=='i')
         {
            leftwhitespacescompress=WHITESPACE_IGNORE;
            str+=2;
            return;
         }
         if(mode=='g')
         {
            leftwhitespacescompress=WHITESPACE_STOREGLOBAL;
            str+=2;
            return;
         }
         if(mode=='t')
         {
            leftwhitespacescompress=WHITESPACE_STORETEXT;
            str+=2;
            return;
         }
      }
      else if(side=='r')   // Right white space option?
      {
         if(mode=='i')
         {
            rightwhitespacescompress=WHITESPACE_IGNORE;
            str+=2;
            return;
         }
         if(mode=='g')
         {
            rightwhitespacescompress=WHITESPACE_STOREGLOBAL;
            str+=2;
            return;
         }
         if(mode=='t')
         {
            rightwhitespacescompress=WHITESPACE_STORETEXT;
            str+=2;
            return;
         }
      }
   }

   // Not a white space option ==> it must be a compressor
   CreateCompressorInstance(str,endptr);
}

/* class CompVPathExpr implementation */
// Creates an empty compressor-side path expression for session 's':
// no compressor, no automata, no successor in the list.
CompVPathExpr::CompVPathExpr(Session *s): VPathExpr(s)
{
   usercompressor = NULL;
   compressorisdefault = false;

   next = NULL;

   reversefsm = NULL;
#ifdef USE_FORWARD_DATAGUIDE
   forwardfsm = NULL;
#endif
}

// Detaches the path expression from its resources.
// The compressor is released (via trydel) only while 'compressorisdefault'
// is set, i.e. while it is still the default instance installed by
// CreateFromString(); a compressor created through
// CreateCompressorInstance() is left untouched.
void CompVPathExpr::DeInit()
{
   if (compressorisdefault)
   {
      trydel (usercompressor);
   }

   session = NULL;
   reversefsm = NULL;
}

// Forwards to the user compressor's GetUserContNum().
unsigned long CompVPathExpr::GetUserContNum()
{
   unsigned long contnum = usercompressor->GetUserContNum();
   return contnum;
}
// Forwards to the user compressor's GetUserDataSize().
unsigned long CompVPathExpr::GetUserDataSize()
{
   unsigned long datasize = usercompressor->GetUserDataSize();
   return datasize;
}

// Delegates the initialization for 'cont'/'dataptr' to the user compressor.
void CompVPathExpr::InitCompress(CompressContainer *cont,char *dataptr)
{
   UserCompressor *compressor = usercompressor;
   compressor->InitCompress(cont,dataptr);
}

// Delegates the finalization for 'cont'/'dataptr' to the user compressor.
void CompVPathExpr::FinishCompress(CompressContainer *cont,char *dataptr)
{
   UserCompressor *compressor = usercompressor;
   compressor->FinishCompress(cont,dataptr);
}

// Returns the compressor currently attached to this path expression.
UserCompressor *CompVPathExpr::GetUserCompressor()
{
   return this->usercompressor;
}

void CompVPathExpr::CreateCompressorInstance(char *&str,char *endptr)
{
   usercompressor=session->compressman->CreateCompressorInstance(str,endptr);
	compressorisdefault = false;
}

/* class DecompVPathExpr implementation */ 
// Creates an empty decompressor-side path expression for session 's'.
// 'next' and 'useruncompressor' are cleared explicitly (as the
// CompVPathExpr constructor does for its members): the base constructor
// only stores the session, and list traversals rely on a NULL 'next' to
// detect the end of the list.
DecompVPathExpr::DecompVPathExpr(Session *s): VPathExpr(s)
{
   next=NULL;
   useruncompressor=NULL;
}

// Forwards to the user uncompressor's GetUserContNum().
unsigned long DecompVPathExpr::GetUserContNum()
{
   unsigned long contnum = useruncompressor->GetUserContNum();
   return contnum;
}
// Forwards to the user uncompressor's GetUserDataSize().
unsigned long DecompVPathExpr::GetUserDataSize()
{
   unsigned long datasize = useruncompressor->GetUserDataSize();
   return datasize;
}

// Returns the uncompressor currently attached to this path expression.
UserUncompressor *DecompVPathExpr::GetUserUncompressor()
{
   return this->useruncompressor;
}

// Decompressor-side counterpart of CompVPathExpr::CreateCompressorInstance():
// asks the session's decompressor manager for the uncompressor described
// by the text at 'str' and attaches it to this path expression.
void DecompVPathExpr::CreateCompressorInstance(char *&str,char *endptr)
{
   useruncompressor =
      session->decompressman->CreateUncompressorInstance(str,endptr);
}

inline void VPathExpr::ParseUserCompressorString(char * &str,char *endptr)
   // Parses the user compressor string: a sequence of items separated
   // by ':', each handled by HandlePathExprOption(). Note that the actual
   // user compressor *must* come at the end.
   //
   // Parsing ends at 'endptr' or at the first NUL/blank/tab/CR/LF; in the
   // latter cases (and when an item ends exactly at 'endptr')
   // 'regexprendptr' is set to the stop position.
   // Throws an XMillException pointer (XMILL_ERR_PARSE) if an item is
   // followed by anything other than ':' or one of the terminators.
{
   // Each pass handles one item; the loop increment skips the ':' after it
   for(;str<endptr;str++)
   {
      // A white-space (or NUL) in front of an item ends the string
      switch(*str)
      {
      case 0:
      case ' ':
      case '\t':
      case '\r':
      case '\n':
         regexprendptr=str;
         return;
      }

      // Let's handle the option or user compressor
      HandlePathExprOption(str,endptr);

      // Did the item reach the end of the text?
      if(str==endptr)
      {
         regexprendptr=str;
         return;
      }

      switch(*str)
      {
      case 0:
      case ' ':
      case '\t':
      case '\r':
      case '\n':
         // A white-space (or NUL) after an item ends the string, too
         regexprendptr=str;
         return;

      case ':':
         // Separator ==> continue with the next item
         break;

      default:
         {
            XMillException *e = new XMillException(XMILL_ERR_PARSE, "Character ':' expected at '...'");

            // Show at most five characters of the offending text
            if(endptr-str>5)
            {
               e->ErrorCont(str,5);
               e->ErrorCont("...'");
            }
            else
               e->ErrorCont(str,endptr-str);

            throw e;
         }
      }
   }
}

inline void CompVPathExpr::CreateXPathEdge(char *from,char *to,FSM *fsm,FSMState *fromstate,FSMState *tostate,char ignore_pounds)
   // Reads the atomic symbol betwen 'from' and 'to' and generates
   // the corresponding edge between 'fromstate' and 'tostate' in 'fsm.
   // If ignore_pound is 1, then pound symbols are simply treated as '*' symbols.
{
   if(from==to)   // Empty string?
      PathParseError("Unexpected character",from);

   switch(*from)
   {
   case '@':   // Do we have '@#' or '@name' ?
               // ==> Create a corresponding
      if((from+2==to)&&(from[1]=='#'))
         fsm->CreateLabelEdge(fromstate,tostate,session->attribpoundlabelid);
      else
			fsm->CreateLabelEdge(fromstate,tostate,session->globalclabeldict->GetLabelOrAttrib(from+1,to-from-1,1));
      return;

   case '#':   // Do we have '#' or '##'
      if((from+2==to)&&(from[1]=='#'))
         // Do we have a double-pound '##' ?
      {
         FSMState *middlestate=fsm->CreateState();
         fsm->CreateEmptyEdge(fromstate,middlestate);
         fsm->CreateEmptyEdge(middlestate,tostate);
         if(ignore_pounds)
            fsm->CreateNegEdge(middlestate,middlestate);
         else
         {
            fsm->CreateLabelEdge(middlestate,middlestate,session->elementpoundlabelid);
            fsm->CreateLabelEdge(middlestate,middlestate,session->attribpoundlabelid);
         }
      }
      else  // we have '#'
      {
         if(from+1!=to)
            PathParseError("Symbol '/' or '|' expected after '#'",from+1);

         if(ignore_pounds)
            fsm->CreateNegEdge(fromstate,tostate);
         else
         {
            fsm->CreateLabelEdge(fromstate,tostate,session->elementpoundlabelid);
            fsm->CreateLabelEdge(fromstate,tostate,session->attribpoundlabelid);
         }
      }
      return;

   case '*':   // We have '*'
      if(from+1!=to)
         PathParseError("Symbol '/' or '|' expected after '*'",from+1);
  
      fsm->CreateNegEdge(fromstate,tostate);
      return;

   default:
         fsm->CreateLabelEdge(fromstate,tostate,session->globalclabeldict->GetLabelOrAttrib(from,to-from,0));
   }
}

inline void CompVPathExpr::ParseXPathItem(char * &startptr,char *endptr,FSM *fsm,FSMState *fromstate,FSMState *tostate,char ignore_pounds)
   // Parses the path expression between 'startptr' and 'endptr' and generates
   // the corresponding states and edges between 'fromstate' and 'tostate'
   // in 'fsm'.
   // Parsing stops at the end of the text, at '=' or at a ')' that closes
   // no group opened inside this call; 'startptr' is left at that position.
   // If 'ignore_pounds' is nonzero, pound symbols are simply treated
   // like '*' symbols.
   // Syntax errors are reported via PathParseError().
{
   FSMState *curfromstate=fromstate;
   FSMState *curtostate=fsm->CreateState();
   char     *to;

   do
   {
      // 'startptr' may already be at 'endptr' here (e.g. after a trailing
      // '(' or '|'), so it must not be dereferenced unchecked. In that case
      // the label branch below reports the empty label as a parse error.
      if((startptr<endptr)&&(*startptr=='('))
         // We try to consume a label or an expression enclosed in  '(...)'
      {
         startptr++;
         ParseXPathItem(startptr,endptr,fsm,curfromstate,curtostate,ignore_pounds);

         // Afterwards, there must be a symbol ')'
         if((startptr==endptr)||(*startptr!=')'))
            PathParseError("Missing closed parenthesis ')'",startptr);
         startptr++;
      }
      else
      {
         // First, we find the end of the label
         to=startptr;
         while((to<endptr)&&(*to!='/')&&(*to!='=')&&(*to!='|')&&(*to!=')'))
            to++;

         // We create the actual edge
         CreateXPathEdge(startptr,to,fsm,curfromstate,curtostate,ignore_pounds);

         startptr=to;
      }

      if(startptr==endptr) // The path expression is finished after the label
         break;

      // We look at the character coming after the label
      switch(*startptr)
      {
      case '/':   // We have a separator
         startptr++;

         if((startptr<endptr)&&(*startptr=='/'))   // Do we have another '/' following ?
                                                   // i.e. we have '//'
         {
            // Let's create a middle state with a self-loop
            // and an empty edge from 'curtostate' to 'middlestate'
            // and an empty edge from 'middlestate' to a new 'curtostate'
            FSMState *middlestate=fsm->CreateState();
            fsm->CreateEmptyEdge(curtostate,middlestate);
            fsm->CreateNegEdge(middlestate,middlestate,NULL);

            curtostate=fsm->CreateState();
            fsm->CreateEmptyEdge(middlestate,curtostate);
            startptr++;
         }
         if((startptr==endptr)||(*startptr=='=')||(*startptr==')'))
            // Is '/' the last character in the expression ? ==> We are done
         {
            fsm->CreateEmptyEdge(curtostate,tostate);
            return;
         }

         // The next step of the path starts where this one ended
         curfromstate=curtostate;
         curtostate=fsm->CreateState();
         break;

      case '|':
         // Alternative: the next label connects the same pair of states
         startptr++;
         break;

      case ')':
      case '=':
         fsm->CreateEmptyEdge(curtostate,tostate);
         return;

      default:
         PathParseError("Invalid symbol",startptr);
      }
   }
   while(1);

   fsm->CreateEmptyEdge(curtostate,tostate);
}

inline FSM *CompVPathExpr::ParseXPath(char * &str,char *endptr,char ignore_pounds)
   // Generates the FSM for the path expression at 'str', which the caller
   // has verified to start with '/'. The FSM is allocated from
   // 'session->fsmmem'.
   // On return, 'str' points behind the path and behind a following '=>',
   // if there is one.
   // If 'ignore_pounds' is nonzero, pound symbols are simply treated
   // like '*' symbols.
{
   FSM      *fsm=new(session->fsmmem) FSM(session);
   FSMState *entrystate=fsm->CreateState();

   fsm->SetStartState(entrystate);

   // We skip the starting '/'
   ++str;

   if(str==endptr)   // We have the single XPath expression '/'
   {
      entrystate->SetFinal();
      return fsm;
   }

   if(*str=='/')   // Do we have '//...'
   {
      // We loop in the start state
      fsm->CreateNegEdge(entrystate,entrystate,NULL);

      // We skip the second '/'
      ++str;

      if(str==endptr)   // We have path expression '//'
      {
         entrystate->SetFinal();
         return fsm;
      }

      // The rest of the path starts at a state behind the loop
      FSMState *afterloopstate=fsm->CreateState();
      fsm->CreateEmptyEdge(entrystate,afterloopstate);
      entrystate=afterloopstate;
   }

   // Let's create the final state
   FSMState *acceptstate=fsm->CreateState();
   acceptstate->SetFinal();

   // We can now create the states/edges between 'entrystate' and
   // 'acceptstate'. If we are already at '=', then we simply have
   // '//=>...' or '/=>...'
   if(*str=='=')
      fsm->CreateEmptyEdge(entrystate,acceptstate);
   else
      ParseXPathItem(str,endptr,fsm,entrystate,acceptstate,ignore_pounds);

   if(str>=endptr)   // Nothing left
      return fsm;

   // If we are not at the end, then we must have '=>'
   if((str+1==endptr)||(*str!='=')||(str[1]!='>'))
      PathParseError("Unexpected character",str);

   // We move the 'str'-pointer to the string coming after '=>'
   str+=2;

   return fsm;
}

void CompVPathExpr::CreateFromString(char * &str,char *endptr)
   // Initializes the object with the path expression found between
   // 'str' and 'endptr':
   //   1. the path is parsed into a forward FSM (temporary memory only),
   //      which is made deterministic and minimized,
   //   2. the FSM is reversed, made deterministic and minimized again; the
   //      final minimization runs with 'session->fsmmem' set to main memory
   //      and its result is kept in 'reversefsm',
   //   3. the plain text compressor "t" is installed as default and the
   //      option/user compressor string following the path is parsed.
   // On return, 'str' and 'regexprendptr' point behind the parsed text.
   // Parse errors are thrown as XMillException pointers.
{
#ifdef FULL_PATHEXPR
   VRegExpr       *regexpr;
#endif
   FSM            *tmpforwardfsm;
   char           *savestr=str;

   regexprstr=str;
   regexprendptr=endptr;   // The end ptr will be set later

   // We start a new block of temporary data
   session->tmpmem->StartNewMemBlock();

   // The forward FSM is only generated in temporary memory
   session->fsmmem=session->tmpmem;
   session->fsmtmpmem=session->tmpmem;

   // For now, it is required that paths start with '/'
   if(*str=='/')
      tmpforwardfsm=ParseXPath(str,endptr,0);
   else
   {
#ifdef FULL_PATHEXPR
      // Let's firstly take care of the main expression

      // NOTE(review): unlike the rest of this function, the next line does
      // not go through 'session' - confirm that 'vregexprmem' and 'tmpmem'
      // still exist under these names when FULL_PATHEXPR is enabled.
      vregexprmem=&tmpmem;
      regexpr=VRegExpr::ParseVRegExpr(str,endptr);

      // Let's convert the regular expression into an automaton
      // Let's create an FSM
      tmpforwardfsm=regexpr->CreateNonDetFSM();
#else
      PathParseError("Character '/' expected",str);
#endif
   }

   // Let's make the FSM deterministic
   tmpforwardfsm=tmpforwardfsm->MakeDeterministic();

   // Let's minimize
   tmpforwardfsm=tmpforwardfsm->Minimize();

   // We compute which states are accepting
   //tmpforwardfsm->FindAcceptingStates();

   // We store the following automata in the temporary memory
   session->fsmmem=session->tmpmem;

   // Now we reverse the FSM
   reversefsm=tmpforwardfsm->CreateReverseFSM();

   reversefsm=reversefsm->MakeDeterministic();

   // We store the following automaton in the main memory
   session->fsmmem=session->mainmem;
   session->mainmem->WordAlign();

   // Only now we create the FSM in main memory
   reversefsm=reversefsm->Minimize();

   // We compute which states are accepting
   reversefsm->FindAcceptingStates();

   // For each state, we also determine whether there
   // are pounds coming afterwards
   reversefsm->ComputeStatesHasPoundsAhead();

#ifdef USE_FORWARD_DATAGUIDE
   if(*savestr=='/')
      forwardfsm=ParseXPath(savestr,endptr,1);
   else
   {
      throw new XMillException (XMILL_ERR_FATAL, "Fatal Error in VPathExpr::CreateFromString\n");
   }

   // Let's make the FSM deterministic
   
   forwardfsm=forwardfsm->MakeDeterministic();

   // The minimized forward FSM must survive the removal of the temporary
   // block below, so it goes into main memory. The memory used for FSMs is
   // selected through 'session->fsmmem', as everywhere else in this function.
   session->fsmmem=session->mainmem;
   session->mainmem->WordAlign();

   // Let's minimize
   forwardfsm=forwardfsm->Minimize();

#endif

   // We remove all the temporary data
   session->tmpmem->RemoveLastMemBlock();

//*************************************************************************

   // We use the global setting as default for the white space handling
   // for the specific path expression. This might be overwritten by
   // function 'ParseUserCompressorString' below, which parses the user compressor
   // string
   leftwhitespacescompress=WHITESPACE_DEFAULT;
   rightwhitespacescompress=WHITESPACE_DEFAULT;

   // As the default compressor, we set the plain text compressor.
   // The factory takes a 'char *&', so the literal is bound to a 'char *'
   // explicitly; the implicit conversion is not valid C++ any more.
   char *textstring=const_cast<char *>("t");
   usercompressor=session->compressman->CreateCompressorInstance(textstring,textstring+1);
   compressorisdefault = true;

   regexprusercompressptr=str;

   // Let's parse the compressor string now
   ParseUserCompressorString(str,endptr);

   regexprendptr=str;
}

void VPathExpr::InitWhitespaceHandling()
   // Resolves white space settings that still refer to the global default:
   // each side (left/right) that is WHITESPACE_DEFAULT is replaced by the
   // corresponding global value from the session settings. Sides with an
   // explicit setting are left as they are.
{
   const bool leftisdefault =(leftwhitespacescompress==WHITESPACE_DEFAULT);
   const bool rightisdefault=(rightwhitespacescompress==WHITESPACE_DEFAULT);

   if(leftisdefault)
      leftwhitespacescompress=session->settings->globalleftwhitespacescompress;

   if(rightisdefault)
      rightwhitespacescompress=session->settings->globalrightwhitespacescompress;
}

// Creates a manager for session 's' with an empty list of path expressions.
VPathExprMan::VPathExprMan(Session *s)
{
   session = s;

   // Empty list: no head, no tail, no elements
   lastpathexpr = NULL;
   pathexprs = NULL;
   pathexprnum = 0;
}

// Nothing to release at this level.
VPathExprMan::~VPathExprMan() {}

VPathExpr *VPathExprMan::GetPathExpr(unsigned long idx)
   // Returns the path expression reached by following 'idx' next-links
   // from the head of the list, i.e. idx==0 yields the first element.
   // Returns NULL if the list ends before that position is reached
   // (previously the walk continued through a NULL pointer).
{
   VPathExpr *curpathexpr=pathexprs;

   while((curpathexpr!=NULL)&&(idx>0))
   {
      curpathexpr=curpathexpr->next;
      idx--;
   }
   return curpathexpr;
}

// Returns the head of the list of path expressions (NULL if empty).
VPathExpr *VPathExprMan::GetVPathExprs()
{
   return this->pathexprs;
}

void VPathExprMan::InitWhitespaceHandling()
   // If the default white space handling for some path expressions
   // is the global setting, then we replace that reference
   // by the global default value
{
   VPathExpr *pathexpr=pathexprs;
   while(pathexpr!=NULL)
   {
      pathexpr->InitWhitespaceHandling();
      pathexpr=pathexpr->next;
   }
}

//*******************************************************************
//*******************************************************************
//*******************************************************************
//*******************************************************************

// Releases all path expressions still held by the manager.
CompVPathExprMan::~CompVPathExprMan()
{
   this->DropAllVPathExprs();
}

void CompVPathExprMan::AddNewVPathExpr(char * &str,char *endptr)
   // Adds a new path expression, parsed from the text between 'str' and
   // 'endptr', to the end of the list. 'str' is advanced behind the
   // parsed text.
   // The new expression receives the index pathexprnum+1.
   // The element counter is only incremented once the expression has been
   // parsed and linked: CreateFromString() can throw, and the counter must
   // keep matching the list, since Store() writes the counter followed by
   // the list elements.
{
   // Create the path expression
   CompVPathExpr *item=new(session->mainmem) CompVPathExpr(session);

   item->idx=pathexprnum+1;

   // Parse the path expression string (may throw)
   item->CreateFromString(str,endptr);

   // Add the path expression to the list
   if(pathexprs==NULL)
      pathexprs=lastpathexpr=item;
   else
   {
      lastpathexpr->next=item;
      lastpathexpr=item;
   }

   pathexprnum++;
}

void CompVPathExprMan::DropAllVPathExprs()
{
	VPathExpr *item = pathexprs;
	VPathExpr *nextitem = NULL;

	while (item) {
		nextitem = item->next;
		item->DeInit();
		/* delete has no real function here; delete operator has been overloaded :-( */
		trydel (item);
		item = nextitem;
	}
	pathexprs = NULL;
	pathexprnum = 0;
}

//***************************************************************************************
//***************************************************************************************
//***************************************************************************************

inline void CompVPathExpr::Store(MemStreamer *moutput)
   // Stores the path expression in 'moutput'.
   // Only the user compressor part [regexprusercompressptr, regexprendptr)
   // is written: first its length, then the characters themselves.
   // The part may be empty, if there is no user compressor string.
{
   char *textbegin=regexprusercompressptr;
   char *textend=regexprendptr;

   moutput->StoreUInt32(textend-textbegin);
   moutput->StoreData(textbegin,textend-textbegin);
}

void DecompVPathExpr::Load(SmallBlockUncompressor *uncompress)
   // Loads the user compressor string from 'uncompress', copies it into
   // main memory, and parses it to create the corresponding uncompressor.
   // The path itself is not available on this side: 'regexprstr' is set
   // to NULL.
{
   char           *ptr;
   unsigned long  len=uncompress->myLoadString(&ptr);

   // We allocate some memory for the user compressor string
   regexprusercompressptr=session->mainmem->GetByteBlock(len);
   session->mainmem->WordAlign();

   memcpy(regexprusercompressptr,ptr,len);

   regexprendptr=regexprusercompressptr+len;
   regexprstr=NULL;

   char *str=regexprusercompressptr;

   // The default user compressor is the plain text compressor.
   // The factory takes a 'char *&', so the literal is bound to a 'char *'
   // explicitly; the implicit conversion is not valid C++ any more.
   char *textstring=const_cast<char *>("t");
   useruncompressor=session->decompressman->CreateUncompressorInstance(textstring,textstring+1);

   // Let's parse the user compressor; this replaces the default if the
   // string names a compressor
   ParseUserCompressorString(str,regexprendptr);
}

//************************************************************************
//************************************************************************

void CompVPathExprMan::Store(MemStreamer *memstream)
   // Stores all path expressions in 'memstream': first the number of
   // path expressions, then each expression in list order.
{
   memstream->StoreUInt32(pathexprnum);

   // The list only holds CompVPathExpr objects (see AddNewVPathExpr)
   for(CompVPathExpr *cur=(CompVPathExpr*)pathexprs;
       cur!=NULL;
       cur=(CompVPathExpr*)cur->next)
   {
      cur->Store(memstream);
   }
}

void DecompVPathExprMan::Load(SmallBlockUncompressor *uncompress)
   // Loads the set of path expressions from 'uncompress': first the number
   // of expressions, then each expression, appended to the list in order.
   // Indices are assigned starting at 0.
   // The resulting list is always NULL-terminated and 'lastpathexpr'
   // refers to its last element (NULL for an empty list). Previously the
   // 'next' pointer of the last element was never written here.
{
   // Load the number
   pathexprnum=uncompress->LoadUInt32();

   // 'pathexprref' always refers to the link that receives the next
   // element: at first the list head, later the 'next' field of the
   // element loaded last
   DecompVPathExpr **pathexprref=(DecompVPathExpr**)&pathexprs;

   lastpathexpr=NULL;

   // Load all path expressions

   for(unsigned long i=0;i<pathexprnum;i++)
   {
      *pathexprref=new(session->mainmem) DecompVPathExpr(session);

      (*pathexprref)->idx=i;

      (*pathexprref)->Load(uncompress);

      lastpathexpr=*pathexprref;

      pathexprref=(DecompVPathExpr**)&((*pathexprref)->next);
   }

   // Terminate the list
   *pathexprref=NULL;
}

// Returns the start state of the reverse FSM built by CreateFromString().
FSMState *CompVPathExpr::GetReverseFSMStartState()
{
   FSMState *startstate = reversefsm->GetStartState();
   return startstate;
}
#ifdef USE_FORWARD_DATAGUIDE
// Returns the start state of the forward FSM built by CreateFromString().
// Only available when USE_FORWARD_DATAGUIDE is defined.
FSMState *CompVPathExpr::GetForwardFSMStartState()
{
   FSMState *startstate = forwardfsm->GetStartState();
   return startstate;
}
#endif

#ifdef NOTHREAD
// Compiled only when NOTHREAD is defined: FSMManStateItem objects take
// their storage from 'session->pathtreemem'.
void *FSMManStateItem::operator new(size_t size)
{
   void *block = session->pathtreemem->GetByteBlock(size);
   return block;
}

// Deallocation does nothing; the storage is not handed back here.
void FSMManStateItem::operator delete(void * /*ptr*/)
{
}
#endif

// Returns the path dictionary node stored in this item.
PathDictNode *FSMManStateItem::GetPathDictNode()
{
   return this->pathdictnode;
}
