// XMIInspect main methods

#include "stdafx.h"

/* Warning: my AntiPatterns book tells me this project is contaminated with the Functional Decomposition AntiPattern.

	FD means: programming OO with an imperative style, e.g., placing all data in an object as member variables and
	using them as one would use global variables in a badly programmed C-style program. Which is what I do here;
	check out all the vars in the XMIInspect class...

	The ExprInfo stuff (parsing of path expressions) may also be an example of an Input Kludge, meaning that I should
	use Lex & Yacc instead of implementing my own parser, which probably fails on a lot of inputs. Luckily, the path
	exprs as stored in the XMI have already been parsed by the XMill compressor, so probably they are all well-formed.

	If you want to resolve these issues, be my guest! :-)
*/

int XMIInspect::endThisRun()
{
	int stat = 0;

	totalrunsize += runsize;
	if (level >= XMIINS_REPORT_SOMEINFO) {
		printf ("%ld large containers and %ld large global blocks read, "
					"run # %ld has ended\n %ld header + %ld data = %ld Bytes total\n now checking for next run\n", 
			foundcontainers, 
			numlargeglobals,
			numheaders, 
			headersize, runsize, headersize + runsize);
	}
	/* read the next run (if present) */
	runsize = numlargeglobals = foundcontainers = expectedcontainers = 0;
	state = XMIINS_READ_RUN_HEADER;

cleanup:
	return stat;
}

/* guess the XMI general purpose (GP) compressor type, based on the first few Bytes
 *
 *  input:   the XMI input; XMILL_HEADER_PEEK_SIZE Bytes are fetched from it with PeekData
 *  gpc:     out: name of the detected compressor ("bzip2", "nozip", "ppmdi" or "gzip").
 *           It points to a string literal, so the caller must neither modify nor free it.
 *  gpctype: out: the matching XMILL_GPC_* constant
 *  idx:     out: the PPMDI compression index; only written when PPMDI is detected
 *
 * returns: 0 when a compressor was selected, XMIINS_ERR_XMLFILE when the data looks like a
 *          plain XML file (the out parameters are left untouched in that case)
 */
int XMIInspect::guessGPC(Input *input, char *&gpc, char &gpctype, unsigned char &idx)
{
	char header[XMILL_HEADER_PEEK_SIZE+1];
	unsigned int ppmdistart = XMILL_HEADER_PPMDI_START;
	int stat = 0;

	/* clear the buffer first, so that the comparisons below never look at indeterminate
	   Bytes when fewer than XMILL_HEADER_PEEK_SIZE Bytes were delivered */
	memset(header, 0, sizeof(header));

	/* GP compressor heuristic */
	input->PeekData(header, XMILL_HEADER_PEEK_SIZE);
	if (!strncmp(header, XMILL_HEADER_BZIP, sizeof(XMILL_HEADER_BZIP)-1)) {
		gpctype = XMILL_GPC_BZIP;
		/* the interface wants a char*; the literal itself stays read-only */
		gpc = const_cast<char*>("bzip2");
	} else if (!strncmp(header, XMILL_HEADER_NOZIP, sizeof(XMILL_HEADER_NOZIP)-1)) {
		gpctype = XMILL_GPC_NONE;
		gpc = const_cast<char*>("nozip");
	} else if (!strncmp(header, XMILL_HEADER_XML, sizeof(XMILL_HEADER_XML)-1)) {
		stat = XMIINS_ERR_XMLFILE;
		if (level >= XMIINS_REPORT_ERRORS) {
			printf("this file is an XML file!\n");
		}
		goto cleanup;
	} else {
		bool isppmdi = false;

		/* the comparison is done on the in-memory representation of the start marker.
		   NOTE(review): this presumably relies on the same Byte order as the machine that
		   wrote the file, and on XMILL_HEADER_PEEK_SIZE >= sizeof(ppmdistart) -- confirm */
		if (!memcmp(header, &ppmdistart, sizeof(ppmdistart)-1)) {
			/* we only checked the first 3 Bytes because the compression index is 'hidden' in the last Byte.
			   Now extract this info */
			memcpy(&ppmdistart, header, sizeof(ppmdistart));
			if (ppmdistart >> 24 < XMILL_PPMDI_IDXS) {
				gpctype = XMILL_GPC_PPMDI;
				gpc = const_cast<char*>("ppmdi");
				idx = (unsigned char)(ppmdistart >> 24);
				isppmdi = true;
			}
		}

		if (!isppmdi) {
			/* assume GZIP. This also covers a PPMDI-like start with an out-of-range index:
			   previously that case returned 0 without setting gpc or gpctype at all */
			gpctype = XMILL_GPC_GZIP;
			gpc = const_cast<char*>("gzip");
		}
	}

cleanup:
	return stat;
}

/* Print the end report and run the final consistency checks on the collected counters.
 *
 *  input: the XMI input; only its curptr and endptr are compared, to detect unread data
 *
 * How much is printed depends on the report level. The checks are done in a fixed order and
 * the first one that fails determines the result:
 *   XMIINS_ERR_XMISIZEMISMATCH     processed Bytes differ from the XMI file size
 *   XMIINS_ERR_CONTAINERNOTFOUND   expected and found container counts differ
 *   XMIINS_ERR_TOTALSIZE           run + global data size differs from the total data size,
 *                                  or the input still holds unread Bytes
 *   XMIINS_ERR_OPENCLOSE_MISMATCH  # open and # close XML elements differ
 *   XMIINS_ERR_SPECIALREF          container 0 (the special container) holds XML data
 *
 * returns: 0 when all checks pass, else one of the codes above
 */
int XMIInspect::endReport(Input *input)
{
	int stat = 0;
	/* long, so that these locals match the %ld conversions they are printed with */
	long count = 0, totalcount = 0;

	/* end report */
	if (level >= XMIINS_REPORT_MININFO) {
		printf ("end report:\n %ld Bytes of XMI data processed\n %ld uncompressed buffers total: %ld Bytes\n",
			totalcompsize, numbuffers, totaluncompsize);
		printf (" # runs: %ld\n"
				  " global header size: %ld Bytes\n"
				  " total run header size: %ld Bytes\n"
				  " total run size: %ld Bytes\n"
				  " global data size: %ld Bytes\n",
			numheaders, globalheadersize, totalheadersize - globalheadersize, totalrunsize, globaldatasize);
	}
	if (level >= XMIINS_REPORT_INFO) {
		printf(" # XML elements: %ld\n"
				 " # data elements including constants: %ld\n"
				 " # whitespace elements: %ld\n"
				 " # special elements: %ld\n",
				 numopen, numdata, numwhite, numspecial);
		if (level >= XMIINS_REPORT_MOREINFO) {
			printf(" # data elements per data container block:\n");
			/* skip container 0, it's the special container and should not be referenced */
			for (long i=1; i<numblocks; i++) {
				printf("  container block %ld/%ld: %ld structure elements\n",
					i+1, numblocks, numcontdata[i]);
				for (long k=0; k<numcontainers[i]; k++) {
					/* data counts per container */
					count = numrealcontdata[i+numblocks*k];
					totalcount += count;
					if (level >= XMIINS_REPORT_CONTENTS
						/* only report if we have multiple containers, else this info is a bit too much... */
						&& numcontainers[i] > 1) {
						/* block number is 1-based here as well, like in the block line above */
						printf("   container block %ld/%ld, container %ld/%ld: %ld data elements\n",
							i+1, numblocks, k+1, numcontainers[i], count);
					}
				}
			}
			printf("  total # of data elements: %ld\n", totalcount);
		}
	}

	/* error checks */
	if (totalcompsize != xmifilesize) {
		if (level >= XMIINS_REPORT_ERRORS) {
			printf("XMI file is %ld Bytes, but processed %ld (diff: %ld)!\n",
				xmifilesize, totalcompsize, xmifilesize - totalcompsize);
		}
		stat = XMIINS_ERR_XMISIZEMISMATCH;
		goto cleanup;
	}
	if (expectedcontainers != foundcontainers) {
		if (level >= XMIINS_REPORT_ERRORS) {
			/* report the same counter that the check above compares */
			printf("expected %ld containers, but found %ld!\n", expectedcontainers, foundcontainers);
		}
		stat = XMIINS_ERR_CONTAINERNOTFOUND;
		goto cleanup;
	}
	if (totalrunsize + globaldatasize != totaldatasize) {
		if (level >= XMIINS_REPORT_ERRORS) {
			printf ("%ld Bytes unaccounted for!\n", totalrunsize + globaldatasize - totaldatasize);
		}
		stat = XMIINS_ERR_TOTALSIZE;
		goto cleanup;
	}
	if (input->curptr != input->endptr) {
		if (level >= XMIINS_REPORT_ERRORS) {
			printf ("%ld Bytes of XMI data not processed!\n", (long)(input->endptr - input->curptr));
		}
		stat = XMIINS_ERR_TOTALSIZE;
		goto cleanup;
	}
	if (numopen != numclose) {
		if (level >= XMIINS_REPORT_ERRORS) {
			printf("# open and # closed XML elements differ! (%ld != %ld)\n", numopen, numclose);
		}
		stat = XMIINS_ERR_OPENCLOSE_MISMATCH;
		goto cleanup;
	}
	if (numcontdata[0] > 0) {
		if (level >= XMIINS_REPORT_ERRORS) {
			printf("The special container is referenced for XML data!\n");
		}
		stat = XMIINS_ERR_SPECIALREF;
		goto cleanup;
	}

cleanup:
	return stat;
}

/* inspect an XMI file
 * structure:
 *  a sequence of GP compressed streams:
 *   first stream contains global header & first run header
 *   the next few streams contain the large global blocks (if any)
 *   the next few streams contain the large container blocks (if any)
 *   then follows a stream with the second run header
 *   etc.
 *
 * The run header contains some run info, and containers & global data that are smaller than some threshold.
 * A container block can contain multiple blocks. For large blocks, each of those blocks is in its own stream.
 *
 *  xmifile: path of the XMI file to inspect
 *
 * returns: 0 on success. On failure: XMIINS_ERR_FILENOTFOUND, XMIINS_ERR_BUFFERTOOSMALL,
 *          XMIINS_ERR_STATE, or whatever non-zero code guessGPC, one of the read* methods
 *          or endReport returned.
 */
int XMIInspect::inspectFile(char *xmifile)
{
	int stat = 0;
	long oldleft, newleft;
	long oldpos, newpos;
	FILE *f = fopen(xmifile, "rb");
	clock_t t1 = clock(), t2;
	Input *input = NULL;
	XMill *xmill = NULL;
	Session *session = NULL;
	CFile *file = NULL;
	Uncompressor *uncompressor = NULL;
	/* debug switches: number of the buffer to dump to disc, or -1 to dump every buffer.
	   With -2 nothing is dumped, since buffer numbers start at 0 */
	int decompbuf = -2, compbuf = -2;

	if (!f) {
		if (level >= XMIINS_REPORT_ERRORS) {
			printf("XMI file %s could not be opened!\n", xmifile);
		}
		stat = XMIINS_ERR_FILENOTFOUND;
		goto cleanup;
	}

	/* get file size */
	fseek(f, 0, SEEK_END);
	xmifilesize = ftell(f);
	fclose(f);

	if (level >= XMIINS_REPORT_SOMEINFO) {
		printf ("inspecting %s\n", xmifile);
	}

	/* get XMill instance */
	xmill = new XMill();
	session = xmill->session;
	/* open XMI file */
	file = new CFile();
	file->OpenFile(xmifile);
	input = new Input(session);
	input->SetCFile(file);

	/* get & report GPC type */
	if ((stat = guessGPC(input, gpc, session->settings->use_bzip, session->settings->compressidx)) != 0) {
		goto cleanup;
	}
	if (level >= XMIINS_REPORT_MININFO) {
		printf("gp compressor: %s\n", gpc);
	}

	/* create an uncompressor instance and buffer */
	uncompressor = new Uncompressor(session);

	/* decompress XMI compressed streams; the loop ends as soon as a stream yields no data.
	   NOTE(review): len is not set in this function before the first test, so it is
	   presumably initialised > 0 elsewhere -- confirm */
	for (numbuffers = 0; len > 0; numbuffers++) {
		/* set buffer length, get larger buffer if needed */
		newBuffer(buflen);

		/* uncompress & store sizes */
		oldleft = input->endptr - input->curptr;
		oldpos = file->GetFilePos();
		if (uncompressor->Uncompress(input, buffer, &len) && len > 0) {
			if (level >= XMIINS_REPORT_ERRORS) {
				printf("buffer is not large enough (%ld Bytes)\n", buflen);
			}
			stat = XMIINS_ERR_BUFFERTOOSMALL;
			goto cleanup;
		}

		/* write uncompressed buffer to disc */
		if (numbuffers==decompbuf || decompbuf == -1) {
			/* NOTE(review): sprintf is unbounded; a long xmifile can overflow ubuf */
			char ubuf[STRLEN+1];
			sprintf(ubuf, "buf%05ld.%s.dat", numbuffers, xmifile);
			FILE *g = fopen(ubuf, "wb");
			if (g) {
				fwrite(buffer, 1, len, g);
				fclose(g);
			}
		}

		/* calculate compressed size. A bit awkward because of the caching that the Input and CFile classes do.. */
		newleft = input->endptr - input->curptr;
		newpos = file->GetFilePos();
		compsize = oldleft - newleft - oldpos + newpos;
		totalcompsize += compsize;

		/* write compressed buffer to disc */
		if (numbuffers==compbuf || compbuf == -1) {
			/* NOTE(review): sprintf is unbounded; a long xmifile can overflow ubuf */
			char ubuf[STRLEN+1];
			sprintf(ubuf, "buf%05ld.%s.cdat", numbuffers, xmifile);
			FILE *src = fopen(xmifile, "rb");
			FILE *dst = fopen(ubuf, "wb");
			char *raw = (char*)malloc(compsize);
			/* only seek and copy when all three resources are there, and copy only what was really read */
			if (src && dst && raw && fseek(src, totalcompsize - compsize, SEEK_SET) == 0) {
				size_t got = fread(raw, 1, compsize, src);
				fwrite(raw, 1, got, dst);
			}
			/* release whatever was acquired, also when one of the three failed */
			if (dst) {
				fclose(dst);
			}
			if (src) {
				fclose(src);
			}
			free(raw);
		}

		if (len > 0) {
			/* a valid buffer was read, now handle it */
			if (level >= XMIINS_REPORT_INFO) {
				printf ("read buffer # %ld, uncompressed size: %ld Bytes, compressed size: %ld Bytes\n",
					numbuffers, len, compsize);
			}

			/* some size administration */
			totaluncompsize += len;
			curptr = buffer;
			preheadersize = -((long)curptr);

			switch (state) {
				case XMIINS_READ_HEADER:
					/* read the global XMI header */
					if ((stat = readGlobalHeader()) != 0) {
						goto cleanup;
					}
					// no break! the first run header follows in the same stream
				case XMIINS_READ_RUN_HEADER:
					/* read a run header */
					if ((stat = readRunHeader()) != 0) {
						goto cleanup;
					}
					break;

				case XMIINS_READ_GLOBAL_BLOCK:
					/* read a global data block (currently only for EnumCompressors) */
					if ((stat = readLargeGlobalBlock()) != 0) {
						goto cleanup;
					}
					break;

				case XMIINS_READ_CONTAINER_BLOCK:
					/* read a large container block */
					if ((stat = readLargeContainerBlock()) != 0) {
						goto cleanup;
					}
					break;

				default:
					/* oops! */
					stat = XMIINS_ERR_STATE;
					if (level >= XMIINS_REPORT_ERRORS) {
						printf("illegal state %ld!\n", state);
					}
					/* stop here like the other errors do; with a plain break the loop went
					   on and endReport overwrote this error code */
					goto cleanup;
			}
		}
	}

	/* set correct # buffers: the last loop pass only found the end of the data */
	numbuffers--;

	/* print an end report (and check some sizes) */
	if ((stat = endReport(input)) != 0) {
		goto cleanup;
	}

cleanup:
	t2 = clock();

	/* done! */
	if (level >= XMIINS_REPORT_INFO || (stat > 0 && level >= XMIINS_REPORT_ERRORS)) {
		printf ("inspecting %s done in %5.1lf secs\n\n", xmifile, ((double)(t2 - t1))/CLOCKS_PER_SEC);
	}

	/* free some data.
	   NOTE(review): file is not released here; presumably Input or XMill takes care of it -- confirm */
	trydela(index);
	trydela (statesize);
	trydela (numcontainers);
	trydela (containersize);
	trydel (input);
	trydel (xmill);
	trydel (uncompressor);

	return stat;
}

