/* 
 * E-XML Library:  For XML, XML-RPC, HTTP, and related.
 * Copyright (C) 2002-2008  Elias Ross
 * 
 * genman@noderunner.net
 * http://noderunner.net/~genman
 * 
 * 1025 NE 73RD ST
 * SEATTLE WA 98115
 * USA
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * $Id$
 */

package net.noderunner.exml;

import java.io.IOException;
import java.io.Reader;
import java.util.Iterator;

import net.noderunner.exml.ElementRule.ElementRuleState;

/**
 * An incremental, streaming XML pull-parser.
 * The performance, and CPU/memory usage of this class is comparable to a
 * typical SAX parser.
 * <p>
 * Through an underlying stack, parse steps are kept track of, and the tree can
 * be moved back down to any level, by reading forwards, but the stream cannot
 * be read backwards.  Element rules must be added to the underlying {@link
 * XmlReader}, otherwise elements will not be recognized.  This class supports
 * full validity checking.
 * </p>
 * <p>
 * Example usage: (document)
 * <pre>
 * &lt;?xml version="1.0" encoding="UTF-8"?&gt;
 * &lt;a&gt;&lt;b&gt;&lt;c/&gt;
 *    &lt;d&gt;D&amp;apos;s content here&lt;/d&gt;
 * &lt;/b&gt;&lt;/a&gt;
 * </pre>
 * (create the reader with DTD)
 * <pre>
 * ElementRule rule;
 * rule = new ElementRule();
 * Dtd dtd = new Dtd();
 * dtd.addElementRule("a", rule);
 * dtd.addElementRule("b", rule);
 * dtd.addElementRule("c", rule);
 * rule = new ElementRule();
 * rule.setAllowPCData(true);
 * dtd.addElementRule("d", rule);
 * XmlReader reader = new XmlReader(new StringReader( ... ), dtd);
 * </pre>
 * (create the parser)
 * <pre>
 * XmlParser parser = new XmlParser(reader);
 * parser.skipProlog();
 * parser.startTag().getName(); // returns "a"
 * parser.startTag().getName(); // returns "b"
 * parser.startTag().getName(); // returns "c"
 * parser.endTag().getName();   // returns "c"
 * parser.startTag().getName(); // returns "d"
 * Element d = parser.getContent();
 * d.getCharacterData();        // returns "D's content here"
 * </pre>
 *
 * @author Elias Ross
 * @version 1.0
 */
public class XmlParser 
{
	/**
	 * Underlying reader.  Never replaced; {@link #setReader} only swaps
	 * the character source inside it.
	 */
	private final XmlReader reader;

	/**
	 * Helps keep track of parsing.  Holds one <code>Element</code> per
	 * <code>startTag</code> call that has not yet been matched by an
	 * <code>endTag</code> call (including elements that are not open).
	 */
	private final ArrayStack<Element> readStack;

	/**
	 * If <code>Misc</code> data should be kept as part of elements that
	 * disallow PCDATA.
	 */
	private boolean alwaysSaveMisc;

	/**
	 * Constructs an XmlParser around an XmlReader.  XmlReader must contain
	 * a DTD in it which provides element rules for all elements that are
	 * to be encountered by input.  Without this DTD, any unknown elements
	 * parsed will cause an exception to be thrown.
	 *
	 * @param reader xml reader to wrap
	 * @see Dtd
	 */
	public XmlParser(XmlReader reader) {
		this.reader = reader;
		this.readStack = ArrayStack.create();
	}

	/**
	 * Resets this object for parsing another <code>Reader</code>
	 * source.  The element stack is cleared, so {@link #getDepth}
	 * returns zero afterwards.
	 *
	 * @param reader new character source handed to the underlying
	 * <code>XmlReader</code>
	 */
	public void setReader(Reader reader) {
		this.reader.setReader(reader);
		readStack.clear();
	}

	/**
	 * Skips over any initial XML prolog data and comments.
	 *
	 * @see XmlReader#Prolog
	 * @throws XmlException if bad XML data
	 * @throws IOException if bad underlying stream
	 */
	public void skipProlog() 
		throws IOException, XmlException
	{
		reader.Prolog(null);
	}

	/**
	 * Returns the element returned by the last
	 * <code>startTag</code> call that has not yet been ended.
	 *
	 * @return the stack top, or null if there is no top element
	 * @see #startTag()
	 */
	public Element getTopElement() {
		if (readStack.isEmpty())
			return null;
		return readStack.peek();
	}

	/**
	 * Creates a new element if one exists at the current stream point.
	 * Like the <code>XmlReader.STag</code> call, but validating.  The
	 * method <code>endTag</code> must be called for every
	 * <code>startTag</code> call.  Even if the tag returned is not
	 * open, <code>endTag</code> must be called.  In the case of this
	 * document fragment:
	 * <pre>
	 * &lt;a&gt;&lt;/a&gt;
	 * </pre>
	 * even though &lt;a&gt; is considered a closed element, the parser
	 * will not know this until <code>endTag</code> is called.
	 *
	 * @return null, if no start tag exists
	 * @throws XmlException if bad XML data, or if the DTD has no rule
	 * for the element
	 * @throws IOException if bad underlying stream
	 * @see #endTag
	 */
	public Element startTag() 
		throws IOException, XmlException
	{
		Element e = reader.STag();
		if (e == null)
			return null;

		// Validate the new element against the rule of its parent, using
		// the parent's current rule state.
		ElementRule parentRule = null;
		RuleStack ruleStack = reader.getRuleStack();
		if (!readStack.isEmpty()) {
			parentRule = reader.getDtd().getElementRule(getTopElement());
			parentRule.encounterElement(e, ruleStack.state());
		}
		// else: root element; should check doctype

		// Look up the new element's own rule BEFORE pushing onto the rule
		// stack, so that an unknown element does not leave the rule stack
		// one level deeper than readStack.
		ElementRule newRule = reader.getDtd().getElementRule(e.getName());
		if (newRule == null)
			throw new XmlException("Unknown element " + e);
		ruleStack.startElement();

		// Clear out Misc if no PCDATA is allowed at the new stream point:
		// inside the new element when it is open, otherwise inside its
		// parent (if any).
		if (!alwaysSaveMisc) {
			boolean insideNew = e.isOpen() && !newRule.isPCDataAllowed();
			boolean insideParent = !e.isOpen() && parentRule != null
				&& !parentRule.isPCDataAllowed();
			if (insideNew || insideParent)
				reader.Misc(null);
		}

		// add to stack (even if closed)
		readStack.add(e);
		return e;
	}

	/**
	 * Adds content to object returned by last <code>startTag</code>
	 * call.  May parse recursively.  Call <code>endTag</code> to
	 * finally close this element.  This element is stored within this
	 * parser, so if the memory is no longer needed, call
	 * <code>Element.clearChildren</code>.
	 *
	 * @see Element#clearChildren
	 * @see #emptyContent
	 * @return the element with read in information
	 * @throws XmlException if there is no current element, or bad XML data
	 * @throws IOException if bad underlying stream
	 */
	public Element getContent() 
		throws IOException, XmlException
	{
		if (readStack.isEmpty())
			throw new XmlException("No current tag");
		Element top = getTopElement();
		reader.content(top);
		return top;
	}

	/**
	 * Reads all remaining content of last <code>startTag</code> call
	 * into a <code>NullElement</code>.  This effectively reads in all
	 * further content preceding the end tag.
	 * Call <code>endTag</code> to finally close this element.
	 *
	 * @throws XmlException if bad XML data
	 * @throws IOException if bad underlying stream
	 * @see NullElement
	 * @see #endTag
	 */
	public void emptyContent() 
		throws IOException, XmlException
	{
		reader.content(NullElement.getInstance());
	}

	/**
	 * Searches for end of the last element read.  Skips over any
	 * existing content in the current element and reads in the end tag.
	 * If the element was not open (an empty-element tag), nothing is
	 * read for it; it is only removed from the stack.
	 *
	 * @throws XmlException if bad Xml data, if the expected end tag is
	 * not found, or if there is no element left to end
	 * @throws IOException if bad underlying stream
	 * @return last element returned by <code>startTag</code>
	 */
	public Element endTag() 
		throws IOException, XmlException
	{
		if (readStack.isEmpty())
			throw new XmlException("Too many endTag() calls:  No more elements");
		Element top = getTopElement();
		if (top.isOpen()) {
			// throw away whatever content
			emptyContent();
			boolean ok = reader.ETag(top);
			if (!ok)
				throw new XmlException("Element </" + top.getName() + "> expected, not found");
		}

		// Let the element's rule verify that it may end in its current state.
		ElementRule elementRule = reader.getDtd().getElementRule(top);
		RuleStack ruleStack = reader.getRuleStack();
		ElementRuleState state = ruleStack.state();
		elementRule.encounterEnd(state);
		readStack.pop();
		ruleStack.endElement();

		// read in Misc if the (new) current rule does not allow PCData
		if (top.isOpen() && !readStack.isEmpty()) {
			ElementRule parentRule = reader.getDtd().getElementRule(getTopElement());
			if (!alwaysSaveMisc && !parentRule.isPCDataAllowed())
				reader.Misc(null);
		}
		return top;
	}

	/**
	 * Closes the underlying input stream.
	 * Calling any more methods on this object will likely result in a
	 * <code>java.io.IOException</code>.
	 *
	 * @throws IOException if closing the underlying reader fails
	 */
	public void close() 
		throws IOException
	{
		reader.close();
	}

	/**
	 * Skips over any whitespace or comments or processing instructions.
	 * This may be useful for reading everything trailing at 
	 * the end of a document.
	 *
	 * @throws XmlException if bad XML data
	 * @throws IOException if bad underlying stream
	 */
	public void skipMisc()
		throws IOException, XmlException
	{
		// Keep consuming for as long as the reader reports it found Misc.
		while (reader.Misc(null)) {
			// nothing to do; the data is discarded
		}
	}

	/**
	 * Returns the parse tree depth of this tree.
	 * For example, assuming this much of the document stream has been
	 * read:
	 * <pre>
	 * &lt;a&gt;
	 *   &lt;b&gt;
	 * </pre>
	 * the depth returned is <code>2</code>.
	 *
	 * @return an integer, counting from 0, representing the number of
	 * <code>startTag</code> calls that have not yet been matched by an
	 * <code>endTag</code> call
	 */
	public int getDepth() {
		return readStack.size();
	}
	
	/**
	 * Moves the stream up to the depth given, by calling
	 * <code>endTag</code> until <code>getDepth()</code> is no longer
	 * greater than <code>depth</code>.  If <code>depth</code> is equal
	 * to or exceeds the current level, this method has no effect.  If
	 * <code>depth</code> is zero, every element on the stack is ended,
	 * including the outermost one.  This is useful for reading in the
	 * remaining content of an entire XML document that is no longer
	 * important.
	 * <p>
	 * For example, calling <code>up(1)</code>, having read the
	 * document this far:
	 * <pre>
	 * &lt;a&gt;
	 *   &lt;b&gt;
	 *     &lt;c&gt;
	 * </pre>
	 * will end &lt;c&gt; and then &lt;b&gt;, leaving &lt;a&gt; as the
	 * top element.
	 *
	 * @param depth the depth to return to; must not be negative
	 * @throws IllegalArgumentException if negative depth is given
	 * @throws XmlException if bad XML data
	 * @throws IOException if bad underlying stream
	 * @see #close
	 * @see #getDepth
	 */
	public void up(int depth) 
		throws IOException, XmlException
	{
		if (depth < 0)
			throw new IllegalArgumentException("Depth must be positive: " + depth);
		while (readStack.size() > depth) {
			// endTag() already discards the remaining content of an open
			// element.  Do NOT call emptyContent() here: when the top
			// element is not open (e.g. <c/>), the stream is positioned
			// inside its parent and that would consume the parent's content.
			endTag();
		}
	}

	/**
	 * Sets whether or not miscellaneous data, like whitespace,
	 * comments, processing instructions are to be kept.
	 * By default, whitespace, comments, and processing instructions are
	 * not saved into children of elements that do not allow PCDATA.  This
	 * is to simplify parsing in many ways.  However, if this information
	 * should be kept, call this method to have these nodes saved.
	 *
	 * @param yes to save misc data for all elements
	 */
	public void alwaysSaveMisc(boolean yes) {
		this.alwaysSaveMisc = yes;
	}

	/**
	 * Returns a string representation of this object for debugging:
	 * the underlying reader followed by the names of the elements on
	 * the stack, in the stack's iteration order.
	 */
	@Override
	public String toString() {
		StringBuilder sb = new StringBuilder("XmlParser [reader=");
		sb.append(reader).append(" readStack=");
		Iterator<Element> i = readStack.iterator();
		while (i.hasNext())
			sb.append(i.next().getName()).append(' ');
		sb.append(']');
		return sb.toString();
	}
}
