<?php
/**
 * Unicode Normalizer
 *
 * PHP version 5
 *
 * All rights reserved.
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 * + Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 * + Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation and/or
 * other materials provided with the distribution.
 * + The names of its contributors may not be used to endorse or
 * promote products derived from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * @category  Internationalization
 * @package   I18N_UnicodeNormalizer
 * @author    Michel Corne <mcorne@yahoo.com>
 * @copyright 2007 Michel Corne
 * @license   http://www.opensource.org/licenses/bsd-license.php The BSD License
 * @version   SVN: $Id: UnicodeNormalizerTest.php 38 2007-07-23 11:42:30Z mcorne $
 * @link      http://pear.php.net/package/I18N_UnicodeNormalizer
 */
// Registers tests_UnicodeNormalizerTest::main() as the PHPUnit main method,
// unless that constant has already been defined before this file is loaded.
if (defined('PHPUnit_MAIN_METHOD') === false) {
    define('PHPUnit_MAIN_METHOD', 'tests_UnicodeNormalizerTest::main');
}

require_once "PHPUnit/Framework/TestCase.php";
require_once "PHPUnit/Framework/TestSuite.php";
require_once 'I18N/UnicodeNormalizer.php';

/**
 * Test class for I18N_UnicodeNormalizer.
 * Generated by PHPUnit_Util_Skeleton on 2007-05-18 at 18:49:09.
 *
 * The following flags can be adjusted to customize the tests:
 * self::$codeCoverageTest, self::$forceCompile, self::$generateUCN,
 * self::$mediawikiNormalization and self::$runNormalizationTest.
 * The result of the normalization regression test is in reports/regression.
 *
 * @category  Internationalization
 * @package   I18N_UnicodeNormalizer
 * @author    Michel Corne <mcorne@yahoo.com>
 * @copyright 2007 Michel Corne
 * @license   http://www.opensource.org/licenses/bsd-license.php The BSD License
 * @version   Release: @package_version@
 * @link      http://pear.php.net/package/I18N_UnicodeNormalizer
 */
class tests_UnicodeNormalizerTest extends PHPUnit_Framework_TestCase
{
    /**
     * The code coverage test flag
     *
     * Overrules: self::$generateUCN, self::$mediawikiNormalization, self::$forceCompile.
     * Controls the normalizer to run with a limited set of the Unicode data.
     * This flag must be set when phpunit is run with the --report option, so tests
     * are run with a limited set of the Unicode.org data. The normalizer will then
     * include small compiled files and phpunit/xdebug will not exceed memory requirements
     * to generate the code coverage report.
     *
     * @var    boolean
     * @access public
     * @static
     */
    public static $codeCoverageTest = false;

    /**
     * Controls the compiler to force-compile files
     *
     * @var    boolean
     * @access public
     * @static
     */
    public static $forceCompile = false;

    /**
     * Controls the compilation of Unicode data to the UCN format
     *
     * When selected, the compilation to the UCN format is done in addition to
     * the compilation to UTF-8. The compiled files in UCN format are used
     * for debugging purposes.
     *
     * @var    boolean
     * @access public
     * @static
     */
    public static $generateUCN = false;

    /**
     * Controls the normalization test
     *
     * The normalization test uses either the I18N_UnicodeNormalizer class if false
     * or the mediawiki UtfNormal class if true.
     * This flag is ignored when self::$codeCoverageTest is set.
     *
     * @var    boolean
     * @access public
     * @static
     */
    public static $mediawikiNormalization = false;

    /**
     * Runs the Unicode.org normalization regression test
     *
     * When false, the regression test methods pass without normalizing anything.
     *
     * @var    boolean
     * @access public
     * @static
     */
    public static $runNormalizationTest = true;

    /**
     * Allows the compiler to force-compile the files only during the first setup call
     *
     * @var    boolean
     * @access private
     * @static
     */
    private static $firstSetup = true;

    /**
     * The name list of compiled files, indexed by name, e.g. 'test_base'
     *
     * @var    array
     * @access private
     */
    private $compiled;

    /**
     * The I18N_UnicodeNormalizer_File class instance used to write the test report
     *
     * @var    object
     * @access private
     */
    private $file;

    /**
     * The I18N_UnicodeNormalizer class instance
     *
     * @var    object
     * @access private
     */
    private $normalizer;

    /**
     * The normalization class name: 'I18N_UnicodeNormalizer' or 'UtfNormal'
     *
     * @var    string
     * @access private
     */
    private $normClass;

    /**
     * The test result file name
     *
     * @var    string
     * @access private
     */
    private $resultFile;

    /**
     * The test rules
     *
     * Format:  NFx(string-i) => string-j,
     * as defined in the header of the Unicode.org test file: NormalizationTest-*.txt
     *
     * @var    array
     * @access private
     */
    private $rules = array(
        'NFC' => array(1 => 2, 2 => 2, 3 => 2, 4 => 4, 5 => 4),
        'NFD' => array(1 => 3, 2 => 3, 3 => 3, 4 => 5, 5 => 5),
        'NFKC' => array(1 => 4, 2 => 4, 3 => 4, 4 => 4, 5 => 4),
        'NFKD' => array(1 => 5, 2 => 5, 3 => 5, 4 => 5, 5 => 5),
        );

    /**
     * The I18N_UnicodeNormalizer_String class instance
     *
     * @var    object
     * @access private
     */
    private $string;

    /**
     * Runs the test methods of this class.
     *
     * @access public
     * @static
     */
    public static function main()
    {
        require_once "PHPUnit/TextUI/TestRunner.php";

        // the suite must be built from this very class: a string that is not
        // the name of a loaded class would only name an empty suite
        $suite = new PHPUnit_Framework_TestSuite(__CLASS__);
        PHPUnit_TextUI_TestRunner::run($suite);
    }

    /**
     * Sets up the fixture.
     * This method is called before a test is executed.
     *
     * Selects the normalization class and the report file name according to
     * the test flags, compiles the Unicode data files, and creates the
     * normalizer, string and file helper instances.
     *
     * @access protected
     */
    protected function setUp()
    {
        $reportName = dirname(__FILE__) . '/reports/regression/';

        if (self::$mediawikiNormalization && !self::$codeCoverageTest) {
            // test with the mediawiki class
            require_once 'benchmark/mediawiki/UtfNormal.php';
            $this->normClass = 'UtfNormal';
            $reportName .= 'mediawiki-';
        } else {
            // test with this package class
            $this->normClass = 'I18N_UnicodeNormalizer';
            $reportName .= 'i18n-unorm-';
        }

        if (self::$codeCoverageTest) {
            // test with a limited set of data
            $compiledRoot = dirname(__FILE__) . '/data';
            $reportName .= 'limited.php';
        } else {
            // test with full data
            $compiledRoot = '';
            $reportName .= 'all.php';
        }
        $this->resultFile = $reportName;

        // forced compilation is only allowed in the very first setup,
        // this prevents re-running the compilations in the following setups
        $forceCompile = (self::$firstSetup && (self::$forceCompile || self::$codeCoverageTest));
        self::$firstSetup = false;

        // compiles the Unicode data files to UTF-8
        $compiler = new I18N_UnicodeNormalizer_Compiler($compiledRoot, 'utf8',
            $forceCompile, self::$codeCoverageTest);
        $compiler->compileAll();

        if (self::$generateUCN || self::$codeCoverageTest) {
            // compiles the Unicode data files to the UCN format
            $compiler = new I18N_UnicodeNormalizer_Compiler($compiledRoot, 'ucn',
                $forceCompile, self::$codeCoverageTest);
            $compiler->compileAll();
        }

        // gets the compiled file names
        $this->compiled = I18N_UnicodeNormalizer::getFileNames($compiledRoot);

        $this->normalizer = new I18N_UnicodeNormalizer($compiledRoot);
        $this->string = new I18N_UnicodeNormalizer_String();
        $this->file = new I18N_UnicodeNormalizer_File();
    }

    /**
     * Tears down the fixture.
     * This method is called after a test is executed.
     *
     * Nothing needs to be released.
     *
     * @access protected
     */
    protected function tearDown()
    {
    }

    /**
     * Base NFD regression test
     */
    public function testBaseNFD()
    {
        $this->checkNormalization('test_base', 'NFD');
    }

    /**
     * Base NFKD regression test
     */
    public function testBaseNFKD()
    {
        $this->checkNormalization('test_base', 'NFKD');
    }

    /**
     * Base NFC regression test
     */
    public function testBaseNFC()
    {
        $this->checkNormalization('test_base', 'NFC');
    }

    /**
     * Base NFKC regression test
     */
    public function testBaseNFKC()
    {
        $this->checkNormalization('test_base', 'NFKC');
    }

    /**
     * Hangul NFD regression test
     */
    public function testHangulNFD()
    {
        $this->checkNormalization('test_hangul', 'NFD');
    }

    /**
     * Hangul NFKD regression test
     */
    public function testHangulNFKD()
    {
        $this->checkNormalization('test_hangul', 'NFKD');
    }

    /**
     * Hangul NFC regression test
     */
    public function testHangulNFC()
    {
        $this->checkNormalization('test_hangul', 'NFC');
    }

    /**
     * Hangul NFKC regression test
     */
    public function testHangulNFKC()
    {
        $this->checkNormalization('test_hangul', 'NFKC');
    }

    /**
     * Tests getCharInfo()
     */
    public function testGetCharInfo()
    {
        // converts some composable Unicode code points to UTF-8
        $string = $this->string->unicode2string('\uAC01\u1100\u1161\u11A8\u0591');

        $expected = array(
            'quick_check' => array('\\u1161' => true, '\\u11A8' => true),
            'combining' => array('\\u0591' => 220),
            'decompositions' => array('\\uAC01' => '\\u1100\\u1161\\u11A8'),
            'compositions' => array('\\u1100\\u1161' => '\\uAC00', '\\u1100\\u1161\\u11A8' => '\\uAC01'),
            );

        $this->assertEquals($expected, $this->normalizer->getCharInfo($string, 'NFC'));
        $this->assertEquals(array(), $this->normalizer->getCharInfo('$', 'NFC'));
        $this->assertEquals(null, $this->normalizer->getCharInfo('$', 'invalid-type'));
    }

    /**
     * Tests isStarter()
     */
    public function testIsStarter()
    {
        // converts COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK to UTF-8, not a starter
        $string = $this->string->unicode2char('3099');

        $this->assertEquals(true, $this->normalizer->isStarter('$', 'NFD'));
        $this->assertEquals(true, $this->normalizer->isStarter('$', 'NFC'));
        $this->assertEquals(false, $this->normalizer->isStarter($string, 'NFKD'));
        $this->assertEquals(false, $this->normalizer->isStarter($string, 'NFKC'));
        $this->assertEquals(null, $this->normalizer->isStarter('$', 'invalid-type'));
    }

    /**
     * Encoding and type tests
     *
     * Code coverage testing cannot include the static I18N_UnicodeNormalizer::toNFx() methods
     * which use the whole Unicode.org data.
     *
     * @see self::$codeCoverageTest definition
     */
    public function testToNFx()
    {
        // a-grave, e-acute, c-cedilla and i-diaeresis as ISO-8859-1 bytes
        $string = "\xE0\xE9\xE7\xEF";

        if (self::$codeCoverageTest) {
            // limited data set: goes through the instance created in setUp()
            $this->assertEquals('aeci', $this->normalizer->normalize('aeci', 'NFD', 'ASCII'));
            $this->assertEquals($string, $this->normalizer->normalize($string, 'NFC', 'ISO-8859-1'));
            $this->assertEquals(false, $this->normalizer->normalize($string, 'NFKD', 'invalid-encoding'));
            $this->assertNotEquals($string, $this->normalizer->normalize($string, 'NFKC', 'ASCII'));
            $this->assertEquals('', $this->normalizer->normalize(''));
            $this->assertEquals(false, $this->normalizer->normalize('', 'invalid-type'));
        } else {
            // full data set: goes through the static methods
            $this->assertEquals('aeci', I18N_UnicodeNormalizer::toNFD('aeci', 'ASCII'));
            $this->assertEquals($string, I18N_UnicodeNormalizer::toNFC($string, 'ISO-8859-1'));
            $this->assertEquals(false, I18N_UnicodeNormalizer::toNFKD($string, 'invalid-encoding'));
            $this->assertNotEquals($string, I18N_UnicodeNormalizer::toNFKC($string, 'ASCII'));
            $this->assertEquals('', I18N_UnicodeNormalizer::toNFC(''));
        }
    }

    /**
     * Runs a normalization regression test and fails the current test on error
     *
     * @param  string $name the test name: 'test_base' or 'test_hangul'
     * @param  string $type the type of normalization: 'NFC', 'NFD', 'NFKC' or 'NFKD'
     * @return void
     * @access private
     */
    private function checkNormalization($name, $type)
    {
        $result = $this->normalizationTest($name, $type);

        if ($result !== true) {
            // the result is the error report of the first failing rule
            $this->fail($result);
        }
    }

    /**
     * Runs the normalization regression test
     *
     * Each column of each test line is normalized, then checked against the
     * column expected by self::$rules. The run stops at the first mismatch.
     * On success, a result entry is added and the report file is rewritten.
     *
     * @param  string  $name         the test name: 'test_base' or 'test_hangul'
     * @param  string  $type         the type of normalization: 'NFC', 'NFD', 'NFKC' or 'NFKD'
     * @param  integer $targetLine   the line number to test as defined in
     *                               the Unicode.org test file: NormalizationTest-*.txt,
     *                               all lines are tested by default
     * @param  integer $targetColumn the column number to test: 1, 2, 3, 4, or 5,
     *                               as defined in the Unicode.org test file: NormalizationTest-*.txt,
     *                               all columns are tested by default
     * @return mixed   true if no error or if self::$runNormalizationTest is false,
     *                 the error message otherwise
     * @access private
     */
    private function normalizationTest($name, $type, $targetLine = null, $targetColumn = null)
    {
        // the loaded test files and the results are shared by all the calls
        static $file = array();
        static $results = array();

        if (!self::$runNormalizationTest) {
            // the regression test is switched off
            return true;
        }

        if (!isset($file[$name])) {
            // loads the test file
            $file[$name] = require($this->compiled[$name]);
        }
        // resets the timer, the test counter, the normalized code point cache
        $time = 0;
        $testCnt = 0;
        $normCache = array();

        foreach ($file[$name] as $line => $test) {
            if (!is_null($targetLine) && $targetLine != $line) {
                // not the targeted line
                continue;
            }
            // starts each line from scratch, so that a column normalized
            // for a previous line is never checked against this line
            $normalized = array();

            foreach ($test as $column => $chars) {
                // processes the test line columns: a unit test
                if (!is_null($targetColumn) && $targetColumn != $column) {
                    // not the targeted column
                    continue;
                }
                $testCnt++;

                if (!isset($normCache[$chars])) {
                    // the UTF-8 string is not yet normalized
                    $start = microtime(true);

                    if (self::$codeCoverageTest) {
                        // limited data set: uses the instance created in setUp()
                        $normChars = $this->normalizer->normalize($chars, $type);
                    } else {
                        // full data set: uses the static toNFx() method of the
                        // class selected in setUp(), this package or mediawiki
                        $normChars = call_user_func(array($this->normClass, "to$type"), $chars);
                    }
                    // captures the elapsed time to normalize
                    $time += (microtime(true) - $start);
                    // caches the normalized UTF-8 string
                    $normCache[$chars] = $normChars;
                }
                // captures the normalized UTF-8 string
                $normalized[$column] = $normCache[$chars];
            }

            foreach ($this->rules[$type] as $i => $j) {
                // processes the test rules: NFx(c-i) must be identical to c-j,
                // the comparison is strict so that numeric-looking strings
                // are not compared as numbers
                if (!isset($normalized[$i]) || $normalized[$i] === $test[$j]) {
                    continue;
                }
                // the test failed: the normalized code does not match the expected code
                // converts the tested, normalized and expected strings
                // to Unicode in UCN format, each one from its original
                // value since the tested and expected columns may be the same
                $tested = $this->string->string2unicode($test[$i]);
                $actual = $this->string->string2unicode($normalized[$i]);
                $expected = $this->string->string2unicode($test[$j]);
                // creates the error report: the test file, the test name,
                // and the tested, normalized and expected strings
                $error = array(
                    "file: {$this->compiled[$name]}",
                    "line #$line in unicodedata/NormalizationTest-*.txt",
                    "test: {$type}(c{$i}) == c{$j}",
                    "c{$i}\t\t= {$tested}",
                    "{$type}(c{$i}) \t= {$actual}",
                    "c{$j}\t\t= {$expected}",
                    );

                return implode("\n", $error);
            }
        }
        // creates the test result: the test file, the normalization type,
        // the number of tests, of normalization method calls, and
        // the accumulated time to run the normalization tests
        $results[] = array(
            'file' => $this->compiled[$name],
            'type' => $type,
            'test_cnt' => $testCnt,
            'normalization_cnt' => count($normCache),
            'time (s)' => round($time, 1),
            );
        // creates the test report file
        // NOTE(review): dirname() returns no trailing separator, so the 4th
        // argument reads '<dir>*.php' -- confirm whether '/*.php' was intended
        $this->file->put($this->resultFile, $results, __CLASS__ . '::' . __FUNCTION__,
            dirname($this->compiled[$name]) . '*.php', 'Unicode.org normalization test results');

        return true;
    }
}
// Runs the tests of this class right away when PHPUnit_MAIN_METHOD
// designates tests_UnicodeNormalizerTest::main() as the main method.
if ("tests_UnicodeNormalizerTest::main" == PHPUnit_MAIN_METHOD) {
    tests_UnicodeNormalizerTest::main();
}

?>
