#!/usr/local/bin/perl
# $Id: anal_tdf.pl,v 1.4 2001/01/03 08:20:27 tom Exp $
################################################################
use strict;
use lib qw(lib ../web_diary_dir/lib web_diary_dir/lib web_diary_dir
	   ../web_diary_dir);

use SimpleDB::Hash;
use JConv;

use TDS;
use TDS::System;
use TDS::Status;
use TDS::Collection;
use TDS::Tdf::Command;
use TDS::DirInfo;

# Package global, declared with `use vars` so it is legal under `use strict`.
# It is passed to jconv() as the second argument when category names are
# printed; presumably that argument is the output character encoding --
# confirm against JConv.
use vars qw($OutputJcode);

$OutputJcode = 'sjis';

# Take the shared status object, call mode('ALL') on it, and create the
# collection object the file list is picked from.
my $status = $TDS::Status;
$status->mode('ALL');

# Explicit arrow call instead of the indirect-object form
# `new TDS::Collection`, which perl resolves heuristically and can mis-parse
# if a sub named `new` is ever visible in this package.
my $col = TDS::Collection->new;

# Entries returned by pickup_recent(); element [0] of each entry is used
# as the file name by the reading loop.
my @files;

# Hard-wired switch between the two selection variants; flip the constant to
# use the second one.
if (1) {    # all tdf
    @files = $col->pickup_recent($status->start_time->year,
                                 $status->start_time->month,
                                 $status->start_time->day);
}
else {      # about this year
    @files = $col->pickup_recent($status->start_time->year,
                                 12, 31, 365);
}

# Tallies filled in while the files are scanned:
#   %cmds - number of occurrences of each start command, keyed by name
#   %cnt  - line counters: start commands, end commands, all command
#           lines (cmd) and non-command lines (text)
#   %cat  - number of occurrences of each word following a CAT command
# The counters start at 0 (not undef) so that a counter which is never
# incremented is still reported as "0" instead of an empty string.
my %cmds;
my %cnt = (start => 0, end => 0, cmd => 0, text => 0);
my %cat;
my ($total_topics, $total_size) = (0, 0);

# The module is already loaded by the `use` line near the top of the file, so
# this require is a no-op kept for safety.  Setup() presumably fills
# %TDS::Tdf::Command::IsCommand, which the scan consults -- confirm in the
# module.
require TDS::Tdf::Command;
TDS::Tdf::Command::Setup();

# Scan every selected file line by line.  A line that starts with an
# optional "/", an upper-case word known to %TDS::Tdf::Command::IsCommand,
# an optional "+" or "*", and one whitespace character is a command line;
# everything else is counted as text.
print "reading...\n";
for my $entry (@files) {
    # A named loop variable is essential here: reading lines into $_ while $_
    # is aliased to the @files element would overwrite the element itself.
    my $filename = $entry->[0];

    # stat returns an empty list for a missing file; count that as 0 bytes.
    $total_size += (stat($filename))[7] || 0;
    print "$filename:\n";

    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode or a pipe.  An unreadable file is reported
    # and skipped instead of silently abandoning all remaining files.
    my $fh;
    unless (open($fh, '<', $filename)) {
        warn "cannot open $filename: $!\n";
        next;
    }

    while (my $line = <$fh>) {
        # NOTE: a jconv(..., 'euc') call on each line used to sit here and was
        # already disabled; lines are matched exactly as read.
        if ($line =~ m!^(/?)([A-Z]+)[\+\*]?\s(.*)!
            && $TDS::Tdf::Command::IsCommand{$2}) {
            # Copy the captures at once; a later successful match would
            # reset $1..$3.
            my ($slash, $name, $args) = ($1, $2, $3);

            if ($slash) {                   # end command ("/NAME ...")
                $cnt{end}++;
            }
            else {                          # start command
                $cmds{$name}++;
                $cnt{start}++;
                if ($name =~ /^S?NEW$/) {
                    $total_topics++;
                }
                elsif ($name eq 'CAT') {
                    # Each whitespace-separated word is one category.
                    for my $category (split(' ', $args)) {
                        $cat{$category}++;
                    }
                }
            }
            $cnt{cmd}++;
        }
        else {
            $cnt{text}++;
        }
    }
    close $fh;
}

# Print the most frequently used start commands (at most ten) with their
# share of all start commands.  Ties are broken by command name so the
# output order is reproducible.
my @tmp = sort { $cmds{$b} <=> $cmds{$a} or $a cmp $b } keys %cmds;
print "frequency used command:\n";

# Clamp the slice to the commands that exist: a fixed 0..9 slice would yield
# undef entries (bogus rows) for short lists and divide by zero when no
# command was seen at all.  For an empty list the range 0..-1 is empty.
my $last_rank = $#tmp < 9 ? $#tmp : 9;
for my $name (@tmp[0 .. $last_rank]) {
    printf("%-6s %5d (%4.1f%%)\n", $name, $cmds{$name}, $cmds{$name}/$cnt{start}*100);
}

# Per-file averages: lines, bytes and topics (NEW/SNEW start commands).
print "***\n";
my $num_files = @files;
my $lines = ($cnt{cmd} || 0) + ($cnt{text} || 0);

# Divide by at least 1 so that an empty file selection prints zero averages
# instead of dying with "Illegal division by zero".
my $divisor = $num_files || 1;

print "files: $num_files\n";
print "average line: " . int($lines/$divisor) . " lines\n";
print "average size: " . int(($total_size || 0)/$divisor) . " bytes\n";
printf("average topics: %4.1f\n", ($total_topics || 0)/$divisor);

# Print the ten most used categories and flag every used category that is
# missing from the registered category list.
print "used category: \n";
# Disabled alternative: read the registered categories through a tied hash.
#my %ch;
#tie %ch, 'SimpleDB::Hash', "$TDS::DirInfo::DataDir/category.dat", 1;

# Set of registered category names, one per entry of the status object's
# category content.  Each entry is copied before chomp so the status
# object's own list is left untouched.
my %ch;
for my $entry (@{$status->category->content}) {
    my $name = $entry;
    chomp $name;
    $ch{$name} = 1;
}

my $c = 0;
for (sort { $cat{$b} <=> $cat{$a} or $a cmp $b } keys %cat) {
    # Remember the key exactly as it was counted: jconv() rewrites $_ in
    # place, and looking up %cat with the converted string would lose the
    # count whenever the conversion changes the bytes.
    my $raw = $_;
    jconv(*_, 'euc');

    # Second conversion: from 'euc' to $OutputJcode, for display only.
    my $key = $_;
    jconv(\$key, $OutputJcode, 'euc');

    if ($c < 10) {
        printf("%-10s %5d\n", $key, $cat{$raw});
    }
    # The registered list is checked with the 'euc'-converted name, as before.
    print "not accounted in category.dat: $key\n" unless $ch{$_};
    $c++;
}
# Final line-classification totals.  Counters that were never incremented
# are undef; default them to 0 so the report never shows an empty field.
my ($n_cmd, $n_start, $n_end, $n_text) =
    map { $cnt{$_} || 0 } qw(cmd start end text);
print "command: $n_cmd(start:$n_start, end: $n_end)\n";
print "text: $n_text\n";


