#!/usr/bin/env perl

# This program is copyright 2007-2008 Percona Inc.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA  02111-1307  USA.

use strict;
use warnings FATAL => 'all';

our $VERSION = '0.9.1';
our $DISTRIB = '2725';
our $SVN_REV = sprintf("%d", (q$Revision: 2720 $ =~ m/(\d+)/g, 0));

# ###########################################################################
# DSNParser package 2460
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package DSNParser;

use DBI;
use Data::Dumper;
$Data::Dumper::Indent    = 0;
$Data::Dumper::Quotekeys = 0;
use English qw(-no_match_vars);

use constant MKDEBUG => $ENV{MKDEBUG};

sub new {
   my ( $class, @opts ) = @_;
   my $self = {
      opts => {
         A => {
            desc => 'Default character set',
            dsn  => 'charset',
            copy => 1,
         },
         D => {
            desc => 'Database to use',
            dsn  => 'database',
            copy => 1,
         },
         F => {
            desc => 'Only read default options from the given file',
            dsn  => 'mysql_read_default_file',
            copy => 1,
         },
         h => {
            desc => 'Connect to host',
            dsn  => 'host',
            copy => 1,
         },
         p => {
            desc => 'Password to use when connecting',
            dsn  => 'password',
            copy => 1,
         },
         P => {
            desc => 'Port number to use for connection',
            dsn  => 'port',
            copy => 1,
         },
         S => {
            desc => 'Socket file to use for connection',
            dsn  => 'mysql_socket',
            copy => 1,
         },
         u => {
            desc => 'User for login if not current user',
            dsn  => 'user',
            copy => 1,
         },
      },
   };
   foreach my $opt ( @opts ) {
      MKDEBUG && _d('Adding extra property ' . $opt->{key});
      $self->{opts}->{$opt->{key}} = { desc => $opt->{desc}, copy => $opt->{copy} };
   }
   return bless $self, $class;
}

sub prop {
   my ( $self, $prop, $value ) = @_;
   if ( @_ > 2 ) {
      MKDEBUG && _d("Setting $prop property");
      $self->{$prop} = $value;
   }
   return $self->{$prop};
}

sub parse {
   my ( $self, $dsn, $prev, $defaults ) = @_;
   if ( !$dsn ) {
      MKDEBUG && _d('No DSN to parse');
      return;
   }
   MKDEBUG && _d("Parsing $dsn");
   $prev     ||= {};
   $defaults ||= {};
   my %given_props;
   my %final_props;
   my %opts = %{$self->{opts}};
   my $prop_autokey = $self->prop('autokey');

   foreach my $dsn_part ( split(/,/, $dsn) ) {
      if ( my ($prop_key, $prop_val) = $dsn_part =~  m/^(.)=(.*)$/ ) {
         $given_props{$prop_key} = $prop_val;
      }
      elsif ( $prop_autokey ) {
         MKDEBUG && _d("Interpreting $dsn_part as $prop_autokey=$dsn_part");
         $given_props{$prop_autokey} = $dsn_part;
      }
      else {
         MKDEBUG && _d("Bad DSN part: $dsn_part");
      }
   }

   foreach my $key ( keys %opts ) {
      MKDEBUG && _d("Finding value for $key");
      $final_props{$key} = $given_props{$key};
      if (   !defined $final_props{$key}
           && defined $prev->{$key} && $opts{$key}->{copy} )
      {
         $final_props{$key} = $prev->{$key};
         MKDEBUG && _d("Copying value for $key from previous DSN");
      }
      if ( !defined $final_props{$key} ) {
         $final_props{$key} = $defaults->{$key};
         MKDEBUG && _d("Copying value for $key from defaults");
      }
   }

   foreach my $key ( keys %given_props ) {
      die "Unrecognized DSN part '$key' in '$dsn'\n"
         unless exists $opts{$key};
   }
   if ( (my $required = $self->prop('required')) ) {
      foreach my $key ( keys %$required ) {
         die "Missing DSN part '$key' in '$dsn'\n" unless $final_props{$key};
      }
   }

   return \%final_props;
}

sub as_string {
   my ( $self, $dsn ) = @_;
   return $dsn unless ref $dsn;
   return join(',',
      map  { "$_=" . ($_ eq 'p' ? '...' : $dsn->{$_}) }
      grep { defined $dsn->{$_} && $self->{opts}->{$_} }
      sort keys %$dsn );
}

sub usage {
   my ( $self ) = @_;
   my $usage
      = "DSN syntax is key=value[,key=value...]  Allowable DSN keys:\n"
      . "  KEY  COPY  MEANING\n"
      . "  ===  ====  =============================================\n";
   my %opts = %{$self->{opts}};
   foreach my $key ( sort keys %opts ) {
      $usage .= "  $key    "
             .  ($opts{$key}->{copy} ? 'yes   ' : 'no    ')
             .  ($opts{$key}->{desc} || '[No description]')
             . "\n";
   }
   if ( (my $key = $self->prop('autokey')) ) {
      $usage .= "  If the DSN is a bareword, the word is treated as the '$key' key.\n";
   }
   return $usage;
}

sub get_cxn_params {
   my ( $self, $info ) = @_;
   my $dsn;
   my %opts = %{$self->{opts}};
   my $driver = $self->prop('dbidriver') || '';
   if ( $driver eq 'Pg' ) {
      $dsn = 'DBI:Pg:dbname=' . ( $info->{D} || '' ) . ';'
         . join(';', map  { "$opts{$_}->{dsn}=$info->{$_}" }
                     grep { defined $info->{$_} }
                     qw(h P));
   }
   else {
      $dsn = 'DBI:mysql:' . ( $info->{D} || '' ) . ';'
         . join(';', map  { "$opts{$_}->{dsn}=$info->{$_}" }
                     grep { defined $info->{$_} }
                     qw(F h P S A))
         . ';mysql_read_default_group=mysql';
   }
   MKDEBUG && _d($dsn);
   return ($dsn, $info->{u}, $info->{p});
}

sub fill_in_dsn {
   my ( $self, $dbh, $dsn ) = @_;
   my $vars = $dbh->selectall_hashref('SHOW VARIABLES', 'Variable_name');
   my ($user, $db) = $dbh->selectrow_array('SELECT USER(), DATABASE()');
   $user =~ s/@.*//;
   $dsn->{h} ||= $vars->{hostname}->{Value};
   $dsn->{S} ||= $vars->{'socket'}->{Value};
   $dsn->{P} ||= $vars->{port}->{Value};
   $dsn->{u} ||= $user;
   $dsn->{D} ||= $db;
}

sub get_dbh {
   my ( $self, $cxn_string, $user, $pass, $opts ) = @_;
   $opts ||= {};
   my $defaults = {
      AutoCommit        => 0,
      RaiseError        => 1,
      PrintError        => 0,
      mysql_enable_utf8 => ($cxn_string =~ m/charset=utf8/ ? 1 : 0),
   };
   @{$defaults}{ keys %$opts } = values %$opts;
   MKDEBUG && _d($cxn_string, ' ', $user, ' ', $pass, ' {',
      join(', ', map { "$_=>$defaults->{$_}" } keys %$defaults ), '}');
   my $dbh = DBI->connect($cxn_string, $user, $pass, $defaults);
   if ( my ($charset) = $cxn_string =~ m/charset=(\w+)/ ) {
      my $sql = "/*!40101 SET NAMES $charset*/";
      MKDEBUG && _d("$dbh: $sql");
      $dbh->do($sql);
      MKDEBUG && _d('Enabling charset for STDOUT');
      if ( $charset eq 'utf8' ) {
         binmode(STDOUT, ':utf8')
            or die "Can't binmode(STDOUT, ':utf8'): $OS_ERROR";
      }
      else {
         binmode(STDOUT) or die "Can't binmode(STDOUT): $OS_ERROR";
      }
   }
   my $setvars = $self->prop('setvars');
   if ( $cxn_string =~ m/mysql/i && $setvars ) {
      my $sql = "SET $setvars";
      MKDEBUG && _d("$dbh: $sql");
      $dbh->do($sql);
   }
   MKDEBUG && _d('DBH info: ',
      $dbh,
      Dumper($dbh->selectrow_hashref(
         'SELECT DATABASE(), CONNECTION_ID(), VERSION()/*!50038 , @@hostname*/')),
      ' Connection info: ', ($dbh->{mysql_hostinfo} || 'undef'),
      ' Character set info: ',
      Dumper($dbh->selectall_arrayref(
         'SHOW VARIABLES LIKE "character_set%"', { Slice => {}})),
      ' $DBD::mysql::VERSION: ', $DBD::mysql::VERSION,
      ' $DBI::VERSION: ', $DBI::VERSION,
   );
   return $dbh;
}

sub get_hostname {
   my ( $self, $dbh ) = @_;
   if ( my ($host) = ($dbh->{mysql_hostinfo} || '') =~ m/^(\w+) via/ ) {
      return $host;
   }
   my ( $hostname, $one ) = $dbh->selectrow_array(
      'SELECT /*!50038 @@hostname, */ 1');
   return $hostname;
}

sub disconnect {
   my ( $self, $dbh ) = @_;
   MKDEBUG && $self->print_active_handles($dbh);
   $dbh->disconnect;
}

sub print_active_handles {
   my ( $self, $thing, $level ) = @_;
   $level ||= 0;
   printf("# Active %sh: %s %s %s\n", ($thing->{Type} || 'undef'), "\t" x $level,
      $thing, (($thing->{Type} || '') eq 'st' ? $thing->{Statement} || '' : ''))
      or die "Cannot print: $OS_ERROR";
   foreach my $handle ( grep {defined} @{ $thing->{ChildHandles} } ) {
      $self->print_active_handles->( $handle, $level + 1 );
   }
}

sub _d {
   my ( $line ) = (caller(0))[2];
   @_ = map { defined $_ ? $_ : 'undef' } @_;
   print "# DSNParser:$line $PID ", @_, "\n";
}

1;

# ###########################################################################
# End DSNParser package
# ###########################################################################

# ###########################################################################
# Quoter package 2215
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package Quoter;

use English qw(-no_match_vars);

use constant MKDEBUG => $ENV{MKDEBUG};

sub new {
   my ( $class ) = @_;
   bless {}, $class;
}

sub quote {
   my ( $self, @vals ) = @_;
   foreach my $val ( @vals ) {
      $val =~ s/`/``/g;
   }
   return join('.', map { '`' . $_ . '`' } @vals);
}

sub quote_val {
   my ( $self, @vals ) = @_;
   return join(', ',
      map {
         if ( defined $_ ) {
            $_ =~ s/(['\\])/\\$1/g;
            $_ eq '' || $_ =~ m/^0|\D/ ? "'$_'" : $_;
         }
         else {
            'NULL';
         }
      } @vals
   );
}

1;

# ###########################################################################
# End Quoter package
# ###########################################################################

# ###########################################################################
# OptionParser package 2300
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package OptionParser;

use Getopt::Long;
use List::Util qw(max);
use English qw(-no_match_vars);

use constant MKDEBUG => $ENV{MKDEBUG};

my $POD_link_re = '[LC]<"?([^">]+)"?>';

sub new {
   my ( $class, @opts ) = @_;
   my %key_seen;
   my %long_seen;
   my %key_for;
   my %defaults;
   my @mutex;
   my @atleast1;
   my %long_for;
   my %disables;
   my %copyfrom;
   my @allowed_with;
   unshift @opts,
      { s => 'help',    d => 'Show this help message' },
      { s => 'version', d => 'Output version information and exit' };
   foreach my $opt ( @opts ) {
      if ( ref $opt ) {
         my ( $long, $short ) = $opt->{s} =~ m/^([\w-]+)(?:\|([^!+=]*))?/;
         $opt->{k} = $short || $long;
         $key_for{$long} = $opt->{k};
         $long_for{$opt->{k}} = $long;
         $long_for{$long} = $long;
         $opt->{l} = $long;
         die "Duplicate option $opt->{k}" if $key_seen{$opt->{k}}++;
         die "Duplicate long option $opt->{l}" if $long_seen{$opt->{l}}++;
         $opt->{t} = $short;
         $opt->{n} = $opt->{s} =~ m/!/;
         $opt->{g} ||= 'o';
         if ( (my ($y) = $opt->{s} =~ m/=([mdHhAaz])/) ) {
            MKDEBUG && _d("Option $opt->{k} type: $y");
            $opt->{y} = $y;
            $opt->{s} =~ s/=./=s/;
         }
         if ( $opt->{d} =~ m/required/ ) {
            $opt->{r} = 1;
            MKDEBUG && _d("Option $opt->{k} is required");
         }
         if ( (my ($def) = $opt->{d} =~ m/default\b(?: ([^)]+))?/) ) {
            $defaults{$opt->{k}} = defined $def ? $def : 1;
            MKDEBUG && _d("Option $opt->{k} has a default");
         }
         if ( (my ($dis) = $opt->{d} =~ m/(disables .*)/) ) {
            $disables{$opt->{k}} = [ $class->get_participants($dis) ];
            MKDEBUG && _d("Option $opt->{k} $dis");
         }
      }
      else { # It's an instruction.

         if ( $opt =~ m/at least one|mutually exclusive|one and only one/ ) {
            my @participants = map {
                  die "No such option '$_' in $opt" unless $long_for{$_};
                  $long_for{$_};
               } $class->get_participants($opt);
            if ( $opt =~ m/mutually exclusive|one and only one/ ) {
               push @mutex, \@participants;
               MKDEBUG && _d(@participants, ' are mutually exclusive');
            }
            if ( $opt =~ m/at least one|one and only one/ ) {
               push @atleast1, \@participants;
               MKDEBUG && _d(@participants, ' require at least one');
            }
         }
         elsif ( $opt =~ m/default to/ ) {
            my @participants = map {
                  die "No such option '$_' in $opt" unless $long_for{$_};
                  $key_for{$_};
               } $class->get_participants($opt);
            $copyfrom{$participants[0]} = $participants[1];
            MKDEBUG && _d(@participants, ' copy from each other');
         }
         elsif ( $opt  =~ m/allowed with/ ) {
            my @participants = map {
                  die "No such option '$_' while processing $opt"
                     unless $long_for{$_};
                  $key_for{$_};
               } $class->get_participants($opt);
            push @allowed_with, \@participants;
         }

      }
   }

   foreach my $dis ( keys %disables ) {
      $disables{$dis} = [
            map {
               if ( !defined $long_for{$_} ) {
                  die "No such option '$_' while processing $dis";
               }
               $long_for{$_};
            } @{$disables{$dis}}
      ];
   }

   my $self = {
      specs        => [ grep { ref $_ } @opts ],
      notes        => [],
      instr        => [ grep { !ref $_ } @opts ],
      mutex        => \@mutex,
      defaults     => \%defaults,
      long_for     => \%long_for,
      atleast1     => \@atleast1,
      disables     => \%disables,
      key_for      => \%key_for,
      copyfrom     => \%copyfrom,
      strict       => 1,
      groups       => [ { k => 'o', d => 'Options' } ],
      allowed_with => \@allowed_with,
   };

   return bless $self, $class;
}

sub get_participants {
   my ( $self, $str ) = @_;
   my @participants;
   foreach my $thing ( $str =~ m/(--?[\w-]+)/g ) {
      if ( (my ($long) = $thing =~ m/--(.+)/) ) {
         push @participants, $long;
      }
      else {
         foreach my $short ( $thing =~ m/([^-])/g ) {
            push @participants, $short;
         }
      }
   }
   MKDEBUG && _d("Participants for $str: ", @participants);
   return @participants;
}

sub parse {
   my ( $self, %defaults ) = @_;
   my @specs = @{$self->{specs}};
   my %factor_for = (k => 1_024, M => 1_048_576, G => 1_073_741_824);

   my %opt_seen;
   my %vals = %{$self->{defaults}};
   @vals{keys %defaults} = values %defaults;
   foreach my $spec ( @specs ) {
      $vals{$spec->{k}} = undef unless defined $vals{$spec->{k}};
      $opt_seen{$spec->{k}} = 1;
   }

   foreach my $key ( keys %defaults ) {
      die "Cannot set default for non-existent option '$key'\n"
         unless $opt_seen{$key};
   }

   Getopt::Long::Configure('no_ignore_case', 'bundling');
   GetOptions( map { $_->{s} => \$vals{$_->{k}} } @specs )
      or $self->error('Error parsing options');

   if ( $vals{version} ) {
      my $prog = $self->prog;
      printf("%s  Ver %s Distrib %s Changeset %s\n",
         $prog, $main::VERSION, $main::DISTRIB, $main::SVN_REV)
         or die "Cannot print: $OS_ERROR";
      exit(0);
   }

   if ( @ARGV && $self->{strict} ) {
      $self->error("Unrecognized command-line options @ARGV");
   }

   foreach my $dis ( grep { defined $vals{$_} } keys %{$self->{disables}} ) {
      my @disses = map { $self->{key_for}->{$_} } @{$self->{disables}->{$dis}};
      MKDEBUG && _d("Unsetting options: ", @disses);
      @vals{@disses} = map { undef } @disses;
   }

   foreach my $spec ( grep { $_->{r} } @specs ) {
      if ( !defined $vals{$spec->{k}} ) {
         $self->error("Required option --$spec->{l} must be specified");
      }
   }

   foreach my $mutex ( @{$self->{mutex}} ) {
      my @set = grep { defined $vals{$self->{key_for}->{$_}} } @$mutex;
      if ( @set > 1 ) {
         my $note = join(', ',
            map { "--$self->{long_for}->{$_}" }
                @{$mutex}[ 0 .. scalar(@$mutex) - 2] );
         $note .= " and --$self->{long_for}->{$mutex->[-1]}"
               . " are mutually exclusive.";
         $self->error($note);
      }
   }

   foreach my $required ( @{$self->{atleast1}} ) {
      my @set = grep { defined $vals{$self->{key_for}->{$_}} } @$required;
      if ( !@set ) {
         my $note = join(', ',
            map { "--$self->{long_for}->{$_}" }
                @{$required}[ 0 .. scalar(@$required) - 2] );
         $note .= " or --$self->{long_for}->{$required->[-1]}";
         $self->error("Specify at least one of $note");
      }
   }

   foreach my $spec ( grep { $_->{y} && defined $vals{$_->{k}} } @specs ) {
      my $val = $vals{$spec->{k}};
      if ( $spec->{y} eq 'm' ) {
         my ( $num, $suffix ) = $val =~ m/(\d+)([a-z])?$/;
         if ( !$suffix ) {
            my ( $s ) = $spec->{d} =~ m/\(suffix (.)\)/;
            $suffix = $s || 's';
            MKDEBUG && _d("No suffix given; using $suffix for $spec->{k} "
               . "(value: '$val')");
         }
         if ( $suffix =~ m/[smhd]/ ) {
            $val = $suffix eq 's' ? $num            # Seconds
                 : $suffix eq 'm' ? $num * 60       # Minutes
                 : $suffix eq 'h' ? $num * 3600     # Hours
                 :                  $num * 86400;   # Days
            $vals{$spec->{k}} = $val;
            MKDEBUG && _d("Setting option $spec->{k} to $val");
         }
         else {
            $self->error("Invalid --$spec->{l} argument");
         }
      }
      elsif ( $spec->{y} eq 'd' ) {
         MKDEBUG && _d("Parsing option $spec->{y} as a DSN");
         my $from_key = $self->{copyfrom}->{$spec->{k}};
         my $default = {};
         if ( $from_key ) {
            MKDEBUG && _d("Option $spec->{y} DSN copies from option $from_key");
            $default = $self->{dsn}->parse($self->{dsn}->as_string($vals{$from_key}));
         }
         $vals{$spec->{k}} = $self->{dsn}->parse($val, $default);
      }
      elsif ( $spec->{y} eq 'z' ) {
         my ($pre, $num, $factor) = $val =~ m/^([+-])?(\d+)([kMG])?$/;
         if ( defined $num ) {
            if ( $factor ) {
               $num *= $factor_for{$factor};
               MKDEBUG && _d("Setting option $spec->{y} to num * factor");
            }
            $vals{$spec->{k}} = ($pre || '') . $num;
         }
         else {
            $self->error("Invalid --$spec->{l} argument");
         }
      }
   }

   foreach my $spec ( grep { $_->{y} } @specs ) {
      MKDEBUG && _d("Treating option $spec->{k} as a list");
      my $val = $vals{$spec->{k}};
      if ( $spec->{y} eq 'H' || (defined $val && $spec->{y} eq 'h') ) {
         $vals{$spec->{k}} = { map { $_ => 1 } split(',', ($val || '')) };
      }
      elsif ( $spec->{y} eq 'A' || (defined $val && $spec->{y} eq 'a') ) {
         $vals{$spec->{k}} = [ split(',', ($val || '')) ];
      }
   }

   foreach my $allowed_opts ( @{ $self->{allowed_with} } ) {
      my $opt = $allowed_opts->[0];
      next if !defined $vals{$opt};
      my %defined_opts = map { $_ => 1 } grep { defined $vals{$_} } keys %vals;
      delete @defined_opts{ @$allowed_opts };
      foreach my $defined_opt ( keys %defined_opts ) {
         MKDEBUG
            && _d("Unsetting options: $defined_opt (not allowed with $opt)");
         $vals{$defined_opt} = undef;
      }
   }

   return %vals;
}

sub error {
   my ( $self, $note ) = @_;
   $self->{__error__} = 1;
   push @{$self->{notes}}, $note;
}

sub prog {
   (my $prog) = $PROGRAM_NAME =~ m/([.A-Za-z-]+)$/;
   return $prog || $PROGRAM_NAME;
}

sub prompt {
   my ( $self ) = @_;
   my $prog   = $self->prog;
   my $prompt = $self->{prompt} || '<options>';
   return "Usage: $prog $prompt\n";
}

sub descr {
   my ( $self ) = @_;
   my $prog = $self->prog;
   my $descr  = $prog . ' ' . ($self->{descr} || '')
          . "  For more details, please use the --help option, "
          . "or try 'perldoc $prog' for complete documentation.";
   $descr = join("\n", $descr =~ m/(.{0,80})(?:\s+|$)/g);
   $descr =~ s/ +$//mg;
   return $descr;
}

sub usage_or_errors {
   my ( $self, %opts ) = @_;
   if ( $opts{help} ) {
      print $self->usage(%opts)
         or die "Cannot print: $OS_ERROR";
      exit(0);
   }
   elsif ( $self->{__error__} ) {
      print $self->errors()
         or die "Cannot print: $OS_ERROR";
      exit(0);
   }
}

sub errors {
   my ( $self ) = @_;
   my $usage = $self->prompt() . "\n";
   if ( (my @notes = @{$self->{notes}}) ) {
      $usage .= join("\n  * ", 'Errors in command-line arguments:', @notes) . "\n";
   }
   return $usage . "\n" . $self->descr();
}

sub usage {
   my ( $self, %vals ) = @_;
   my @specs = @{$self->{specs}};

   my $maxl = max(map { length($_->{l}) + ($_->{n} ? 4 : 0)} @specs);

   my $maxs = max(0,
      map { length($_->{l}) + ($_->{n} ? 4 : 0)}
      grep { $_->{t} } @specs);

   my $lcol = max($maxl, ($maxs + 3));
   my $rcol = 80 - $lcol - 6;
   my $rpad = ' ' x ( 80 - $rcol );

   $maxs = max($lcol - 3, $maxs);

   my $usage = $self->descr() . "\n" . $self->prompt();
   foreach my $g ( @{$self->{groups}} ) {
      $usage .= "\n$g->{d}:\n";
      foreach my $spec (
         sort { $a->{l} cmp $b->{l} } grep { $_->{g} eq $g->{k} } @specs )
      {
         my $long  = $spec->{n} ? "[no]$spec->{l}" : $spec->{l};
         my $short = $spec->{t};
         my $desc  = $spec->{d};
         if ( $spec->{y} && $spec->{y} eq 'm' ) {
            my ($s) = $desc =~ m/\(suffix (.)\)/;
            $s    ||= 's';
            $desc =~ s/\s+\(suffix .\)//;
            $desc .= ".  Optional suffix s=seconds, m=minutes, h=hours, "
                   . "d=days; if no suffix, $s is used.";
         }
         $desc = join("\n$rpad", grep { $_ } $desc =~ m/(.{0,$rcol})(?:\s+|$)/g);
         $desc =~ s/ +$//mg;
         if ( $short ) {
            $usage .= sprintf("  --%-${maxs}s -%s  %s\n", $long, $short, $desc);
         }
         else {
            $usage .= sprintf("  --%-${lcol}s  %s\n", $long, $desc);
         }
      }
   }

   if ( (my @instr = @{$self->{instr}}) ) {
      $usage .= join("\n", map { "  $_" } @instr) . "\n";
   }
   if ( $self->{dsn} ) {
      $usage .= "\n" . $self->{dsn}->usage();
   }
   $usage .= "\nOptions and values after processing arguments:\n";
   foreach my $spec ( sort { $a->{l} cmp $b->{l} } @specs ) {
      my $val   = $vals{$spec->{k}};
      my $type  = $spec->{y} || '';
      my $bool  = $spec->{s} =~ m/^[\w-]+(?:\|[\w-])?!?$/;
      $val      = $bool                     ? ( $val ? 'TRUE' : 'FALSE' )
                : !defined $val             ? '(No value)'
                : $type eq 'd'              ? $self->{dsn}->as_string($val)
                : $type =~ m/H|h/           ? join(',', sort keys %$val)
                : $type =~ m/A|a/           ? join(',', @$val)
                :                             $val;
      $usage .= sprintf("  --%-${lcol}s  %s\n", $spec->{l}, $val);
   }
   return $usage;
}

sub pod_to_spec {
   my ( $self, $file ) = @_;

   my %types = (
      'time' => 'm',
      'int'  => 'i',
      string => 's',
      hash   => 'h',
      Hash   => 'H',
      array  => 'a',
      Array  => 'A',
      size   => 'z',
      DSN    => 'd',
      float  => 'f',
   );

   my @spec = ();
   my @special_options = ();
   $file ||= __FILE__;
   open my $fh, "<", $file or die "Can't open $file: $OS_ERROR";
   my $para;
   my $option;

   local $INPUT_RECORD_SEPARATOR = '';
   while ( $para = <$fh> ) {
      next unless $para =~ m/^=head1 OPTIONS/;
      last;
   }

   while ( $para = <$fh> ) {
      MKDEBUG && _d($para);
      last if $para =~ m/^=over/;
      chomp $para;
      $para =~ s/\s+/ /g;
      $para =~ s/$POD_link_re/$1/go;
      push @special_options, $para;
   }

   do {
      if ( ($option) = $para =~ m/^=item --(.*)/ ) {
         MKDEBUG && _d($para);
         my %props;
         $para = <$fh>;
         if ( $para =~ m/: / ) {
            $para =~ s/\s+\Z//g;
            %props = map { split(/: /, $_) } split(/; /, $para);
            if ( $props{'short form'} ) {
               $props{'short form'} =~ s/-//;
            }
            $para = <$fh>;
         }
         $para =~ s/\s+\Z//g;
         $para =~ s/\s+/ /g;
         $para =~ s/$POD_link_re/$1/go;
         if ( $para =~ m/^[^.]+\.$/ ) {
            $para =~ s/\.$//;
         }
         push @spec, {
            s => $option
               . ( $props{'short form'} ? '|' . $props{'short form'} : '' )
               . ( $props{'negatable'}  ? '!'                        : '' )
               . ( $props{'cumulative'} ? '+'                        : '' )
               . ( $props{type}         ? '=' . $types{$props{type}} : '' ),
            d => $para
               . (defined $props{default} ? " (default $props{default})" : ''),
         };
      }
      while ( $para = <$fh> ) {
         last unless $para;

         if ( $option ) {
            if ( my ($line)
                  = $para =~ m/(allowed with --$option[:]?.*?)\./ ) {
               1 while ( $line =~ s/$POD_link_re/$1/go );
               push @special_options, $line;
            }
         }

         if ( $para =~ m/^=head1/ ) {
            $para = undef; # Can't 'last' out of a do {} block.
            last;
         }
         last if $para =~ m/^=item --/;
      }
   } while ( $para );

   close $fh;
   return @spec, @special_options;
}

sub prompt_noecho {
   shift @_ if ref $_[0] eq __PACKAGE__;
   my ( $prompt ) = @_;
   local $OUTPUT_AUTOFLUSH = 1;
   print $prompt
      or die "Cannot print: $OS_ERROR";
   my $response;
   eval {
      require Term::ReadKey;
      Term::ReadKey::ReadMode('noecho');
      chomp($response = <STDIN>);
      Term::ReadKey::ReadMode('normal');
      print "\n"
         or die "Cannot print: $OS_ERROR";
   };
   if ( $EVAL_ERROR ) {
      die "Cannot read response; is Term::ReadKey installed? $EVAL_ERROR";
   }
   return $response;
}

sub groups {
   my ( $self, @groups ) = @_;
   push @{$self->{groups}}, @groups;
}

sub _d {
   my ( $line ) = (caller(0))[2];
   print "# OptionParser:$line $PID ", @_, "\n";
}

if ( MKDEBUG ) {
   print '# ', $^X, ' ', $], "\n";
   my $uname = `uname -a`;
   if ( $uname ) {
      $uname =~ s/\s+/ /g;
      print "# $uname\n";
   }
   printf("# %s  Ver %s Distrib %s Changeset %s line %d\n",
      $PROGRAM_NAME, ($main::VERSION || ''), ($main::DISTRIB || ''),
      ($main::SVN_REV || ''), __LINE__);
   print('# Arguments: ',
      join(' ', map { my $a = "_[$_]_"; $a =~ s/\n/\n# /g; $a; } @ARGV), "\n");
}

1;

# ###########################################################################
# End OptionParser package
# ###########################################################################

# ###########################################################################
# Transformers package 2627
# ###########################################################################

package Transformers;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);

use constant MKDEBUG => $ENV{MKDEBUG};

require Exporter;
our @ISA         = qw(Exporter);
our %EXPORT_TAGS = ();
our @EXPORT      = ();
our @EXPORT_OK   = qw(
   micro_t
   percentage_of
   secs_to_time
   shorten 
   ts
);

sub micro_t {
   my ( $t, %args ) = @_;
   my $p_ms = defined $args{p_ms} ? $args{p_ms} : 3;  # precision for ms vals
   my $p_s  = defined $args{p_s}  ? $args{p_s}  : 6;  # precision for s vals
   my $f;

   $t = 0 if $t < 0;

   $t = sprintf('%.17f', $t) if $t =~ /e/;

   $t =~ s/\.(\d{1,6})\d*/\.$1/;

   if ($t > 0 && $t <= 0.000999) {
      $f = ($t * 1000000) . 'us';
   }
   elsif ($t >= 0.001000 && $t <= 0.999999) {
      $f = sprintf("%.${p_ms}f", $t * 1000);
      $f = ($f * 1) . 'ms'; # * 1 to remove insignificant zeros
   }
   elsif ($t >= 1) {
      $f = sprintf("%.${p_s}f", $t);
      $f = ($f * 1) . 's'; # * 1 to remove insignificant zeros
   }
   else {
      $f = 0;  # $t should = 0 at this point
   }

   return $f;
}

sub percentage_of {
   my ( $is, $of, %args ) = @_;
   my $p   = defined $args{p} ? $args{p} : 2; # float precision
   my $fmt = $p ? "%.${p}f" : "%d";
   return sprintf $fmt, ($is * 100) / ($of ||= 1);
}

sub secs_to_time {
   my ( $secs, $fmt ) = @_;
   $secs ||= 0;
   return '00:00' unless $secs;

   $fmt ||= $secs >= 86_400 ? 'd'
          : $secs >= 3_600  ? 'h'
          :                   'm';

   return
      $fmt eq 'd' ? sprintf(
         "%d+%02d:%02d:%02d",
         int($secs / 86_400),
         int(($secs % 86_400) / 3_600),
         int(($secs % 3_600) / 60),
         $secs % 60)
      : $fmt eq 'h' ? sprintf(
         "%02d:%02d:%02d",
         int(($secs % 86_400) / 3_600),
         int(($secs % 3_600) / 60),
         $secs % 60)
      : sprintf(
         "%02d:%02d",
         int(($secs % 3_600) / 60),
         $secs % 60);
}

sub shorten {
   my ( $num, %args ) = @_;
   my $p = defined $args{p} ? $args{p} : 2;     # float precision
   my $d = defined $args{d} ? $args{d} : 1_024; # divisor
   my $n = 0;

   while ( $num >= $d ) {
      $num /= $d;
      ++$n;
   }
   return sprintf(
      $num =~ m/\./ || $n
         ? "%.${p}f%s"
         : '%d',
      $num, ('','k','M','G', 'T')[$n]);
}

sub ts {
   my ( $time ) = @_;
   my ( $sec, $min, $hour, $mday, $mon, $year )
      = localtime($time);
   $mon  += 1;
   $year += 1900;
   return sprintf("%d-%02d-%02dT%02d:%02d:%02d",
      $year, $mon, $mday, $hour, $min, $sec);
}

sub _d {
   my ( $line ) = (caller(0))[2];
   print "# Transformers:$line $PID ", @_, "\n";
}

1;

# ###########################################################################
# End Transformers package
# ###########################################################################

# ###########################################################################
# QueryRewriter package 2713
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package QueryRewriter;

use English qw(-no_match_vars);

use constant MKDEBUG => $ENV{MKDEBUG};

my $quote_re = qr/"(?:(?!(?<!\\)").)*"|'(?:(?!(?<!\\)').)*'/; # Costly!
my $bal;
$bal         = qr/
                  \(
                  (?:
                     (?> [^()]+ )    # Non-parens without backtracking
                     |
                     (??{ $bal })    # Group with matching parens
                  )*
                  \)
                 /x;

my $olc_re = qr/(?:--|#)[^'"\r\n]*/;   # One-line comments
my $mlc_re = qr#/\*[^!].*?\*/#sm;      # Multi-line comments, but not /*!version */

sub new {
   my ( $class ) = @_;
   bless {}, $class;
}

sub strip_comments {
   my ( $self, $query ) = @_;
   $query =~ s/$olc_re//go;
   $query =~ s/$mlc_re//go;
   return $query;
}

sub fingerprint {
   my ( $self, $query ) = @_;
   $query =~ m#\ASELECT /\*!40001 SQL_NO_CACHE \*/ \* FROM `# # mysqldump query
      && return 'mysqldump';
   $query =~ m#/\*\w+\.\w+:\d/\d\*/#     # mk-table-checksum, etc query
      && return 'maatkit';
   $query =~ s/$olc_re//go;
   $query =~ s/$mlc_re//go;
   $query =~ s/\Ause \S+\Z/use ?/i       # Abstract the DB in USE
      && return $query;

   $query =~ s/\\["']//g;                # quoted strings
   $query =~ s/".*?"/?/g;                # quoted strings
   $query =~ s/'.*?'/?/g;                # quoted strings

   $query =~ s{                          # Anything vaguely resembling numbers
      (?<=[^0-9+-])
      [0-9+-].*?
      (?=[^0-9a-f.xb+-]|\Z)
      }{?}gx;
   $query =~ s/[xb.+-]\?/?/g;            # Clean up leftovers
   $query =~ s/\A\s+//;                  # Chop off leading whitespace
   $query =~ tr[ \n\t\r\f][ ]s;          # Collapse whitespace
   $query = lc $query;
   $query =~ s{
               \b(in|values?)(?:[\s,]*\([\s?,]*\))+
              }
              {$1(?+)}gx;      # Collapse IN() and VALUES() lists
   $query =~ s{
               \b(select\s.*?)(?:(\sunion(?:\sall)?)\s\1)+
              }
              {$1 /*repeat$2*/}xg; # UNION
   $query =~ s/\blimit \?(?:, ?\?| offset \?)?/limit ?/; # LIMIT
   return $query;
}

sub convert_to_select {
   my ( $self, $query ) = @_;
   return unless $query;
   $query =~ s{
                 \A.*?
                 update\s+(.*?)
                 \s+set\b(.*?)
                 (?:\s*where\b(.*?))?
                 (limit\s*\d+(?:\s*,\s*\d+)?)?
                 \Z
              }
              {__update_to_select($1, $2, $3, $4)}exsi
      || $query =~ s{
                    \A.*?
                    (?:insert|replace)\s+
                    .*?\binto\b(.*?)\(([^\)]+)\)\s*
                    values?\s*(\(.*?\))\s*
                    (?:\blimit\b|on\s*duplicate\s*key.*)?\s*
                    \Z
                 }
                 {__insert_to_select($1, $2, $3)}exsi
      || $query =~ s{
                    \A.*?
                    delete\s+(.*?)
                    \bfrom\b(.*)
                    \Z
                 }
                 {__delete_to_select($1, $2)}exsi;
   $query =~ s/\s*on\s+duplicate\s+key\s+update.*\Z//si;
   $query =~ s/\A.*?(?=\bSELECT\s*\b)//ism;
   return $query;
}

sub convert_select_list {
   my ( $self, $query ) = @_;
   $query =~ s{
               \A\s*select(.*?)\bfrom\b
              }
              {$1 =~ m/\*/ ? "select 1 from" : "select isnull(coalesce($1)) from"}exi;
   return $query;
}

sub __delete_to_select {
   my ( $delete, $join ) = @_;
   if ( $join =~ m/\bjoin\b/ ) {
      return "select 1 from $join";
   }
   return "select * from $join";
}

sub __insert_to_select {
   my ( $tbl, $cols, $vals ) = @_;
   MKDEBUG && _d('Args: ', @_);
   my @cols = split(/,/, $cols);
   MKDEBUG && _d('Cols: ', @cols);
   $vals =~ s/^\(|\)$//g; # Strip leading/trailing parens
   my @vals = $vals =~ m/($quote_re|[^,]*${bal}[^,]*|[^,]+)/g;
   MKDEBUG && _d('Vals: ', @vals);
   if ( @cols == @vals ) {
      return "select * from $tbl where "
         . join(' and ', map { "$cols[$_]=$vals[$_]" } (0..$#cols));
   }
   else {
      return "select * from $tbl limit 1";
   }
}

sub __update_to_select {
   my ( $from, $set, $where, $limit ) = @_;
   return "select $set from $from "
      . ( $where ? "where $where" : '' )
      . ( $limit ? " $limit "      : '' );
}

sub wrap_in_derived {
   my ( $self, $query ) = @_;
   return unless $query;
   return $query =~ m/\A\s*select/i
      ? "select 1 from ($query) as x limit 1"
      : $query;
}

sub _d {
   my ( $line ) = (caller(0))[2];
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } @_;
   print "# QueryRewriter:$line $PID ", @_, "\n";
}

1;

# ###########################################################################
# End QueryRewriter package
# ###########################################################################

# ###########################################################################
# LogParser package 2710
# ###########################################################################
package LogParser;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);

use constant MKDEBUG => $ENV{MKDEBUG};

sub new {
   my ( $class ) = @_;
   bless {}, $class;
}

my $general_log_first_line = qr{
   \A
   (?:(\d{6}\s+\d{1,2}:\d\d:\d\d)|\t)? # Timestamp
   \t
   (?:\s*(\d+))                        # Thread ID
   \s
   (.*)                                # Everything else
   \Z
}xs;

my $general_log_any_line = qr{
   \A(
      Connect
      |Field\sList
      |Init\sDB
      |Query
      |Quit
   )
   (?:\s+(.*\Z))?
}xs;

my $slow_log_ts_line = qr/^# Time: (\d{6}\s+\d{1,2}:\d\d:\d\d)/;
my $slow_log_uh_line = qr/# User\@Host: ([^\[]+|\[[^[]+\]).*?@ (\S*) \[(.*)\]/;

my $binlog_line_1 = qr{^# at (\d+)};
my $binlog_line_2 = qr/^#(\d{6}\s+\d{1,2}:\d\d:\d\d)\s+server\s+id\s+(\d+)\s+end_log_pos\s+(\d+)\s+(\S+)\s*([^\n]*)$/;
my $binlog_line_2_rest = qr{Query\s+thread_id=(\d+)\s+exec_time=(\d+)\s+error_code=(\d+)};

sub parse_event {
   my ( $self, $fh, $code, $mode ) = @_;
   my $event; # Don't initialize, that'll cause a loop.

   my $done = 0;
   my $type = 0; # 0 = comments, 1 = USE and SET etc, 2 = the actual query
   my $line = defined $self->{last_line} ? $self->{last_line} : <$fh>;
   $mode  ||= '';

   LINE:
   while ( !$done && defined $line ) {
      MKDEBUG && _d('type: ', $type, ' ', $line);
      my $handled_line = 0;

      if ( !$mode && $line =~ m/^# [A-Z]/ ) {
         MKDEBUG && _d('Setting mode to slow log');
         $mode ||= 'slow';
      }

      if ( $line =~ m/Version:.+ started with:/ ) {
         MKDEBUG && _d('Chomping out header lines');
         <$fh>; # Tcp port: etc
         <$fh>; # Column headers
         $line = <$fh>;
         $type = 0;
         redo LINE;
      }

      elsif ( $mode ne 'slow'
         && (my ( $ts, $id, $rest ) = $line =~ m/$general_log_first_line/s)
      ) {
         MKDEBUG && _d('Beginning of general log event');
         $handled_line = 1;
         $mode ||= 'log';
         $self->{last_line} = undef;
         if ( $type == 0 ) {
            MKDEBUG && _d('Type 0');
            my ( $cmd, $arg ) = $rest =~ m/$general_log_any_line/;
            $event = {
               ts  => $ts || '',
               id  => $id,
               cmd => $cmd,
               arg => $arg || '',
            };
            if ( $cmd ne 'Query' ) {
               MKDEBUG && _d('Not a query, done with this event');
               $done = 1;
               chomp $event->{arg} if $event->{arg};
            }
            $type = 2;
         }
         else {
            MKDEBUG && _d('Saving line for next invocation');
            $self->{last_line} = $line;
            $done = 1;
            chomp $event->{arg} if $event->{arg};
         }
      }

      elsif ( $mode eq 'slow' ) {
         if ( $line =~ m/^# No InnoDB statistics available/ ) {
            $handled_line = 1;
            MKDEBUG && _d('Ignoring line');
            $line = <$fh>;
            $type = 0;
            next LINE;
         }

         elsif ( my ( $time ) = $line =~ m/$slow_log_ts_line/ ) {
            $handled_line = 1;
            MKDEBUG && _d('Beginning of slow log event');
            $self->{last_line} = undef;
            if ( $type == 0 ) {
               MKDEBUG && _d('Type 0');
               $event->{ts} = $time;
               if ( my ( $user, $host, $ip ) = $line =~ m/$slow_log_uh_line/ ) {
                  @{$event}{qw(user host ip)} = ($user, $host, $ip);
               }
            }
            else {
               MKDEBUG && _d('Saving line for next invocation');
               $self->{last_line} = $line;
               $done = 1;
            }
            $type = 0;
         }

         elsif ( my ( $user, $host, $ip ) = $line =~ m/$slow_log_uh_line/ ) {
            $handled_line = 1;
            if ( $type == 0 ) {
               MKDEBUG && _d('Type 0');
               @{$event}{qw(user host ip)} = ($user, $host, $ip);
            }
            else {
               MKDEBUG && _d('Saving line for next invocation');
               $self->{last_line} = $line;
               $done = 1;
            }
            $type = 0;
         }

         elsif ( $line =~ m/^# / && (my %hash = $line =~ m/(\w+):\s+(\S+)/g ) ) {
            
            if ( $type == 0 ) {
               if ( $line =~ m/^#.+;/ ) {
                  MKDEBUG && _d('Commented event line ends header');
               }
               else {
                  $handled_line = 1;
                  MKDEBUG && _d('Splitting line into fields');
                  @{$event}{keys %hash} = values %hash;
               }
            }
            elsif ( $type == 1 && $line =~ m/^#.+;/ ) {
               MKDEBUG && _d('Commented event line after type 1 line');
               $handled_line = 0;
            }
            else {
               $handled_line = 1;
               MKDEBUG && _d('Saving line for next invocation');
               $self->{last_line} = $line;
               $done = 1;
            }
            $type = 0;
         }
      }

      if ( !$handled_line ) {
         $event->{cmd} = 'Query';
         if ( $mode eq 'slow' && $line =~ m/;\s+\Z/ ) {
            MKDEBUG && _d('Line is the end of a query within event');
            if ( my ( $db ) = $line =~ m/^use (.*);/i ) {
               MKDEBUG && _d('Setting event DB to ', $db);
               $event->{db} = $db;
               $type = 1;
            }
            elsif ( $type < 2 && (my ( $setting ) = $line =~ m/^(SET .*);\s+\Z/ ) ) {
               MKDEBUG && _d('Setting a property for event');
               push @{$event->{settings}}, $setting;
               $type = 1;
            }
            else {
               MKDEBUG && _d('Line is a continuation of prev line');
               if ( $line =~ m/^# / ) {
                  MKDEBUG && _d('Line is a commented event line');
                  $line =~ s/.+: (.+);\n/$1/;
                  $event->{cmd} = 'Admin';
               }
               $event->{arg} .= $line;
               $type = 2;
            }
         }
         else {
            MKDEBUG && _d('Line is a continuation of prev line');
            $event->{arg} .= $line;
            $type = 2;
         } 
      }

      $event->{NR} = $NR;

      $line = <$fh> unless $done;
   }

   if ( !defined $line ) {
      MKDEBUG && _d('EOF found');
      $self->{last_line} = undef;
   }

   if ( $mode && $mode eq 'slow' ) {
      MKDEBUG && _d('Slow log, trimming');
      $event->{arg} =~ s/;\s*\Z// if $event->{arg};
   }

   $code->($event) if $event && $code;
   return $event;
}

sub parse_slowlog_event {
   my ( $self, $fh, $callbacks ) = @_;
   my $num_events = 0;

   my @pending;
   local $INPUT_RECORD_SEPARATOR = ";\n#";
   my $trimlen    = length($INPUT_RECORD_SEPARATOR);
   my $pos_in_log = tell($fh);
   my $stmt;

   EVENT:
   while ( defined($stmt = shift @pending) or defined($stmt = <$fh>) ) {
      my @properties = ('cmd', 'Query', 'pos_in_log', $pos_in_log);
      $pos_in_log = tell($fh);

      if ( $stmt =~ s{
            ^(?:
            Tcp\sport:\s+\d+
            |
            /.*Version.*started
            |
            Time\s+Id\s+Command
            ).*\n
         }{}gmxo
      ){
         my @chunks = split(/$INPUT_RECORD_SEPARATOR/o, $stmt);
         if ( @chunks > 1 ) {
            $stmt = shift @chunks;
            unshift @pending, @chunks;
         }
      }

      $stmt = '#' . $stmt unless $stmt =~ m/\A#/;
      $stmt =~ s/;\n#?\Z//;


      my ($got_ts, $got_uh, $got_ac, $got_db, $got_set);
      my $pos = 0;
      my $len = length($stmt);
      my $found_arg = 0;
      LINE:
      while ( $stmt =~ m/^(.*)$/mg ) { # /g is important, requires scalar match.
         $pos     = pos($stmt);  # Be careful not to mess this up!
         my $line = $1;          # Necessary for /g and pos() to work.

         if ($line =~ m/^(?:#|use |SET (?:last_insert_id|insert_id|timestamp))/oi) {

            if ( !$got_ts
               && (my ( $time ) = $line =~ m/$slow_log_ts_line/o)
               && ++$got_ts
            ) {
               push @properties, 'ts', $time;
               if ( !$got_uh
                  && ( my ( $user, $host, $ip ) = $line =~ m/$slow_log_uh_line/o )
                  && ++$got_uh
               ) {
                  push @properties, 'user', $user, 'host', $host, 'ip', $ip;
               }
            }

            elsif ( !$got_uh
                  && ( my ( $user, $host, $ip ) = $line =~ m/$slow_log_uh_line/o )
                  && ++$got_uh
            ) {
               push @properties, 'user', $user, 'host', $host, 'ip', $ip;
            }

            elsif ( !$got_ac
                  && $line =~ m/^# (?:administrator command:.*)$/
                  && ++$got_ac
            ) {
               push @properties, 'cmd', 'Admin', 'arg', $line;
               $found_arg++;
            }

            elsif ( my @temp = $line =~ m/(\w+):\s+(\S+)/g ) {
               push @properties, @temp;
            }

            elsif ( !$got_db
                  && (my ( $db ) = $line =~ m/^USE ([^;]+)/i )
                  && ++$got_db
            ) {
               push @properties, 'db', $db;
            }

            elsif ( !$got_set
                  && ( my ( $setting ) = $line =~ m/^SET\s+([^;]*)/i )
                  && ++$got_set
            ) {
               push @properties, split(/,|\s*=\s*/, $setting);
            }

            if ( !$found_arg && $pos == $len ) {
               local $INPUT_RECORD_SEPARATOR = ";\n";
               if ( defined(my $l = <$fh>) ) {
                  chomp $l;
                  push @properties, 'cmd', 'Admin', 'arg', '#' . $l;
                  $found_arg++;
               }
               else {
                  next EVENT;
               }
            }
         }
         else {
            push @properties, 'arg', substr($stmt, $pos - length($line));
            last LINE;
         }
      }

      my $event = { @properties };
      CALLBACK:
      foreach my $callback ( @$callbacks ) {
         last CALLBACK unless $callback->($event);
      }
      ++$num_events;
   }
   return $num_events;
}

sub parse_binlog_event {
   my ( $self, $fh, $code ) = @_;
   my $event;

   my $term  = $self->{term} || ";\n"; # Corresponds to DELIMITER
   my $tpat  = quotemeta $term;
   local $RS = $term;
   my $line  = <$fh>;

   LINE: {
      return unless $line;

      if ( $line =~ m/^DELIMITER/m ) {
         my($del)      = $line =~ m/^DELIMITER ([^\n]+)/m;
         $self->{term} = $del;
         local $RS     = $del;
         $line         = <$fh>; # Throw away DELIMITER line
         MKDEBUG && _d('New record separator: ', $del);
         redo LINE;
      }

      $line =~ s/$tpat\Z//;

      if ( my ( $offset ) = $line =~ m/$binlog_line_1/m ) {
         $self->{last_line} = undef;
         $event = {
            offset => $offset,
         };
         my ( $ts, $sid, $end, $type, $rest ) = $line =~ m/$binlog_line_2/m;
         @{$event}{qw(ts server_id end type)} = ($ts, $sid, $end, $type);
         (my $arg = $line) =~ s/\n*^#.*\n//gm; # Remove comment lines
         $event->{arg} = $arg;
         if ( $type eq 'Xid' ) {
            my ($xid) = $rest =~ m/(\d+)/;
            $event->{xid} = $xid;
         }
         elsif ( $type eq 'Query' ) {
            @{$event}{qw(id time code)} = $rest =~ m/$binlog_line_2_rest/;
         }
         else {
            die "Unknown event type $type"
               unless $type =~ m/Rotate|Start|Execute_load_query|Append_block|Begin_load_query|Rand|User_var|Intvar/;
         }
      }
      else {
         $event = {
            arg => $line,
         };
      }
   }

   if ( !defined $line ) {
      delete $self->{term};
   }

   $code->($event) if $event && $code;
   return $event;
}

sub _d {
   my ( $line ) = (caller(0))[2];
   print "# LogParser:$line $PID ", @_, "\n";
}

1;

# ###########################################################################
# End LogParser package
# ###########################################################################

# ###########################################################################
# SQLMetrics package 2704
# ###########################################################################

package SQLMetrics;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use POSIX qw(floor);

use constant MKDEBUG => $ENV{MKDEBUG};

sub new {
   my ( $class, %args ) = @_;
   foreach my $arg ( qw(group_by attributes) ) {
      die "I need a $arg argument" unless $args{$arg};
   }

   my %attributes = map {
      my ($name, @aliases) = split qr/\|/, $_;
      $name => \@aliases;
   } @{$args{attributes}};

   my $self = {
      group_by     => $args{group_by},
      attributes   => \%attributes,
      handlers     => $args{handlers} || {},
      worst_attrib => $args{worst_attrib},
      metrics      => { all => {}, unique => {} },
      n_events     => 0,
      n_queries    => 0,
      unroll_limit => 50,
   };

   return bless $self, $class;
}

sub make_handler {
   my ( $self, $attrib, $event, %args ) = @_;
   die "I need an attrib" unless defined $attrib;
   return unless $event;
   my ($val) =
      grep { defined $_ }
      map  { $event->{$_} }
           ( $attrib, @{ $args{alt} || [] } );
   return unless defined $val; # Can't decide type if it's undef.

   my $float_re = qr{[+-]?(?:(?=\d|[.])\d*(?:[.])\d{0,})?(?:[E](?:[+-]?\d+)|)}i;
   my $type = $val  =~ m/^(?:\d+|$float_re)$/o ? 'num'
            : $val  =~ m/^(?:Yes|No)$/         ? 'bool'
            :                                    'string';
   MKDEBUG && _d("Type for $attrib is $type (sample: $val)");

   %args = ( # Set up defaults
      min => 1,
      max => 1,
      sum => $type =~ m/num|bool/    ? 1 : 0,
      cnt => $type eq 'string'       ? 0 : 1,
      unq => $type =~ m/bool|string/ ? 1 : 0,
      all => $type eq 'num'          ? 1 : 0,
      glo => 1,
      trf => ($type eq 'bool') ? q{($val || '' eq 'Yes') ? 1 : 0} : undef,
      wor => 0,
      alt => [],
      %args,
   );

   my @lines; # Lines of code for the subroutine
   if ( $args{trf} ) {
      push @lines, q{$val = } . $args{trf} . ';';
   }

   foreach my $place ( $args{glo} ? qw($class $global) : qw($class) ) {
      my @tmp;
      if ( $args{min} ) {
         my $op   = $type eq 'num' ? '<' : 'lt';
         push @tmp, 'PLACE->{min} = $val if !defined PLACE->{min} || $val '
            . $op . ' PLACE->{min};';
      }
      if ( $args{max} ) {
         my $op = ($type eq 'num') ? '>' : 'gt';
         push @tmp, 'PLACE->{max} = $val if !defined PLACE->{max} || $val '
            . $op . ' PLACE->{max};';
      }
      if ( $args{sum} ) {
         push @tmp, 'PLACE->{sum} += $val;';
      }
      if ( $args{cnt} ) {
         push @tmp, '++PLACE->{cnt};';
      }
      if ( $place eq '$class' ) {
         if ( $args{unq} ) {
            push @tmp, '++PLACE->{unq}->{$val};';
         }
         if ( $args{all} ) {
            push @tmp, 'push @{PLACE->{all}}, $val;';
         }
         if ( $args{wor} ) {
            my $op = $type eq 'num' ? '>=' : 'ge';
            push @tmp, (
               'if ( $val ' . $op . ' ($class->{max} || 0) ) {',
               '   $class->{sample} = $event;',
               '}',
            );
         }
      }
      push @lines, map { s/PLACE/$place/g; $_ } @tmp;
   }

   my @unrolled = (
      '$val = $event->{' . $attrib . '};',
      (map { "\$val = \$event->{$_} unless defined \$val;" } @{$args{alt}}),
      'defined $val && do {',
      ( map { s/^/   /gm; $_ } @lines ), # Indent for debugging
      '};',
   );
   $self->{unrolled_for}->{$attrib} = join("\n", @unrolled);

   unshift @lines, (
      'sub {',
      'my ( $event, $class, $global ) = @_;',
      'my $val = $event->{' . $attrib . '};',
      (map { "\$val = \$event->{$_} unless defined \$val;" } @{$args{alt}}),
      'return unless defined $val;',
   );
   push @lines, '}';
   my $code = join("\n", @lines);
   $self->{code_for}->{$attrib} = $code;

   MKDEBUG && _d("Metric handler for $attrib: ", @lines);
   my $sub = eval join("\n", @lines);
   die if $EVAL_ERROR;
   return $sub;
}

sub calc_event_metrics {
   my ( $self, $event ) = @_;

   $self->{n_events}++;

   my $group_by = $event->{ $self->{group_by} };
   return unless defined $group_by;

   $self->{n_queries}++;

   if ( defined $self->{unrolled_loops} ) {
      return $self->{unrolled_loops}->($self, $event, $group_by);
   }

   my @attrs = sort keys %{$self->{attributes}};
   my $fp_ds = $self->{metrics}->{unique}->{ $group_by }
      ||= { map { $_ => {} } @attrs };

   ATTRIB:
   foreach my $attrib ( @attrs ) {
      my $stats_for_attrib = $self->{metrics}->{all}->{ $attrib } ||= {};
      my $stats_for_class  = $fp_ds->{ $attrib } ||= {};

      my $handler = $self->{handlers}->{ $attrib };
      if ( !$handler ) {
         $handler = $self->make_handler(
            $attrib,
            $event,
            wor => (($self->{worst_attrib} || '') eq $attrib),
            alt => $self->{attributes}->{$attrib},
         );
         if ( $handler ) {
            $self->{handlers}->{$attrib} = $handler;
         }
      }
      next ATTRIB unless $handler;
      $handler->($event, $stats_for_class, $stats_for_attrib);
   }

   if ( $self->{n_queries} > $self->{unroll_limit}
      || !grep {ref $self->{handlers}->{$_} ne 'CODE'} @attrs)
   {
      my @attrs = grep { $self->{handlers}->{$_} } @attrs;
      my @handl = @{$self->{handlers}}{@attrs};
      my @st_fa = @{$self->{metrics}->{all}}{@attrs}; # Stats for attribute

      my @lines = (
         'my ( $self, $event, $group_by ) = @_;',
         'my ($val, $class, $global);',
         'my $fp_ds = $self->{metrics}->{unique}->{ $group_by }
            ||= { map { $_ => {} } @attrs };',
         'my @st_fc = @{$fp_ds}{@attrs};', # Stats for class
      );
      foreach my $i ( 0 .. $#attrs ) {
         push @lines, (
            '$class  = $st_fc[' . $i . '];',
            '$global = $st_fa[' . $i . '];',
            $self->{unrolled_for}->{$attrs[$i]},
         );
      }
      @lines = map { s/^/   /gm; $_ } @lines; # Indent for debugging
      unshift @lines, 'sub {';
      push @lines, '}';

      my $code = join("\n", @lines);
      MKDEBUG && _d("Unrolled subroutine: ", @lines);
      my $sub = eval $code;
      die if $EVAL_ERROR;
      $self->{unrolled_loops} = $sub;
   }

   return;
}

sub reset_metrics {
   my ( $self ) = @_;
   $self->{n_events}          = 0;
   $self->{n_queries}         = 0;
   $self->{metrics}->{all}    = {};
   $self->{metrics}->{unique} = {};
   return;
}

sub calculate_statistical_metrics {
   my ( $self, $vals, %args ) = @_;
   my @distro              = qw(0 0 0 0 0 0 0 0);
   my $statistical_metrics = {
      avg       => 0,
      max       => 0,
      stddev    => 0,
      median    => 0,
      distro    => \@distro,
      cutoff    => undef,
   };
   return $statistical_metrics unless defined $vals;

   my $n_vals = scalar @$vals;
   return $statistical_metrics unless $n_vals;

   my $cutoff = $n_vals >= 10 ? int ( scalar @$vals * 0.95 ) : $n_vals;
   $statistical_metrics->{cutoff} = $cutoff;

   my $middle_val_n = int $statistical_metrics->{cutoff} / 2;
   my $previous_val;

   my $sum    = 0; # stddev and 95% avg
   my $sumsq  = 0; # stddev
   my $max    = 0; # 95th percentile
   my $i      = 0; # for knowing when we've reached the 95%
   foreach my $val ( sort { $a <=> $b } @$vals ) {
      if ( defined $val && $val > 0 && $args{distro} ) {
         my $bucket = floor(log($val) / log(10)) + 6;
         $bucket = $bucket > 7 ? 7 : $bucket < 0 ? 0 : $bucket;
         $distro[ $bucket ]++;
      }

      if ( $i < $cutoff ) {
         if ( $i == $middle_val_n ) {
            $statistical_metrics->{median}
               = $cutoff % 2 ? $val : ($previous_val + $val) / 2;
         }

         $sum   += $val;
         $sumsq += ($val **2);
         $max   = $val;
         $i++;

         $previous_val = $val;
      }
   }

   my $stddev = sqrt (($sumsq - (($sum**2) / $cutoff)) / ($cutoff -1 || 1));

   MKDEBUG && _d("95 cutoff $cutoff, sum $sum, sumsq $sumsq, stddev $stddev");

   $statistical_metrics->{stddev} = $stddev;
   $statistical_metrics->{avg}    = $sum / $cutoff;
   $statistical_metrics->{max}    = $max;

   return $statistical_metrics;
}

sub _d {
   my ( $line ) = (caller(0))[2];
   print "# SQLMetrics:$line $PID ", @_, "\n";
}

1;

# ###########################################################################
# End SQLMetrics package
# ###########################################################################

# ###########################################################################
# QueryParser package 2648
# ###########################################################################
package QueryParser;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);

use Data::Dumper;
$Data::Dumper::Indent = 1;

use constant MKDEBUG => $ENV{MKDEBUG};

sub new {
   my ( $class ) = @_;
   bless {}, $class;
}

sub get_table_ref {
   my ( $self, $query ) = @_;
   return if !defined $query || !$query;
   my $table_ref;

   if ( $query =~ m/FROM\s+(.+?)(?:WHERE|ORDER|LIMIT|HAVING)+.+/is ) {
      $table_ref = $1;
   }
   elsif( $query =~ m/FROM\s+(.+?);?$/is ) {
      chomp($table_ref = $1);
   }

   MKDEBUG && _d($table_ref ? "table ref: $table_ref"
                            : "Failed to parse table ref");

   return $table_ref;
}

sub parse_table_aliases {
   my ( $self, $table_ref ) = @_;
   my $table_aliases = {};
   return $table_aliases if !defined $table_ref || !$table_ref;
   my @tables;

   $table_ref =~ s/\n/ /g;
   $table_ref =~ s/`//g; # Graves break database discovery

   if( $table_ref =~ m/ (:?straight_)?join /i ) {
      $table_ref =~ s/ join /,/ig;
      1 while ($table_ref =~ s/ (?:inner|outer|cross|left|right|natural),/,/ig);
      $table_ref =~ s/ using\s*\(.+?\)//ig;
      $table_ref =~ s/ on \([\w\s=.,]+\),?/,/ig;
      $table_ref =~ s/ on [\w\s=.]+,?/,/ig;
      $table_ref =~ s/ straight_join /,/ig;
   }

   @tables = split /,/, $table_ref;

   my @alias_patterns = (
      qr/\s*(\S+)\s+AS\s+(\S+)\s*/i,
      qr/^\s*(\S+)\s+(\S+)\s*$/,
      qr/^\s*(\S+)+\s*$/, # Not an alias but we save it anyway to be complete
   );

   TABLE:
   foreach my $table ( @tables ) {
      my ( $db_tbl, $alias );

      if ( $table =~ m/\(\s*SELECT\s+/i ) {
         MKDEBUG && _d("Ignoring subquery table: $table");
         next TABLE;
      }

      ALIAS_PATTERN:
      foreach my $alias_pattern ( @alias_patterns ) {
         if ( ( $db_tbl, $alias ) = $table =~ m/$alias_pattern/ ) {
            MKDEBUG && _d("$table matches $alias_pattern");
            last ALIAS_PATTERN;
         }
      }

      if ( defined $db_tbl && $db_tbl ) {
         my ( $db, $tbl ) = $db_tbl =~ m/^(?:(\S+)\.)?(\S+)/;

         $table_aliases->{$alias || $tbl} = $tbl;
         $table_aliases->{DATABASE}->{$tbl} = $db if defined $db && $db;
      }
      elsif ( MKDEBUG ) {
         _d("Failed to parse table alias for $table");
      }
   }

   MKDEBUG && _d('table aliases: ' . Dumper($table_aliases));

   return $table_aliases;
}

sub _d {
   my ( $line ) = (caller(0))[2];
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } @_;
   print "# QueryParser:$line $PID ", @_, "\n";
}

1;
# ###########################################################################
# End QueryParser package
# ###########################################################################

# #############################################################################
# MySQLDump package 2468
# #############################################################################
package MySQLDump;

use strict;
use warnings FATAL => 'all';

use English qw(-no_match_vars);

use constant MKDEBUG => $ENV{MKDEBUG};

( our $before = <<'EOF') =~ s/^   //gm;
   /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
   /*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
   /*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
   /*!40101 SET NAMES utf8 */;
   /*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
   /*!40103 SET TIME_ZONE='+00:00' */;
   /*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
   /*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
   /*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
   /*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
EOF

( our $after = <<'EOF') =~ s/^   //gm;
   /*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
   /*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
   /*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
   /*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
   /*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
   /*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
   /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
   /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
EOF

# Arguments:
# * cache: defaults to 1
sub new {
   my ( $class, %args ) = @_;
   $args{cache} = 1 unless defined $args{cache};
   my $self = bless \%args, $class;
   return $self;
}

sub dump {
   my ( $self, $dbh, $quoter, $db, $tbl, $what ) = @_;

   if ( $what eq 'table' ) {
      my $ddl = $self->get_create_table($dbh, $quoter, $db, $tbl);
      if ( $ddl->[0] eq 'table' ) {
         return $before
            . 'DROP TABLE IF EXISTS ' . $quoter->quote($tbl) . ";\n"
            . $ddl->[1] . ";\n";
      }
      else {
         return 'DROP TABLE IF EXISTS ' . $quoter->quote($tbl) . ";\n"
            . '/*!50001 DROP VIEW IF EXISTS '
            . $quoter->quote($tbl) . "*/;\n/*!50001 "
            . $self->get_tmp_table($dbh, $quoter, $db, $tbl) . "*/;\n";
      }
   }
   elsif ( $what eq 'triggers' ) {
      my $trgs = $self->get_triggers($dbh, $quoter, $db, $tbl);
      if ( $trgs && @$trgs ) {
         my $result = $before . "\nDELIMITER ;;\n";
         foreach my $trg ( @$trgs ) {
            if ( $trg->{sql_mode} ) {
               $result .= "/*!50003 SET SESSION SQL_MODE=\"$trg->{sql_mode}\" */;;\n";
            }
            $result .= "/*!50003 CREATE */ ";
            if ( $trg->{definer} ) {
               my ( $user, $host )
                  = map { s/'/''/g; "'$_'"; }
                    split('@', $trg->{definer}, 2);
               $result .= "/*!50017 DEFINER=$user\@$host */ ";
            }
            $result .= sprintf("/*!50003 TRIGGER %s %s %s ON %s\nFOR EACH ROW %s */;;\n\n",
               $quoter->quote($trg->{trigger}),
               @{$trg}{qw(timing event)},
               $quoter->quote($trg->{table}),
               $trg->{statement});
         }
         $result .= "DELIMITER ;\n\n/*!50003 SET SESSION SQL_MODE=\@OLD_SQL_MODE */;\n\n";
         return $result;
      }
      else {
         return undef;
      }
   }
   elsif ( $what eq 'view' ) {
      my $ddl = $self->get_create_table($dbh, $quoter, $db, $tbl);
      return '/*!50001 DROP TABLE IF EXISTS ' . $quoter->quote($tbl) . "*/;\n"
         . '/*!50001 DROP VIEW IF EXISTS ' . $quoter->quote($tbl) . "*/;\n"
         . '/*!50001 ' . $ddl->[1] . "*/;\n";
   }
   else {
      die "You didn't say what to dump.";
   }
}

# USEs the given database, and returns the previous default database.
sub _use_db {
   my ( $self, $dbh, $quoter, $new ) = @_;
   if ( !$new ) {
      MKDEBUG && _d('No new DB to use');
      return;
   }
   my $sql = 'SELECT DATABASE()';
   MKDEBUG && _d($sql);
   my $curr = $dbh->selectrow_array($sql);
   if ( $curr && $new && $curr eq $new ) {
      MKDEBUG && _d('Current and new DB are the same');
      return $curr;
   }
   $sql = 'USE ' . $quoter->quote($new);
   MKDEBUG && _d($sql);
   $dbh->do($sql);
   return $curr;
}

sub get_create_table {
   my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
   if ( !$self->{cache} || !$self->{tables}->{$db}->{$tbl} ) {
      my $sql = '/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, '
         . '@@SQL_MODE := REPLACE(REPLACE(@@SQL_MODE, "ANSI_QUOTES", ""), ",,", ","), '
         . '@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, '
         . '@@SQL_QUOTE_SHOW_CREATE := 1 */';
      MKDEBUG && _d($sql);
      $dbh->do($sql);
      my $curr_db = $self->_use_db($dbh, $quoter, $db);
      $sql = "SHOW CREATE TABLE " . $quoter->quote($db, $tbl);
      MKDEBUG && _d($sql);
      my $href = $dbh->selectrow_hashref($sql);
      $self->_use_db($dbh, $quoter, $curr_db);
      $sql = '/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, '
         . '@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */';
      MKDEBUG && _d($sql);
      $dbh->do($sql);
      my ($key) = grep { m/create table/i } keys %$href;
      if ( $key ) {
         MKDEBUG && _d('This table is a base table');
         $self->{tables}->{$db}->{$tbl} = [ 'table', $href->{$key} ];
      }
      else {
         MKDEBUG && _d('This table is a view');
         ($key) = grep { m/create view/i } keys %$href;
         $self->{tables}->{$db}->{$tbl} = [ 'view', $href->{$key} ];
      }
   }
   return $self->{tables}->{$db}->{$tbl};
}

sub get_columns {
   my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
   MKDEBUG && _d("Get columns for $db.$tbl");
   if ( !$self->{cache} || !$self->{columns}->{$db}->{$tbl} ) {
      my $curr_db = $self->_use_db($dbh, $quoter, $db);
      my $sql = "SHOW COLUMNS FROM " . $quoter->quote($db, $tbl);
      MKDEBUG && _d($sql);
      my $cols = $dbh->selectall_arrayref($sql, { Slice => {} });
      $self->_use_db($dbh, $quoter, $curr_db);
      $self->{columns}->{$db}->{$tbl} = [
         map {
            my %row;
            @row{ map { lc $_ } keys %$_ } = values %$_;
            \%row;
         } @$cols
      ];
   }
   return $self->{columns}->{$db}->{$tbl};
}

sub get_tmp_table {
   my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
   my $result = 'CREATE TABLE ' . $quoter->quote($tbl) . " (\n";
   $result .= join(",\n",
      map { '  ' . $quoter->quote($_->{field}) . ' ' . $_->{type} }
      @{$self->get_columns($dbh, $quoter, $db, $tbl)});
   $result .= "\n)";
   MKDEBUG && _d($result);
   return $result;
}

sub get_triggers {
   my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
   if ( !$self->{cache} || !$self->{triggers}->{$db} ) {
      $self->{triggers}->{$db} = {};
      my $sql = '/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, '
         . '@@SQL_MODE := REPLACE(REPLACE(@@SQL_MODE, "ANSI_QUOTES", ""), ",,", ","), '
         . '@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, '
         . '@@SQL_QUOTE_SHOW_CREATE := 1 */';
      MKDEBUG && _d($sql);
      $dbh->do($sql);
      $sql = "SHOW TRIGGERS FROM " . $quoter->quote($db);
      MKDEBUG && _d($sql);
      my $sth = $dbh->prepare($sql);
      $sth->execute();
      if ( $sth->rows ) {
         my $trgs = $sth->fetchall_arrayref({});
         foreach my $trg (@$trgs) {
            # Lowercase the hash keys because the NAME_lc property might be set
            # on the $dbh, so the lettercase is unpredictable.  This makes them
            # predictable.
            my %trg;
            @trg{ map { lc $_ } keys %$trg } = values %$trg;
            push @{ $self->{triggers}->{$db}->{ $trg{table} } }, \%trg;
         }
      }
      $sql = '/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, '
         . '@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */';
      MKDEBUG && _d($sql);
      $dbh->do($sql);
   }
   if ( $tbl ) {
      return $self->{triggers}->{$db}->{$tbl};
   }
   return values %{$self->{triggers}->{$db}};
}

sub get_databases {
   my ( $self, $dbh, $quoter, $like ) = @_;
   if ( !$self->{cache} || !$self->{databases} || $like ) {
      my $sql = 'SHOW DATABASES';
      my @params;
      if ( $like ) {
         $sql .= ' LIKE ?';
         push @params, $like;
      }
      my $sth = $dbh->prepare($sql);
      MKDEBUG && _d($sql, @params);
      $sth->execute( @params );
      my @dbs = map { $_->[0] } @{$sth->fetchall_arrayref()};
      $self->{databases} = \@dbs unless $like;
      return @dbs;
   }
   return @{$self->{databases}};
}

sub get_table_status {
   my ( $self, $dbh, $quoter, $db, $like ) = @_;
   if ( !$self->{cache} || !$self->{table_status}->{$db} || $like ) {
      my $sql = "SHOW TABLE STATUS FROM " . $quoter->quote($db);
      my @params;
      if ( $like ) {
         $sql .= ' LIKE ?';
         push @params, $like;
      }
      MKDEBUG && _d($sql, @params);
      my $sth = $dbh->prepare($sql);
      $sth->execute(@params);
      my @tables = @{$sth->fetchall_arrayref({})};
      @tables = map {
         my %tbl; # Make a copy with lowercased keys
         @tbl{ map { lc $_ } keys %$_ } = values %$_;
         $tbl{engine} ||= $tbl{type} || $tbl{comment};
         delete $tbl{type};
         \%tbl;
      } @tables;
      $self->{table_status}->{$db} = \@tables unless $like;
      return @tables;
   }
   return @{$self->{table_status}->{$db}};
}

sub get_table_list {
   my ( $self, $dbh, $quoter, $db, $like ) = @_;
   if ( !$self->{cache} || !$self->{table_list}->{$db} || $like ) {
      my $sql = "SHOW /*!50002 FULL*/ TABLES FROM " . $quoter->quote($db);
      my @params;
      if ( $like ) {
         $sql .= ' LIKE ?';
         push @params, $like;
      }
      MKDEBUG && _d($sql, @params);
      my $sth = $dbh->prepare($sql);
      $sth->execute(@params);
      my @tables = @{$sth->fetchall_arrayref()};
      @tables = map {
         my %tbl = (
            name   => $_->[0],
            engine => ($_->[1] || '') eq 'VIEW' ? 'VIEW' : '',
         );
         \%tbl;
      } @tables;
      $self->{table_list}->{$db} = \@tables unless $like;
      return @tables;
   }
   return @{$self->{table_list}->{$db}};
}


sub _d {
   my ( $line ) = (caller(0))[2];
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } @_;
   print "# MySQLDump:$line $PID ", @_, "\n";
}

1;

# #############################################################################
# End MySQLDump package
# #############################################################################

# ###########################################################################
# TableParser package 2631
# ###########################################################################
package TableParser;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);

use List::Util qw(min);

use constant MKDEBUG => $ENV{MKDEBUG};

sub new {
   my ( $class ) = @_;
   return bless {}, $class;
}


sub parse {
   my ( $self, $ddl, $opts ) = @_;

   if ( ref $ddl eq 'ARRAY' ) {
      if ( lc $ddl->[0] eq 'table' ) {
         $ddl = $ddl->[1];
      }
      else {
         return {
            engine => 'VIEW',
         };
      }
   }

   if ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) {
      die "Cannot parse table definition; is ANSI quoting "
         . "enabled or SQL_QUOTE_SHOW_CREATE disabled?";
   }

   $ddl =~ s/(`[^`]+`)/\L$1/g;

   my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/;
   MKDEBUG && _d('Storage engine: ', $engine);

   my @defs = $ddl =~ m/^(\s+`.*?),?$/gm;
   my @cols = map { $_ =~ m/`([^`]+)`/g } @defs;
   MKDEBUG && _d('Columns: ' . join(', ', @cols));

   my %def_for;
   @def_for{@cols} = @defs;

   my (@nums, @null);
   my (%type_for, %is_nullable, %is_numeric, %is_autoinc);
   foreach my $col ( @cols ) {
      my $def = $def_for{$col};
      my ( $type ) = $def =~ m/`[^`]+`\s([a-z]+)/;
      die "Can't determine column type for $def" unless $type;
      $type_for{$col} = $type;
      if ( $type =~ m/(?:(?:tiny|big|medium|small)?int|float|double|decimal|year)/ ) {
         push @nums, $col;
         $is_numeric{$col} = 1;
      }
      if ( $def !~ m/NOT NULL/ ) {
         push @null, $col;
         $is_nullable{$col} = 1;
      }
      $is_autoinc{$col} = $def =~ m/AUTO_INCREMENT/i ? 1 : 0;
   }

   my %keys;
   foreach my $key ( $ddl =~ m/^  ((?:[A-Z]+ )?KEY .*)$/gm ) {

      if ( $engine !~ m/MEMORY|HEAP/ ) {
         $key =~ s/USING HASH/USING BTREE/;
      }

      my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/;
      my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/;
      $type = $type || $special || 'BTREE';
      if ( $opts->{mysql_version} && $opts->{mysql_version} lt '004001000'
         && $engine =~ m/HEAP|MEMORY/i )
      {
         $type = 'HASH'; # MySQL pre-4.1 supports only HASH indexes on HEAP
      }

      my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/;
      my $unique = $key =~ m/PRIMARY|UNIQUE/ ? 1 : 0;
      my @cols;
      my @col_prefixes;
      foreach my $col_def ( split(',', $cols) ) {
         my ($name, $prefix) = $col_def =~ m/`([^`]+)`(?:\((\d+)\))?/;
         push @cols, $name;
         push @col_prefixes, $prefix;
      }
      $name =~ s/`//g;
      MKDEBUG && _d("Index $name columns: " . join(', ', @cols));

      $keys{$name} = {
         colnames     => $cols,
         cols         => \@cols,
         col_prefixes => \@col_prefixes,
         unique       => $unique,
         is_col       => { map { $_ => 1 } @cols },
         is_nullable  => scalar(grep { $is_nullable{$_} } @cols),
         type         => $type,
         name         => $name,
      };
   }

   return {
      cols           => \@cols,
      col_posn       => { map { $cols[$_] => $_ } 0..$#cols },
      is_col         => { map { $_ => 1 } @cols },
      null_cols      => \@null,
      is_nullable    => \%is_nullable,
      is_autoinc     => \%is_autoinc,
      keys           => \%keys,
      defs           => \%def_for,
      numeric_cols   => \@nums,
      is_numeric     => \%is_numeric,
      engine         => $engine,
      type_for       => \%type_for,
   };
}

sub sort_indexes {
   my ( $self, $tbl ) = @_;

   my @indexes
      = sort {
         (($a ne 'PRIMARY') <=> ($b ne 'PRIMARY'))
         || ( !$tbl->{keys}->{$a}->{unique} <=> !$tbl->{keys}->{$b}->{unique} )
         || ( $tbl->{keys}->{$a}->{is_nullable} <=> $tbl->{keys}->{$b}->{is_nullable} )
         || ( scalar(@{$tbl->{keys}->{$a}->{cols}}) <=> scalar(@{$tbl->{keys}->{$b}->{cols}}) )
      }
      grep {
         $tbl->{keys}->{$_}->{type} eq 'BTREE'
      }
      sort keys %{$tbl->{keys}};
   
   MKDEBUG && _d('Indexes sorted best-first: ' . join(', ', @indexes));
   return @indexes;
}

sub find_best_index {
   my ( $self, $tbl, $index ) = @_;
   my $best;
   if ( $index ) {
      ($best) = grep { uc $_ eq uc $index } keys %{$tbl->{keys}};
   }
   if ( !$best ) {
      if ( $index ) {
         die "Index '$index' does not exist in table";
      }
      else {
         ($best) = $self->sort_indexes($tbl);
      }
   }
   MKDEBUG && _d("Best index found is " . ($best || 'undef'));
   return $best;
}

sub find_possible_keys {
   my ( $self, $dbh, $database, $table, $quoter, $where ) = @_;
   return () unless $where;
   my $sql = 'EXPLAIN SELECT * FROM ' . $quoter->quote($database, $table)
      . ' WHERE ' . $where;
   MKDEBUG && _d($sql);
   my $expl = $dbh->selectrow_hashref($sql);
   $expl = { map { lc($_) => $expl->{$_} } keys %$expl };
   if ( $expl->{possible_keys} ) {
      MKDEBUG && _d("possible_keys=$expl->{possible_keys}");
      my @candidates = split(',', $expl->{possible_keys});
      my %possible   = map { $_ => 1 } @candidates;
      if ( $expl->{key} ) {
         MKDEBUG && _d("MySQL chose $expl->{key}");
         unshift @candidates, grep { $possible{$_} } split(',', $expl->{key});
         MKDEBUG && _d('Before deduping: ' . join(', ', @candidates));
         my %seen;
         @candidates = grep { !$seen{$_}++ } @candidates;
      }
      MKDEBUG && _d('Final list: ' . join(', ', @candidates));
      return @candidates;
   }
   else {
      MKDEBUG && _d('No keys in possible_keys');
      return ();
   }
}

sub table_exists {
   my ( $self, $dbh, $db, $tbl, $q, $can_insert ) = @_;
   my $db_tbl = $q->quote($db, $tbl);
   my $sql    = $can_insert ? "REPLACE INTO $db_tbl " : '';
   $sql      .= "SELECT * FROM $db_tbl LIMIT 0";
   MKDEBUG && _d("table_exists check for $db_tbl: $sql");
   eval { $dbh->do($sql); };
   MKDEBUG && _d("eval error (if any): $EVAL_ERROR");
   return 0 if $EVAL_ERROR;
   return 1;
}

sub get_engine {
   my ( $self, $ddl, $opts ) = @_;
   my ( $engine ) = $ddl =~ m/\) (?:ENGINE|TYPE)=(\w+)/;
   return $engine || undef;
}

sub get_keys {
   my ( $self, $ddl, $opts ) = @_;

   my @indexes = 
      grep { $_ !~ m/FOREIGN/ }
      $ddl =~ m/((?:\w+ )?KEY .+\))/mg;

   my $engine = $self->get_engine($ddl);
   if ( $engine !~ m/MEMORY|HEAP/ ) {
      @indexes = map { $_ =~ s/USING HASH/USING BTREE/; $_; } @indexes;
   }

   my @keys = map {
      my ( $struct, $cols ) = $_ =~ m/(?:USING (\w+))? \((.+)\)/;
      my ( $special ) = $_ =~ m/(FULLTEXT|SPATIAL)/;
      $struct = $struct || $special || 'BTREE';
      my ( $name ) = $_ =~ m/KEY `(.*?)` \(/;

      if ( $opts->{version} lt '004001000' && $engine =~ m/HEAP|MEMORY/i ) {
         $struct = 'HASH';
      }

      {
         struct   => $struct,
         cols     => $cols,
         name     => $name || 'PRIMARY',
      }
   } @indexes;

   return \@keys;
}

sub get_fks {
   my ( $self, $ddl, $opts ) = @_;

   my @fks = $ddl =~ m/CONSTRAINT .* FOREIGN KEY .* REFERENCES [^\)]*\)/mg;

   my @result = map {
      my ( $name ) = $_ =~ m/CONSTRAINT `(.*?)`/;
      my ( $fkcols ) = $_ =~ m/\(([^\)]+)\)/;
      my ( $cols )   = $_ =~ m/REFERENCES.*?\(([^\)]+)\)/;
      my ( $parent ) = $_ =~ m/REFERENCES (\S+) /;
      if ( $parent !~ m/\./ ) {
         $parent = "`$opts->{database}`.$parent";
      }
      {  name   => $name,
         parent => $parent,
         cols   => $cols,
         fkcols => $fkcols,
      };
   } @fks;
   return \@result;
}

sub get_duplicate_keys {
   my ( $self, $keys, $opts ) = @_;
   my @keys = @$keys;
   my %seen; # Avoid outputting a key more than once.
   my @result;

   foreach my $i ( 0..$#keys - 1 ) {
      foreach my $j ( $i+1..$#keys ) {
         my $i_cols        = $keys[$i]->{cols};
         my $j_cols        = $keys[$j]->{cols};
         my $type_i_cols   = $keys[$i]->{struct};
         my $type_j_cols   = $keys[$j]->{struct};
         my $len_i_cols    = length($i_cols);
         my $len_j_cols    = length($j_cols);
         my $min_len       = min($len_i_cols, $len_j_cols);
         my $both_FULLTEXT = (    $type_i_cols eq 'FULLTEXT'
                               && $type_j_cols eq 'FULLTEXT'
                             ) ? 1 : 0;
         if ( MKDEBUG ) {
            _d( "Checking $type_i_cols $keys[$i]->{name} ($i_cols)"
               ." against $type_j_cols $keys[$j]->{name} ($j_cols)");
         }

         if ( $opts->{ignore_order} || $both_FULLTEXT ) {
            $i_cols = join(',', sort(split(/`/, $i_cols)));
            $j_cols = join(',', sort(split(/`/, $j_cols)));
         }
         if ( ( ($keys[$i]->{struct} eq $keys[$j]->{struct})
                || $opts->{ignore_type}
              )
              && substr($i_cols, 0, $min_len) eq substr($j_cols, 0, $min_len))
         {
            if ( $both_FULLTEXT ) {
               if ( $len_i_cols == $len_j_cols ) {
                  MKDEBUG && _d("Indexes are DUPLICATES (fulltext)");
                  push @result, $keys[$i] unless $seen{$i}++;
                  push @result, $keys[$j] unless $seen{$j}++;
               }
               else {
                  MKDEBUG && _d("Indexes are not duplicates (fulltext)");
               }
            }
            else {
               MKDEBUG && _d("Indexes are DUPLICATES");
               push @result, $keys[$i] unless $seen{$i}++;
               push @result, $keys[$j] unless $seen{$j}++;
            }
         }
         else {
            MKDEBUG && _d("Indexes are not duplicates");
         }
      }
   }

   if ( $opts->{clustered} && $opts->{engine} =~ m/^(?:InnoDB|solidDB)$/ ) {
      my $i = 0;
      my $found = 0;
      while ( $i < @keys ) {
         if ( $keys[$i]->{name} eq 'PRIMARY' ) {
            $found = 1;
            last;
         }
         $i++;
      }
      if ( $found ) {
         my $pkcols = $keys[$i]->{cols};
         KEY:
         foreach my $j ( 0..$#keys ) {
            next KEY if $i == $j;
            my $suffix = $keys[$j]->{cols};
            SUFFIX:
            while ( $suffix =~ s/`[^`]+`,// ) {
               my $len = min(length($pkcols), length($suffix));
               if ( (($keys[$i]->{struct} eq $keys[$j]->{struct}) || $opts->{ignore_type})
                  && substr($suffix, 0, $len) eq substr($pkcols, 0, $len))
               {
                  push @result, $keys[$i] unless $seen{$i}++;
                  push @result, $keys[$j] unless $seen{$j}++;
                  last SUFFIX;
               }
            }
         }
      }
   }

   return \@result;
}

sub get_duplicate_fks {
   my ( $self, $fks, $opts ) = @_;
   my @fks = @$fks;
   my %seen; # Avoid outputting a fk more than once.
   my @result;
   foreach my $i ( 0..$#fks - 1 ) {
      foreach my $j ( $i+1..$#fks ) {
         my $i_cols = join(', ', map { "`$_`" } sort($fks[$i]->{cols} =~ m/`([^`]+)`/g));
         my $j_cols = join(', ', map { "`$_`" } sort($fks[$j]->{cols} =~ m/`([^`]+)`/g));
         my $i_fkcols = join(', ', map { "`$_`" } sort($fks[$i]->{fkcols} =~ m/`([^`]+)`/g));
         my $j_fkcols = join(', ', map { "`$_`" } sort($fks[$j]->{fkcols} =~ m/`([^`]+)`/g));
         if ( $fks[$i]->{parent} eq $fks[$j]->{parent}
               && $i_cols eq $j_cols
               && $i_fkcols eq $j_fkcols
         ) {
            push @result, $fks[$i] unless $seen{$i}++;
            push @result, $fks[$j] unless $seen{$j}++;
         }
      }
   }

   return \@result;
}

sub _d {
   my ( $line ) = (caller(0))[2];
   print "# TableParser:$line $PID ", @_, "\n";
}

1;

# ###########################################################################
# End TableParser package
# ###########################################################################

# ###########################################################################
# QueryReview package 2719
# ###########################################################################
package QueryReview;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);

use Digest::MD5 qw(md5_hex);
use Data::Dumper;

use constant MKDEBUG => $ENV{MKDEBUG};

sub new {
   my ( $class, %args ) = @_;
   foreach my $arg ( qw(dbh db_tbl tbl_struct group_by) ) {
      die "I need a $arg argument" unless $args{$arg};
   }

   my @basic_cols
      = qw(checksum fingerprint sample first_seen last_seen
           reviewed_by reviewed_on comments);
   foreach my $col ( @basic_cols ) {
      die "Query review table $args{db_tbl} does not have a $col column"
         unless $args{tbl_struct}->{is_col}->{$col};
   }
   my %basic_cols = map { $_ => 1 } @basic_cols;
   my @extra_cols = grep { !$basic_cols{$_} } @{$args{tbl_struct}->{cols}};

   my $sql = "SELECT fingerprint, CONV(checksum, 10, 16) as checksum_hex, "
           . "first_seen, last_seen "
           . "FROM $args{db_tbl} "
           . ($args{where} ? $args{where} : '');
   my %cache = map {
      $_->{fingerprint} => {
         checksum => $_->{checksum_hex},
         dirty    => 0,
         cols     => {
            first_seen => $_->{first_seen},
            last_seen  => $_->{last_seen},
         }
      }
   }
   @{ $args{dbh}->selectall_arrayref($sql, { Slice => {} }) };

   my $insert_new_sth = $args{dbh}->prepare(
         'INSERT IGNORE INTO ' . $args{db_tbl}
         . '(checksum, fingerprint, sample, first_seen, last_seen) VALUES( '
         . 'CONV(?, 16, 10), ?, ?, COALESCE(?, NOW()), COALESCE(?, NOW()))');

   my $self = {
      dbh            => $args{dbh},
      db_tbl         => $args{db_tbl},
      cache          => \%cache,
      insert_new_sth => $insert_new_sth,
      group_by       => $args{group_by},
      basic_cols     => \@basic_cols,
      extra_cols     => \@extra_cols,
   };
   MKDEBUG && _d("new QueryReview obj: " . Dumper($self));
   return bless $self, $class;
}

sub cache_event {
   my ( $self, $event ) = @_;
   my $checksum;

   my $group_by =  $event->{ $self->{group_by} };
   return unless defined $group_by;

   if ( exists $self->{cache}->{$group_by} ) {
      $checksum = $self->{cache}->{$group_by}->{checksum};
      $self->_update_cache($group_by, $event);
   }
   else {
      $checksum = make_checksum($group_by);

      if ( $self->event_is_stored($checksum) ) {
         my $review_info = $self->{dbh}->selectall_hashref(
            'SELECT CONV(checksum,10,16) AS checksum_conv, '
            . join(', ', @{$self->{basic_cols}})
            . ' FROM ' . $self->{db_tbl}
            . " WHERE checksum=CONV('$checksum',16,10)",
            'checksum_conv',);
         $self->{cache}->{$group_by} = {
            checksum => $checksum,
            dirty    => 1,
            cols     => {
               first_seen => $review_info->{$checksum}->{first_seen} || '',
               last_seen  => $review_info->{$checksum}->{last_seen}  || '',
            },
         }; 
         $self->_update_cache($group_by, $event);
      }
      else {
         my $ts = _parse_timestamp($event->{ts});

         $self->{insert_new_sth}->execute(
            $checksum,
            $group_by,
            $event->{arg},
            $ts,
            $ts);
         MKDEBUG && _d("Stored new event: $checksum $group_by $ts");

         $self->{cache}->{$group_by} = {
            checksum => $checksum,
            dirty    => 0,
            cols     => {
               first_seen => $ts,
               last_seen  => $ts,
            },
         }; 
      }
   } 

   $event->{checksum} = $checksum;

   return;
}

sub event_is_stored {
   my ( $self, $checksum ) = @_;
   my $sql = "SELECT checksum FROM $self->{db_tbl} "
           . "WHERE checksum=CONV('$checksum',16,10)";
   MKDEBUG && _d($sql);
   my $event = $self->{dbh}->selectall_arrayref($sql);
   return scalar @$event ? 1 : 0;
}

sub _update_cache {
   my ( $self, $group_by, $event ) = @_;
   return unless exists $self->{cache}->{$group_by};

   my $fp_ds = $self->{cache}->{$group_by};

   $fp_ds->{dirty} = 1;  # group_by in cache differs from query review tbl

   my $ts = _parse_timestamp($event->{ts});
   $fp_ds->{cols}->{first_seen} = $ts if $ts le $fp_ds->{cols}->{first_seen};
   $fp_ds->{cols}->{last_seen}  = $ts if $ts ge $fp_ds->{cols}->{last_seen};


   return;
}

sub flush_event_cache {
   my ( $self ) = @_;

   CLASS:
   foreach my $class ( keys %{$self->{cache}} ) {
      my $fp_ds = $self->{cache}->{$class};
      next CLASS if !$fp_ds->{dirty};
      my @sets;
      foreach my $col ( keys %{$fp_ds->{cols}} ) {
         push @sets, "$col='$fp_ds->{cols}->{$col}'";
      }
      my $set_clause = join(', ', @sets);
      my $sql = "UPDATE $self->{db_tbl} SET "
              . $set_clause
              . " WHERE checksum=CONV('$fp_ds->{checksum}',16,10)";
      MKDEBUG && _d("update sql for cached event: $sql");
      $self->{dbh}->do($sql);
   }

   return;
}

sub make_checksum {
   my ( $val ) = @_;
   my $checksum = uc substr(md5_hex($val), -16);
   MKDEBUG && _d("$checksum checksum for $val");
   return $checksum;
}

sub _parse_timestamp {
   my ( $val ) = @_;
   return '' unless defined $val;
   $val =~ s/^(\d\d)(\d\d)(\d\d) /20$1-$2-$3 /;
   return $val;
}

sub _d {
   my ( $line ) = (caller(0))[2];
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } @_;
   print "# QueryReview:$line $PID ", @_, "\n";
}

1;
# ###########################################################################
# End QueryReview package
# ###########################################################################

# #############################################################################
# mk-log-parser
# #############################################################################
package main;

# TODO: sort by ninetyfive, stddev and med.
# TODO: reverse sorting.
# TODO: sort by expressions like Rows_sent/Rows_examined
# TODO: tail -c +160665 /home/baron/smalltest.log | less

use English qw(-no_match_vars);
use Time::Local qw(timelocal);
use Data::Dumper;
$Data::Dumper::Indent=1;

Transformers->import( qw(shorten micro_t percentage_of ts) );

use constant MKDEBUG => $ENV{MKDEBUG};

# TODO: use sigtrap
$SIG{INT} = \&sig_int;

# #############################################################################
# Get configuration information.
# #############################################################################

my $dp = new DSNParser (
   { key => 'D', desc => 'Database that contains the query review table' },
   { key => 't', desc => 'Table to use as the query review table' } );

my @opt_spec   = OptionParser::pod_to_spec();
my $opt_parser = new OptionParser(@opt_spec);
$opt_parser->{strict} = 0;
$opt_parser->{dsn}    = $dp;
$opt_parser->{prompt} = '[OPTION...] [FILE]';
$opt_parser->{descr}  = q{parses and analyzes MySQL log files.  With no }
                      . q{FILE, or when FILE is -, read standard input.};
my %opts = $opt_parser->parse();

if ( !$opts{help} ) {
   if ( $opts{R} && (!defined $opts{R}->{D} || !defined $opts{R}->{t}) ) {
      $opt_parser->error('The --review DSN requires a D (database) and t"
         . " (table) part specifying the query review table');
   }
   if ( $opts{t} !~ m/^\d+%?$/ ) {
      $opt_parser->error('Invalid syntax for --top.  See perldoc for details.');
   }
}

$opt_parser->usage_or_errors(%opts);

# #############################################################################
# Set up callbacks for the various event handling behaviors.
# #############################################################################
my $q  = new Quoter();
my $qr = new QueryRewriter();
my $qv;           # used if --review
my $dbh;          # used if --review
my $sm;           # used if do_analysis
my @callbacks;

# This chain of callbacks begins with the sub below which only
# fingerprints the event and adds that fingerprint to the event.
# Subsequent callbacks--for review and/or analyze--expect the event
# to have its own fingerprint. This way we fingerprint only once.
# Only the event is passed to each callback. Each callback is called
# in order in LogParser::parse_slowlog_event().
# The chain can terminate early if one of the callbacks returns 0.
# If this happens, the chain restarts immediately with the next event.
push @callbacks, sub {
   my ( $event ) = @_;
   # TODO: save db and ts from last event and apply it here.  (Before skipping!)
   # Skip events which do not have the group_by attribute.
   my $group_by_val = $event->{arg}; # TODO: this may change for non-slowlogs
   return 0 unless defined $group_by_val;
   my $fingerprint = $qr->fingerprint($group_by_val);
   $event->{fingerprint} = $fingerprint;
   return 1;
};

push @callbacks, get_query_review_callback() if $opts{R};
push @callbacks, get_analyze_callback()      if $opts{A};

# ############################################################################
# Parse the input.
# ############################################################################
my @fhs;
if ( @ARGV == 0 || (@ARGV == 1 && $ARGV[0] eq '-' ) ) {
   my $fh = *STDIN;
   push @fhs, $fh;
}
else {
   foreach my $arg ( @ARGV ) {
      open my $fh, "<", $arg or die "Cannot open $arg: $OS_ERROR\n";
      push @fhs, $fh;
   }
}

my $oktorun = 1;
my $lp = new LogParser();
while ( $oktorun && (my $fh = shift(@fhs)) ) {
   1 while ( $oktorun && $lp->parse_slowlog_event($fh, \@callbacks) );
   close $fh or warn "Cannot close filehandle: $OS_ERROR\n";
}

# ############################################################################
# Finish up (print results, etc).
# ############################################################################

# Flush before reporting so that the report gets the latest review vals.
$qv->flush_event_cache() if $opts{R};

if ( $opts{A} ) {
   my @worst = find_top_queries($sm, $opts{s}, $opts{t}, $opts{w});
   report_queries(@worst);
}

$dp->disconnect($dbh) if $opts{R};

exit;

# ############################################################################
# Subroutines.
# ############################################################################

sub get_query_review_callback {
   $dbh = $dp->get_dbh($dp->get_cxn_params($opts{R}), {AutoCommit => 1});
   my $tp  = new TableParser();
   if ( !$tp->table_exists($dbh, $opts{R}->{D}, $opts{R}->{t}, $q, 1) ) {
      die "The query review table $opts{R}->{D}.$opts{R}->{t} "
         . "does not exist or you do not have INSERT privileges";
   }
   my $du     = new MySQLDump();
   my $db_tbl = $q->quote($opts{R}->{D}, $opts{R}->{t});
   my $struct = $tp->parse(
      $du->dump($dbh, $q, $opts{R}->{D}, $opts{R}->{t}, 'table'));
   $qv        = new QueryReview(
      group_by    => 'fingerprint',
      dbh         => $dbh,
      db_tbl      => $db_tbl,
      tbl_struct  => $struct,
   );

   return sub {
      my ( $event ) = @_;
      return 0 unless defined $event;
      $qv->cache_event($event);
      return 1; # required to keep callback chain going
   };
}

sub get_analyze_callback {
   $sm = new SQLMetrics(
      group_by     => 'fingerprint',
      worst_attrib => $opts{w},  # TODO this can be an expr, add to attributes
      attributes   => [
         qw(
            Query_time
            Lock_time
            Rows_read
            Rows_sent
            Rows_examined
            user
            db|Schema
            ts
            )
      ],
      # TODO: add values from command-line options
      # TODO: name=expression syntax
      #withoutlocks=Query_time-Lock_time
   );

   return sub {
      my ( $event ) = @_;
      $sm->calc_event_metrics($event);
      return 1; # required to keep callback chain going
   };
}

sub find_top_queries {
   my ( $sm, $sort, $top, $worst ) = @_;
   my $classes = $sm->{metrics}->{unique};
   my @worst;

   MKDEBUG && _d("sort $sort top $top worst $worst");
   my @sorted =
      reverse sort {
         $classes->{$a}->{$worst}->{$sort} <=> $classes->{$b}->{$worst}->{$sort}
      } keys %$classes;
   my $count = 0;
   if ( $top =~ m/^\d+$/ ) {
      $count = @sorted > $top ? $top : scalar @sorted;
   }
   else {
      # It's a percentage, so grab as many as needed to get to that % of the
      # file.
      ($top) = $top =~ m/(\d+)/;
      my $so_far = 0;
      my $target = $sm->{metrics}->{all}->{$worst}->{sum} * ($top / 100);
      while ( $so_far < $target && $count < @sorted) {
         $so_far += $classes->{$sorted[$count++]}->{$worst}->{$sort};
      }
   }
   # Add event's fingerprint to itself as a pseudo-attribute for use in
   # report_queries() to correlate with query review table info.
   return map { $classes->{$_}->{fingerprint} = $_; $classes->{$_}; }
      @sorted[0 .. $count - 1];
}

sub report_queries {
   my ( @worst ) = @_;
   my $u = $sm->{metrics}->{unique};
   my $g = $sm->{metrics}->{all};
   my $Query_time_distro;

   # Print header
   if ( $opts{header} ) {
      printf "# Queries: %s, Time: %s\n",
         shorten($sm->{n_queries}),
         micro_t($sm->{metrics}->{all}->{Query_time}->{sum}, p_s => 1, p_ms => 1);
      if ( $opts{rusage} ) {
         printf "# CPU times: %s user, %s system\n",
            map { micro_t($_, p_s => 1, p_ms => 1) } times();
         eval {
            my $mem = `ps -o rss,vsz $PID`;
            printf "# Memory use: %s rss,  %s vsz\n",
               map {shorten($_ * 1_024)} $mem =~ m/(\d+)/g;
         };
      }
      print  "\n";
   }

   my $query_hdr_fmt   = "# Query %03d (%s QPS) ID: 0x%s at byte %d ";
   my $metrics_hdr_fmt = "#              total    %%     min     max     "
                       . "avg     95%%  stddev  median\n";
   my $count_line_fmt  = "# %9s %8s  %3d\n";
   my $metric_line_fmt = "# %9s %8s  %3d %7s %7s %7s %7s %7s %7s\n";
   my $list_line_fmt   = "# %9s  %s\n";
   my $timestamp_fmt   = "# Time range %s to %s\n";
   my $review_info_fmt = "# %13s: %-s\n";

   my @reported_metrics = (
      {
         metric => 'Query_time',
         name   => 'Time',
         fmt    => sub { return micro_t(@_, p_ms => 1, p_s => 1); },
      },
      {
         metric => 'Lock_time',
         name   => 'Lock',
         fmt    => sub { return micro_t(@_, p_ms => 1, p_s => 1); },
      },
      {
         metric => 'Rows_read',
         name   => 'Rows read',
         fmt    => sub { return shorten(@_, p => 1, d => 1000); },
      },
      {
         metric => 'Rows_sent',
         name   => 'Rows sent',
         fmt    => sub { return shorten(@_, p => 1, d => 1000); },
      },
      {
         metric => 'Rows_examined',
         name   => 'Rows exam',
         fmt    => sub { return shorten(@_, p => 1, d => 1000); },
      },
   );

   my @review_cols;
   my %exclude_cols = (fingerprint => 1, sample => 1, checksum => 1);
   if ( $opts{R} ) {
      @review_cols = sort
                     grep { !$exclude_cols{$_} }
                     (@{$qv->{basic_cols}}, @{$qv->{extra_cols}});
   }

   QUERY:
   foreach my $i ( 0..$#worst ) {
      my $class       = $worst[$i];
      my $fingerprint = $class->{fingerprint};
      my $checksum    =  QueryReview::make_checksum($fingerprint);

      my $review_vals;
      if ( $opts{R} ) {
         $review_vals = get_review_vals($checksum, \@review_cols);
         next QUERY if ( defined $review_vals->{reviewed_by}
                                && !$opts{reportall} );
      }

      # Calculate QPS (queries per second) by looking at the min/max timestamp.
      my $qps = 0;
      if ( $class->{Query_time}->{cnt} > 1
            && $class->{ts}->{min}
            && $class->{ts}->{max} gt $class->{ts}->{min}
      ) {
         my $min = parse_timestamp($class->{ts}->{min});
         my $max = parse_timestamp($class->{ts}->{max});
         $qps = $class->{Query_time}->{cnt} / ( $max - $min );
      }

      my $header = sprintf $query_hdr_fmt, $i + 1, shorten($qps,p=>1,d=>1000),
            $checksum, $class->{$opts{w}}->{sample}->{pos_in_log} || 0;

      print  $header, ('_' x (73 - length($header))), "\n";
      printf $metrics_hdr_fmt;
      printf $count_line_fmt,
         'Count',
         $class->{Query_time}->{cnt},
         percentage_of($class->{Query_time}->{cnt}, $sm->{n_queries});

      foreach my $r ( @reported_metrics ) {
         my $metric = $r->{metric};
         my $val    = $class->{ $metric };

         next unless $val && %$val;
         MKDEBUG && _d("Reporting metrics for $metric");

         my $save_distro = $metric eq 'Query_time' ? 1 : 0; # TODO: $worst?
         my $stats
            = $sm->calculate_statistical_metrics($val->{all},
               distro => $save_distro);

         $Query_time_distro = $stats->{distro} if $save_distro;

         printf $metric_line_fmt,
            $r->{name},                             # friendly metric name
            $r->{fmt}->($val->{sum}),               # total
            percentage_of($val->{sum},
                          $g->{$metric}->{sum}),    # % total/grand total
            $r->{fmt}->($val->{min}),               # min
            $r->{fmt}->($val->{max}),               # max
            $r->{fmt}->($val->{sum}/$val->{cnt}),   # avg
            $r->{fmt}->($stats->{max}),             # 95% are within this
            $r->{fmt}->($stats->{stddev}),          # 95% stdev
            $r->{fmt}->($stats->{median});          # 95% med
      }

      printf $list_line_fmt, 'DBs',   join(', ', keys %{$class->{db}->{unq}});
      printf $list_line_fmt, 'Users', join(', ', keys %{$class->{user}->{unq}});

      # TODO: permit to switch off with cmdline option --verbosity or --quiet
      if ( $class->{ts}->{min} ) {
         printf $timestamp_fmt,
            map { ts(parse_timestamp($_)) } @{$class->{ts}}{qw(min max)};
      }
      print "# Execution times\n";
      print chart_distro($Query_time_distro);

      # TODO
      # print "# Time clustering\n";

      if ( $opts{R} ) {
         print "# Review information\n";
         foreach my $j ( 0..$#review_cols ) {
            my $col = $review_cols[$j];
            my $val = $review_vals->{$col};
            printf $review_info_fmt, $col, (defined $val ? $val : '');
         }
      }

      print "# Fingerprint\n#    $fingerprint\n" if $opts{f};

      # If the query uses qualified table names (db.tbl), print_tables()
      # will print SHOW TABLE STATUS FROM `db` LIKE 'tbl'. Otherwise,
      # if a default_db is given, print_tables() will use it for queries
      # without qualified table names. We pass a default db only if the
      # query logged one db because there is no reliable way to choose
      # between multiple logged dbs. As a last report, print_tables()
      # will simply omit the FROM `db` clause and it's left to the user
      # to determine the correct db.
      my ( $default_db ) = keys %{$class->{db}->{unq}}
         if scalar keys %{$class->{db}->{unq}} == 1;

      my $sample = $class->{$opts{w}}->{sample}->{arg};
      my $select_pattern = qr/^[\s\(]*SELECT /i;
      if ( $sample =~ m/$select_pattern/ ) {
         print_tables($sample, $default_db) if $opts{forexplain};
         print "# EXPLAIN\n$sample\\G\n";
      }
      else {
         my $converted_sample = $qr->convert_to_select($sample);
         if ( $converted_sample =~ m/$select_pattern/ ) {
            print_tables($converted_sample, $default_db) if $opts{forexplain};
            print "$sample\\G\n";
            print "# Converted for EXPLAIN\n# EXPLAIN\n" if $opts{forexplain};
         }
         # converted_sample will be the original sample if it
         # failed to convert. Otherwise, it will be a SELECT.
         print "$converted_sample\\G\n" if $opts{forexplain};
      }
      print "\n";
   }

   return;
}

sub get_review_vals {
   my ( $checksum, $cols ) = @_;
   return unless defined $checksum;
   my $db_tbl = $q->quote($opts{R}->{D}, $opts{R}->{t});
   my $select_cols  = join(',', @$cols);
   my $sql  = "SELECT $select_cols, CONV(checksum,10,16) AS checksum_conv "
            . "FROM $db_tbl "
            . "WHERE checksum=CONV('$checksum',16,10)";
   MKDEBUG && _d("select for review vals: $sql");
   # There should only be 1 result.
   my $res = $dbh->selectall_hashref($sql, 'checksum_conv');
   my $review_vals = $res->{$checksum};
   delete $review_vals->{checksum_conv};
   return $review_vals;
}

sub chart_distro {
   my ( $distro ) = @_;
   return "\n" if !defined $distro || scalar @$distro== 0;
   my $max_val = 0;
   my $vals_per_mark; # number of vals represented by 1 #-mark
   my $max_disp_width = 64;
   my $bar_fmt = "# %5s%s\n";
   my @distro_labels = qw(1us 10us 100us 1ms 10ms 100ms 1s 10s+);

   # Find the distro with the most values. This will set
   # vals_per_mark and become the bar at max_disp_width.
   foreach my $n_vals ( @$distro ) {
      $max_val = $n_vals if $n_vals > $max_val;
   }
   $vals_per_mark = $max_val / $max_disp_width;

   MKDEBUG && _d("vals per mark $vals_per_mark, max val $max_val");

   my $i = 0;
   foreach my $n_vals ( @$distro ) {
      MKDEBUG && _d("$n_vals vals in $distro_labels[$i] bucket");

      my $n_marks = $n_vals / $vals_per_mark;

      # Always print at least 1 mark for any bucket that has at least
      # 1 value. This skews the graph a tiny bit, but it allows us to
      # see all buckets that have values.
      $n_marks = 1 if $n_marks < 1 && $n_vals > 0;

      my $bar = ($n_marks ? '  ' : '') . '#' x $n_marks;

      printf $bar_fmt, $distro_labels[$i++], $bar;
   }

   return;
}

sub print_tables {
   my ( $query, $default_db ) = @_;
   my $qp = new QueryParser();
   my $table_aliases = $qp->parse_table_aliases( $qp->get_table_ref($query) );
   print "# Tables\n";
   foreach my $table_alias ( keys %$table_aliases ) {
      next if $table_alias eq 'DATABASE';
      my $tbl = $table_aliases->{$table_alias};
      my $db  = $table_aliases->{DATABASE}->{$tbl} || $default_db;
      print '#    SHOW TABLE STATUS',
         (defined $db && $db ? " FROM `$db`" : ''),
         " LIKE '$tbl'\\G\n";
      print "#    SHOW CREATE TABLE ",
         (defined $db && $db ? "`$db`." : ''),
         "`$tbl`\\G\n";
   }
   # If no tables are printed, this may be due to a query like
   #    SELECT col FROM (SELECT col FROM tbl2) AS tbl1
   # because QueryParser ignores subquery tables.
   return;
}
      
sub _d {
   my ( $line ) = (caller(0))[2];
   print "# main:$line ", @_, "\n";
}

# Catches signals so we can exit gracefully.
# TODO: test this
# TODO: break wait for <$fh> with SIGINT.
sub sig_int {
   my ( $signal ) = @_;
   if ( $oktorun ) {
      print STDERR "# Caught SIG$signal.\n";
      $oktorun = 0;
   }
   else {
      print STDERR "# Exiting on SIG$signal.\n";
      exit(1);
   }
}

# Turns 071015 21:43:52 into a Unix timestamp. TODO: put this into transformers
# and make it not use timelocal, just return formatted string.
sub parse_timestamp {
   my ( $val ) = @_;
   if ( my($y, $m, $d, $h, $i, $s)
         = $val =~ m/^(\d\d)(\d\d)(\d\d) +(\d+):(\d+):(\d+)$/ )
   {
      $val = timelocal($s, $i, $h, $d, $m - 1, $y + 2000);
   }
   return $val;
}

# #############################################################################
# Documentation.
# #############################################################################

=pod

=head1 NAME

mk-log-parser - Analyze and review MySQL logs.

=head1 SYNOPSIS

Analyze and report on a slow log:

 mk-log-parser /path/to/slow.log

Review a slow log, saving results to the test.query_review table in a MySQL
server running on host1.  See L<--review> for more on reviewing queries:

 mk-log-parser --review h=host1,D=test,t=query_review

=head1 DESCRIPTION

C<mk-log-parser> is a framework for much more functionality in the future,
but at present it can analyze MySQL slow query logs and report on aggregate
statistics by grouping similar queries together and showing the worst ones first
in a report.  It can also do a "query review," which means to save a sample of
each type of query into a MySQL table so you can easily see whether you've
reviewed and analyzed a query before.  The benefit of this is that you can
keep track of changes to your server's queries and avoid repeated work.  You can
also save other information with the queries, such as comments, issue numbers in
your ticketing system, and so on.

By default the tool does slow-query-log analysis, and there are options to make
it do reviews.

Note that this is a work in *very* active progress and you should expect
incompatible changes in the future.

=head1 OUTPUT

The default output is a query analysis report.  There is one paragraph for each
class of query analyzed.  A "class" of queries all have the same fingerprint,
which is an abstracted version of the query text with literals removed,
whitespace collapsed, and so forth.  The report is formatted so it's easy to
paste into emails without wrapping, and all non-query lines begin with a
comment, so you can save it to a .sql file and open it in your favorite
syntax-highlighting text editor.

The report begins with one paragraph about the entire analysis run:

   # Queries: 8, Time: 762.1ms
   # CPU times: 260ms user, 30ms system
   # Memory use: 5.63M rss,  8.40M vsz

This file had 8 queries total, with a total execution time of 762.1ms.  The CPU
and memory lines are statistics about the analysis tool; we are trying very hard
to make the tool fast and lean.  If you find a log file that takes an unusually
long time or a lot of memory, please submit a bug report.

Each query then appears in a paragraph.  Here's a sample, slightly reformatted
so 'perldoc' will not wrap lines in a terminal:

 # Query 001 (0 QPS) ID: 0x66825DDC008FFA89 at byte 332 ________________
 #              total   %     min     max     avg     95% stddev  median
 #     Count        1  12
 #      Time  726.1ms  95 726.1ms 726.1ms 726.1ms 726.1ms      0 726.1ms
 #      Lock     91us  29    91us    91us    91us    91us      0    91us
 # Rows sent        0   0       0       0       0       0      0       0
 # Rows exam    63.0k 100   63.0k   63.0k   63.0k   63.0k      0   63.0k
 #       DBs  db1
 #     Users  [SQL_SLAVE]
 # Execution times
 #   1us
 #  10us
 # 100us
 #   1ms
 #  10ms
 # 100ms  ##############################################################
 #    1s
 #  10s+
 # Tables
 #    SHOW TABLE STATUS FROM `db2` LIKE 'table1'\G
 #    SHOW CREATE TABLE `db2`.`table1`\G
 #    SHOW TABLE STATUS FROM `db1` LIKE 'table2'\G
 #    SHOW CREATE TABLE `db1`.`table2`\G
 update db2.table1 n
       inner join db1.table2 a using(table2) 
       set n.column1 = a.column1, n.word3 = a.word3\G
 # Converted for EXPLAIN
 # EXPLAIN
 select  n.column1 = a.column1, n.word3 = a.word3 from db2.table1 n
       inner join db1.table2 a using(table2) \G

Let's take that line by line.  Line 1 is the header:

 # Query 001 (0 QPS) ID: 0x66825DDC008FFA89 at byte 332 ________________

This line identifies the sequential number of the query in the sort order
specified by L<--sort>.  Then there's the queries per second, and a query ID.
This ID is a hex version of the query's checksum in the database, if you're
using L<--review>.  You can select the reviewed query's details from the
database with a query like C<SELECT .... WHERE checksum=0x66825DDC008FFA89>.

Finally, in case you want to find a sample of the query in the log file, there's
the byte offset where you can look.  (This is not always accurate, due to some
silly anomalies in the slow-log format, but it's usually right.)  The position
refers to the L<--worst> sample, which we'll see more about below.

Next is the table of metrics about this class of queries.

 #              total   %     min     max     avg     95% stddev  median
 #     Count        1  12
 #      Time  726.1ms  95 726.1ms 726.1ms 726.1ms 726.1ms      0 726.1ms
 #      Lock     91us  29    91us    91us    91us    91us      0    91us
 # Rows sent        0   0       0       0       0       0      0       0
 # Rows exam    63.0k 100   63.0k   63.0k   63.0k   63.0k      0   63.0k

The first line is column headers for the table.  The total
is the actual value of the specified metric, and the percentage is the percent
of the total for the whole analysis run.  For example, in this case we can see
that the query executed 1 time, which is 12% of the total number of queries in
the file.  The min, max and avg columns are self-explanatory.  The 95% column
shows the 95th percentile; 95% of the values are less than or equal to this
value.  The standard deviation shows you how tightly grouped the values are.
The standard deviation and median are both calculated from the 95th percentile,
discarding the extremely large values.

 #       DBs  db1
 #     Users  [SQL_SLAVE]

These two lines show you from which databases the queries in this class were
executed, and the users who ran them.  In this case, the queries were all run by
the replication SQL slave thread.  This is a feature of Percona's enhanced
slow-query-logging functionality.  Normal unpatched servers don't log
replication activity to the slow query log.

 # Execution times
 #   1us
 #  10us
 # 100us
 #   1ms
 #  10ms
 # 100ms  ##############################################################
 #    1s
 #  10s+

The execution times show a logarithmic chart of time clustering.  Each query
goes into one of the "buckets" and is counted up.  The buckets are powers of
ten.  The first bucket is all values in the "single microsecond range" -- that
is, less than 10us.  The second is "tens of microseconds," which is from 10us
up to (but not including) 100us; and so on.

 # Tables
 #    SHOW TABLE STATUS FROM `db2` LIKE 'table1'\G
 #    SHOW CREATE TABLE `db2`.`table1`\G
 #    SHOW TABLE STATUS FROM `db1` LIKE 'table2'\G
 #    SHOW CREATE TABLE `db1`.`table2`\G

This section is a convenience: if you're trying to optimize the queries you see
in the slow log, you probably want to examine their structure and size.  These
are copy-and-paste-ready commands to do that.  The values are extracted from the
query sample, and are usually correct.  If you find a query that isn't parsed
right, submit a bug report.

 update db2.table1 n
       inner join db1.table2 a using(table2) 
       set n.column1 = a.column1, n.word3 = a.word3\G

This is a sample of the queries in this class of query.  This is not a random
sample.  It is the query that performed the worst, according to L<--worst>.
You would normally see a commented C<# EXPLAIN> line just before it, so you can
copy-paste the query to examine its EXPLAIN plan. But for non-SELECT queries
such as this one, that isn't possible to do, so the tool tries to transform the
query into a roughly equivalent SELECT query, and adds that below:

 # Converted for EXPLAIN
 # EXPLAIN
 select  n.column1 = a.column1, n.word3 = a.word3 from db2.table1 n
       inner join db1.table2 a using(table2) \G

=head1 QUERY REVIEWS

A "query review" is the process of storing all the query fingerprints you find
so you can add meta-data to them, mark them for follow up, etc.  But most
importantly, you can refer to the stored values on subsequent runs so you'll
knowh whether you've seen a query before, and not waste your time on it.

You can do even more with C<mk-log-parser>'s query review functionality, though.
You can add meta-data to each row by adding more columns.  These can be
arbitrary columns, such as the ticket ID of a JIRA ticket you open for a given
query.  And you can add comments and other information.

Next time you run C<mk-log-parser> in L<--review> mode, it will do the
following:

=over

=item *

It won't show you queries you've already reviewed.  A query is considered to be
already reviewed if you've set a value for the C<reviewed_by> column.

=item *

Queries that you've reviewed, and don't appear in the output, will cause gaps in
the query number sequence in the first line of each paragraph.  And the value
you've specified for L<--top> will still be honored.  So if you've reviewed all
queries in the top 10 and you ask for the top 10, you won't see anything in the
output.

=item *

If you want to see the queries you've already reviewed, you can specify
L<--reportall>.  Then you'll see the normal analysis output, but you'll also see
the information from the review table, just below the execution time graph.  For
example,

  # Review information
  #      comments: really bad IN() subquery, fix soon!
  #    first_seen: 2008-12-01 11:48:57
  #   jira_ticket: 1933
  #     last_seen: 2008-12-18 11:49:07
  #      priority: high
  #   reviewed_by: xaprb
  #   reviewed_on: 2008-12-18 15:03:11

You can see how useful this meta-data is -- as you analyze your queries, you get
your comments integrated right into the report.

=back

=head1 INTERNALS

A few concepts might help you understand what this tool does.  First, a query
fingerprint.  This is the abstracted form of a query, which makes it possible to
group similar queries together.  Abstracting a query removes literal values,
normalizes whitespace, and so on.  For example, these two queries:

  SELECT name, password FROM user WHERE id='12823';
  select name,   password from user
     where id=5;

Both of those queries will fingerprint to

  select name, password from user where id=?

Once the query's fingerprint is known, we can then talk about a query as though
it represents all similar queries.

What C<mk-log-parser> does is analogous to a GROUP BY statement in SQL.  If your
command-line looks like this,

  mk-log-parser /path/to/slow.log --sort sum --worst Query_time --top 10

The corresponding pseudo-SQL looks like this:

  SELECT WORST(query BY Query_time), SUM(Query_time), ...
  FROM /path/to/slow.log
  GROUP BY FINGERPRINT(query)
  ORDER BY SUM(Query_time) DESC
  LIMIT 10

This will matter a lot more in the future when we add more capabilities to
C<mk-log-parser>.  (At that point we'll dig back in SVN history for the
documentation that we've removed from this tool due to unimplemented
functionality.)

=head1 OPTIONS

Some options are negatable by specifying them in their long form with a --no
prefix.

=over

=item --analyze

short form: -A; default: yes; negatable: yes

Group similar queries and report on them.

This is the standard slow-log analysis functionality.  See OUTPUT for the
description of what this does and what the results look like.  You can disable
it for L<--review> if you don't want to see any reports on queries.

=item --fingerprints

short form: -f

Add query fingerprints to the L<--analyze> report.  This is mostly useful for
debugging purposes.

=item --forexplain

negatable: yes; default: yes

Print extra information to make analysis easy.

This option adds code snippets to make it easy to run SHOW CREATE TABLE and SHOW
TABLE STATUS for the query's tables.  It also rewrites non-SELECT queries into a
SELECT that might be helpful for determining the non-SELECT statement's index
usage.

=item --header

default: yes; negatable: yes

Print information about the entire analysis run.

See L<OUTPUT> for more information on the header.

=item --review

short form: -R; type: DSN

Store a sample of each class of query in this DSN.

The argument specifies a table to store all unique query fingerprints in.  The
table must have at least the following columns, but you can add any more columns
you wish:

  CREATE TABLE query_review (
     checksum     BIGINT UNSIGNED NOT NULL PRIMARY KEY,
     fingerprint  TEXT NOT NULL,
     sample       TEXT NOT NULL,
     first_seen   DATETIME,
     last_seen    DATETIME,
     reviewed_by  VARCHAR(20),
     reviewed_on  DATETIME,
     comments     VARCHAR(100)
  );

The columns are as follows:

  COLUMN       MEANING
  ===========  ===============
  checksum     A 64-bit checksum of the query fingerprint
  fingerprint  The abstracted version of the query; its primary key
  sample       The query text of a sample of the class of queries
  first_seen   The smallest timestamp of this class of queries
  last_seen    The largest timestamp of this class of queries
  reviewed_by  Initially NULL; if set, query is skipped thereafter
  reviewed_on  Initially NULL; not assigned any special meaning
  comments     Initially NULL; not assigned any special meaning

Note that the C<fingerprint> column is the true primary key for a class of
queries.  The C<checksum> is just a cryptographic hash of this value, which
provides a shorter value that is very likely to also be unique.

As the tool parses the log, it keeps track of which query fingerprints it has
seen.  Each time it sees a new one, it inserts the query into this table.  When
you're done, your table should contain a row for each fingerprint.

If you enable this option and disable L<--analyze>, the tool will ignore certain
analysis-related options, like L<--top>.

=item --reportall

Include all queries, even if they have already been reviewed.

=item --rusage

default: yes; negatable: yes

Report CPU times and memory usage in the header of the query analysis report.

=item --sort

short form: -s; type: string; default: sum

Sort the reported queries by this aggregate value of L<--worst>.

Valid aggregate values are: sum, min, max, avg, cnt.  For example, if you want
to sort by the sum of Query_time, then specify --worst Query_time --sort sum.

=item --top

short form: -t; type: string; default: 95%

Limit output to the worst offenders.

If the argument is an integer, report only the top N worst queries.  If the
argument is an integer followed by the C<%> sign, report that percentage of the
worst queries.

=item --worst

short form: -w; type: string; default: Query_time

The attribute that measures a query's badness.

Only basic attributes are valid: Query_time, Lock_time, Rows_read, etc.  This
option is not fully implemented.  It's possible that non-default values will do
strange things.

=back

=head1 DOWNLOADING

You can download Maatkit from Google Code at
L<http://code.google.com/p/maatkit/>, or you can get any of the tools
easily with a command like the following:

   wget http://www.maatkit.org/get/toolname
   or
   wget http://www.maatkit.org/trunk/toolname

Where C<toolname> can be replaced with the name (or fragment of a name) of any
of the Maatkit tools.  Once downloaded, they're ready to run; no installation is
needed.  The first URL gets the latest released version of the tool, and the
second gets the latest trunk code from Subversion.

=head1 SYSTEM REQUIREMENTS

You need Perl and some core packages that ought to be installed in any
reasonably new version of Perl.

=head1 ENVIRONMENT

The environment variable C<MKDEBUG> enables verbose debugging output in all of
the Maatkit tools:

   MKDEBUG=1 mk-....

=head1 BUGS

Please use Google Code Issues and Groups to report bugs or request support:
L<http://code.google.com/p/maatkit/>.

Please include the complete command-line used to reproduce the problem you are
seeing, the version of all MySQL servers involved, the complete output of the
tool when run with L<"--version">, and if possible, debugging output produced by
running with the C<MKDEBUG=1> environment variable.

=head1 COPYRIGHT, LICENSE AND WARRANTY

This program is copyright 2007-2008 Baron Schwartz.
Feedback and improvements are welcome.

THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
systems, you can issue `man perlgpl' or `man perlartistic' to read these
licenses.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA  02111-1307  USA.

=head1 AUTHOR

Baron Schwartz, Daniel Nichter

=head1 VERSION

This manual page documents Ver 0.9.1 Distrib 2725 $Revision: 2720 $.

=cut
