# 
# Copyright (c) 2009-2025 The TEBA Project. All rights reserved.
# 
# Redistribution and use in source, with or without modification, are
# permitted provided that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
# 
# Author: Atsuhi Yoshida

#!/usr/bin/env perl

package RewriteTokens;

use strict;
use warnings FATAL=>'all';

use feature 'state';

use Data::Dumper;

#use lib ".";

use Tokenizer;
use TEBA2JSON;

use Data::Dumper;
use GraphViz;

my $pkg_name;
my $pkg_path;

BEGIN {
    ($pkg_path = $INC{__PACKAGE__ . "2.pm"}) =~ s/[^\/]+$//;
    $pkg_path ||= "./";
}


my $debug;
my $graph;
my $syntax_check;


BEGIN {
    $debug = 0;
}

sub new {
    my $class = shift;
    my $opt = shift || {};

    my $self = bless { %$opt };
    $self->{Tokenizer} = Tokenizer->new()->load($pkg_path . "rule-token2.def");
    $self->include_path("."); # default path

    return $self;
}

sub debug_mode {
    $debug = 1;
    return $_[0];
}
sub graph_mode {
    $graph = 1;
    return $_[0];
}
sub syntax_check {
    $syntax_check = 1;
    return $_[0];
}

sub rule {
    my $self = shift;
    my $rule = shift;
    $self->{rule} = $rule;

    $self->{tokens} = [ $self->tokenizer($rule) ];

    if ($syntax_check) {
        &check_syntax($self->{tokens});
    }
    $self->{tokens} = [ grep(!/^#L\b/, @{$self->{tokens}}) ];

    $self->extract_include_files();
    $self->extract_group_tokens();
    $self->extract_pattern_replacement();

    $self->parse_rule();

    &nfa2dot($self->{ruleset}) if $graph;

    return $self;
}

sub apply {
    my $self = shift;
    my $t = shift;  # a reference of the list of tokens, which have no newlines at the ends.
    die unless ref($t) eq "ARRAY";
    chomp(@$t);

    my ($c, $delta, $json, $stopped)
        = &rule_match($t, $self->{ruleset}, [0, int(@$t)]);

    $self->{vars_json} = $json;
    $self->{count} = $c;
    $self->{stop} = $stopped;

    if ($debug) {
        print "## Number of Replacements: $c\n";
    }

    return $self;
}

sub count {
    my $self = shift;
    return $self->{count};
}

sub vars {  # JSON format
    my $self = shift;
    return $self->{vars_json};
}

##################

sub parse_rule {
    my $self = shift;

    my ($s, $e) = $self->parse_rule_set();
    $self->{ruleset} = $s;
    return $self;
}

sub parse_rule_set {
    my $self = shift;
    my $tokens = $self->{tokens};

    my ($head, $prev);
    while (@$tokens) {
        my ($start, $end);
        my $tk = shift(@$tokens);
        last if ($tk =~ /^C_R/);
        if ($tk =~ /^REPEAT/) {
            print "parse_rule_set: parsing REPEAT\n" if $debug;
            $start = &create_node({t => 'rep_b'});
            $end = &create_node({t => 'rep_e'});
            shift(@$tokens);  # C_L
            my ($s, $e) = $self->parse_rule_set();
            $start->{next} = $s;
            $e->{next} = $end;
            $end->{alt} = $start;
        } elsif ($tk =~ /^IF/) {
            print "parse_rule_set: parsing IF\n" if $debug;
            $start = &create_node({t => 'if_b'});
            $end = &create_node({t => 'if_e'});

            shift(@$tokens);  # C_L
            my ($s, $e) = $self->parse_rule_set();

            $start->{next} = $end;
            $start->{alt} = $s;
            $e->{next} = $end;
        } elsif ($tk =~ /^CONTEXT/) {
            print "parse_rule_set: parsing CONTEXT\n" if $debug;
            $start = $end = &create_node({t => 'ctxb'});

            shift(@$tokens); # C_L beginning of context
            my $c_ast = $self->parse_context();
            shift(@$tokens); # C_R beginning of context
            shift(@$tokens); # C_L beginning of rule set
            my ($s, $e) = $self->parse_rule_set();

            $start->{ctx} = &gen_nfa($c_ast);
            $start->{alt} = $s;
        } elsif ($tk =~ /^STOP/) {
            print "parse_rule_set: parsing STOP\n" if $debug;
            $start = $end = &create_node({t => 'stop'});
        } else {
            print "parse_rule_set: parsing A RULE\n" if $debug;
            my ($ast_left, $ast_right, $sig) = $self->parse_a_rule();
            my $rule = &create_node({t => 'rule', n => $sig});
            $rule->{left} = &gen_nfa($ast_left);
            $rule->{right} = &gen_replacement($ast_right);
            $start = $end = $rule;
        }
        $head ||= $start;
        $prev->{next} = $start;
        $prev = $end;
    }
    return ($head, $prev);
}


############################

sub rule_match {
    my ($token, $rule, $reg) = @_;

    my $c = 0;
    my $all_delta = 0;

    my @REPEAT; # count for foreach repeat

    my $node = $rule;
    my $last_c = 0;
    my $delta = 0;
    my $all_json;
    my $stopped = 0;
    while ($node) {
        my $type = $node->{t};
        if ($type eq "rule") {
            print "\nDEBUG: ### rule: ", $node->{n}, " ###\n" if (0 ||$debug);
            ($last_c, $delta, my $json) =
                &apply_rule($token, $reg, $node->{left}, $node->{right});
            $c += $last_c;
            $reg->[1] += $delta;
            $all_delta += $delta;
            push(@$all_json, $json) if $json;
        } elsif ($type eq "if_b") {
            if ($last_c > 0) {
                $node = $node->{alt};
                next;
            }
        } elsif ($type eq "if_e") {
            # nothing to do.
        } elsif ($type eq "rep_b") {
            push(@REPEAT, $c);
        } elsif ($type eq "rep_e") {
            my $prev_c = pop(@REPEAT);
            if ($c > $prev_c) {
                $node = $node->{alt};
                next;
            }
        } elsif ($type eq "ctxb") {
            ($last_c, $delta, my $json, $stopped) =
                &apply_context($token, $node->{alt}, $reg, $node->{ctx});
            $c += $last_c;
            $reg->[1] += $delta;
            $all_delta += $delta;
            push(@$all_json, $json) if $json;
            last if ($stopped);
        } elsif ($type eq "stop") {
            print "DEBUG: stopped!\n" if $debug;
            $stopped = 1;
            last;
        }
        $node = $node->{next};
    }
    return ($c, $all_delta, $all_json, $stopped);
}

sub apply_rule {
    my ($t, $reg, $left_nfa, $right_rep) = @_;

    my $VAR = {};
    my $count = 0;

    my ($ind, $ind_end) = @$reg;
    my $ctx;

    my $json;
    if (($right_rep->[0]||"") eq "#JSON") {
        shift(@$right_rep);
        $json = [];
    }

    while ($ind < $ind_end) {
        my $start_ind = $ind;
        ($ind, $ctx) = &nfa_match($left_nfa, $ind, $ind_end, $t, $VAR);
        if ($start_ind < $ind) {
            if ($debug) {
                print "Matching complete: region[", $start_ind, ", ", $ind - 1, "].\n";
                print "Variables:\n", Dumper($VAR);
            }
            if ($json) {
                push(@$json, &replace_json($t, $right_rep, $VAR));
            } else {
                my ($last_ind, $delta) =
                    &replace($t, [$start_ind, $ind], $right_rep, $VAR);

                $ind = $last_ind;
                $ind_end += $delta;
            }
            $count++;
            print "DEBUG: after replacement: ind=$ind, end=$ind_end\n" if $debug;
        }
        $ind = $start_ind + 1 if $start_ind == $ind;
        print "DEBUG: Next matching start at: ind=$ind, end=$ind_end\n" if $debug;
        $VAR = {};
    }

    return ($count, $ind_end - $reg->[1], $json);
}

sub apply_context {
    my ($t, $rule, $reg, $ctx_nfa) = @_;

    my $VAR = {};
    my $count = 0;
    my ($ind, $ind_end) = @$reg;
    my $all_json;
    my $stopped = 0;
    while ($ind < $ind_end) {
        my $start_ind = $ind;
        my $ctx;
        ($ind, $ctx) = &nfa_match($ctx_nfa, $ind, $ind_end, $t, $VAR);
        if ($start_ind < $ind) {
            if ($debug) {
                print "apply_context_rule: context region[",
                    $ctx->[0], ", ", $ctx->[1] - 1, "].\n";
            }
            (my $last_c, my $delta, my $json, $stopped)
                = &rule_match($t, $rule, $ctx);
            $count += $last_c;
            $ind_end += $delta;
            push(@$all_json, $json) if $json;
            last if $stopped;
        }
        $ind = $start_ind + 1 if $start_ind == $ind;
        print "DEBUG: Next context matching start at: ind=$ind, end=$ind_end\n" if $debug;
    }

    return ($count, $ind_end - $reg->[1], $all_json, $stopped);
}

sub gen_replacement {
    my $ast_right = shift;

    my @rep = ();
    push(@rep, '#JSON') if $ast_right->{json};
    foreach (@{$ast_right->{r}}) {
        my $tk;
        if (/^(\$\w+)(?:#[BE](\w+))?(?::(\w+))?$/) {
            my ($name, $ref, $type) = ($1, $2, $3);
            $tk = sprintf(q(&get_var($var->{q(%s)}, $tokens)), $name);
            $tk = "&change_type($tk, q($type))" if $type;
            $tk = "&add_ref($tk, q(#$ref))" if $ref;
            if ($ast_right->{json}) {
                $tk = "&token2json($tk)";
                $tk = sprintf(q({ e => %s, r => [ %s, %s ] }),
                              $tk,
                              sprintf(q($var->{q(%s)}->[0]), $name),
                              sprintf(q($var->{q(%s)}->[1]), $name));
                #		$tk = sprintf(q({ e => %s }), $tk);
            }
        } elsif (/^\$\$$/) {
            $tk = q("__APPLY_ID <$APPLY_ID>");
        } elsif (/^\[$/) {
            $tk = "&one_token(0";
        } elsif (/^\]$/) {
            $tk = "0)";
        } elsif ($ast_right->{json} && m/^(?:C_[LR]|[BE]_GRP|CA|TO|IDN|LIS)\s+<(.*)>$/) {
            $tk = $1; # JSON constructs.
        } else {
            s/[()]/\\$&/g;
            $tk = "q($_)";
            $tk = "&add_ref($tk, q(##$1))" if /^\w+\s+##[BE](\w+)/;
        }
        push(@rep, $tk);
    }
    return \@rep;
}


sub replace {
    my ($tokens, $reg, $rep, $var) = @_;
    my ($offset, $len) = ($reg->[0], $reg->[1] - $reg->[0]);
    state $APPLY_ID++;

    print "replace: eval: [$APPLY_ID]\n", join("\n", @$rep), "\n" if $debug;
    my @rep = eval(join(",", @$rep));
    if ($@) {
        die "replace: illegal sytanx: $@\n" . Dumper($rep);
    }
    splice(@$tokens, $offset, $len, @rep);

    my $delta = int(@rep) - $len;
    my $ind = $reg->[1] + $delta;
    #    die "illegal index." if $ind < 0;
    print "replace: replaced region: ($reg->[0], $reg->[1]), len=$len, delta=$delta, next ind=$ind\n", Dumper(\@rep) if $debug;
    return ($ind, $delta);
}

sub get_var {
    my ($v, $t) = @_;
    return () unless $v;
    return @{$t}[ $v->[0] .. $v->[1] ];
}

sub token2json {
    return TEBA2JSON->new(\@_)->json->tree;
}

sub one_token {
    my $tp;
    my @tk;
    foreach (@_) {
        if (/^(\w+)\s+(?:#\w+\s+)?<(.*)>$/) {
            $tp ||= $1;
            push(@tk, $2);

        }
    }
    return "$tp\t<" . join("", @tk) . ">";
}


sub replace_json {
    my ($tokens, $rep, $var) = @_;
    my $json = join("", @$rep);
    my $res = eval($json);
    if ($@) {
        die "replace_json: illegal sytanx: $@\n$json";
    }
    return $res;
}

sub parse_a_rule {
    my $self = shift;
    my $tokens = $self->{tokens};

    my @left = ();
    until ($tokens->[0] =~ /^C_R/) {
        push(@left, shift(@$tokens));
    }
    shift(@$tokens); # skip C_R
    shift(@$tokens); # skip =>
    my $type = shift(@$tokens); # skip C_L or B_GRP
    my $nest = 1;
    my @right = ();
    while (1) {
        $nest-- if ($tokens->[0] =~ /^C_R|E_GRP/);
        $nest++ if ($tokens->[0] =~ /^C_L|B_GRP/);
        last if ($nest == 0);
        push(@right, shift(@$tokens));
    }
    shift(@$tokens); # skip C_R or A_R

    $self->{vars_left} = {};

    my $sig;
    #    if ($debug) {
	$sig = "{".&join_token(@left) . "}=>{" . &join_token(@right) . "}";
    #    }

    my $ast_left = $self->parse_left(\@left);
    my $ast_right = $self->parse_right(\@right, $type);

    return ( $ast_left, $ast_right, $sig);
}

sub join_token {
    return @_ > 0 ? join(" ", map((m/\w+\s+<(.*)>$/), @_)) : "";
}

####

sub parse_left {
    my $rule = &join_token(@{$_[1]});
    my ($ast, $tk) = &parse_left_sub(@_);
    die "Illegal token \"$tk\" in the left rule: { $rule }" if ($tk);
    push(@{$ast->{e}}, { t => 'end', e => [] });

    if ($debug) {
        print "###AST(left):", Dumper($ast), "\n";
    }

    return $ast;
}

sub parse_left_sub {
    my $self = shift;
    my $tokens = shift;
    my $elem = { t => 'normal', e => [] };
    while (@$tokens) {
        my $tk = shift @$tokens;
        my ($type, $name, $ref, $tkpt)
            = ($tk =~ m/^(\w+)\s+<(\$\w+)?(#\w+)?:?(.*)>$/);
        $self->{vars_left}->{$name} = 1 if $name;
        if ($type eq "B_GRP") {
            my $name = "";
            if ($tkpt =~ /\[(\w+):/) {
                $name = "\$$1";
                $self->{vars_left}->{$name} = 1;
            }
            my $group_node = { t => 'grp', e => [ $tk ], n => "\$$1" };
            push(@{$elem->{e}}, $group_node);
            my ($ch, $e_grp, $rp) = $self->parse_left_sub($tokens);
            $group_node->{rp} = $rp;  # repeat type of group
            push(@{$group_node->{e}}, $ch, $e_grp);
        } elsif ($type eq "E_GRP") {
            my ($rp) = ($tkpt =~ /^\](.*)$/);
            return ($elem, $tk, $rp);
        } elsif ($type eq "OR") {
            $elem = { t => 'or', e => [ $elem, $tk ] };
            my ($e, $tk, $rp) = $self->parse_left_sub($tokens, $ref);
            push(@{$elem->{e}}, $e);
            return ($elem, $tk, $rp)
        } elsif ($type eq "P_L") {
            my $context_node = { t => 'ctx', e => [ $tk ] };
            push(@{$elem->{e}}, $context_node);
            my ($ch, $e_ctx) = $self->parse_left_sub($tokens);
            push(@{$context_node->{e}}, $ch, $e_ctx);
        } elsif ($type eq "P_R") {
            return ($elem, $tk);
        } else {
            push(@{$elem->{e}}, $tk);
        }
    }
    return $elem;
}


################################ NFA ###

sub create_node {
    my $init = shift;
    state $NODEID = 0;
    return { %$init, id => ++$NODEID };
}

sub gen_nfa {
    my $root = shift;
    die "illegal AST." unless ($root->{t} eq "normal");

    my $start = &create_node({ t => 'start' });
    my ($s, $e) = &gen_nfa_normal($root);
    $start->{next} = $s;

    &nfa_optimize($s);

    return $start;
}

sub gen_nfa_normal {
    my ($root) = @_;

    if ($root->{t} eq "or") {
        return &gen_nfa_select($root);
    }
    die "Illegal AST:".Dumper($root) unless ($root->{t} eq "normal");

    my ($start, $end);

    my $prev;
    foreach (@{$root->{e}}) {
        my ($s, $e);
        if (&isObj($_)) {
            if ($_->{t} eq "grp") {
                ($s, $e) = &gen_nfa_group($_);
            } elsif ($_->{t} eq "end") {
                $s = $e = &create_node({ t => 'end'});
            } elsif ($_->{t} eq "or") {
                ($s, $e) = &gen_nfa_selct($_);
            } elsif ($_->{t} eq "ctx") {
                ($s, $e) = &gen_nfa_context($_);
            } else {
                die "Not implemented yet for $_->{t}." . Dumper($_);
            }
        } else  {
            my ($reg, $id, $name) = &parse_token_pattern($_);
            $s = $e = &create_node({ t => 'tk',
                                     n => $name, pair => $id, p => $reg });
        }
        $prev->{next} = $s if $prev;
        $prev = $e;

        $start ||= $s;
        $end = $e;
    }
    return ($start, $end);
}

sub gen_nfa_group {
    my ($root) = @_;
    state $NFA_GROUP_ID;

    my $start = &create_node({ t => 'grp_b',
                               n => $root->{n}, gid => ++$NFA_GROUP_ID });
    my $end = &create_node({ t => 'grp_e',
                             n => $root->{n} , gid => $NFA_GROUP_ID});

    my ($s, $e) = &gen_nfa_normal($root->{e}->[1]);

    if (!$root->{rp}) {
        $start->{next} = $s;
        $e->{next} = $end;
    } elsif ($root->{rp} eq '*') {
        my $loop = &create_node({ t => 'grp_x', n => $root->{n}});
        $start->{next} = $loop;
        $loop->{next} = $end;
        $loop->{alt} = $s;
        $e->{next} = $loop;
    } elsif ($root->{rp} eq '**') {
        my $loop = &create_node({ t => 'grp_x', n => $root->{n}});
        $start->{next} = $loop;
        $loop->{next} = $s;
        $loop->{alt} = $end;
        $e->{next} = $loop;
    } elsif ($root->{rp} eq '?') {
        $start->{next} = $end;
        $start->{alt} = $s;
        $e->{next} = $end;
    } elsif ($root->{rp} eq '??') {
        $start->{next} = $s;
        $start->{alt} = $end;
        $e->{next} = $end;
    } elsif ($root->{rp} eq '!') {
        $start->{next} = $s;
        $e->{next} = $end;
        $end->{t} .= 'n'  # 'n' means 'negative'
    } else {
        die "Unknown repeat type: $root->{rp}";
    }

    return ($start, $end);
}

sub gen_nfa_select {
    my ($root) = @_;

    my $start = &create_node({ t => 'sel_b' });
    my $end = &create_node({ t => 'sel_e' });

    my ($s1, $e1) = &gen_nfa_normal($root->{e}->[0]);
    my ($s2, $e2) = &gen_nfa_normal($root->{e}->[2]) ;

    $start->{next} = $s1;
    $e1->{next} = $end;

    $start->{alt} = $s2;
    $e2->{next} = $end;

    return ($start, $end);
}

sub gen_nfa_context {
    my ($root) = @_;

    my $start = &create_node({ t => 'ctx_b' });
    my $end = &create_node({ t => 'ctx_e' });

    my ($s, $e) = &gen_nfa_normal($root->{e}->[1]);

    $start->{next} = $s;
    $e->{next} = $end;

    return ($start, $end);
}

sub nfa2dot {
    my $nfa = shift;
    my $file = shift || "nfa.dot";

    print "NFA:" . Dumper($nfa) if $debug;

    my $dot = GraphViz->new();
    my $visit = {};
    $visit->{$nfa->{id}}++;
    &nfa2dot_trv($nfa, $dot, $visit);

    print "DEBUG: Generaing nfa.dot....\n";
    $dot->as_dot($file);
}

sub nfa2dot_trv {
    my ($node, $dot, $visit) = @_;

    $visit->{$node->{id}}++;

    my $label = $node->{t} . ": ";
    if (exists $node->{n}) {
        $label .= $node->{n};
    }
    if (exists $node->{gid}) {
        $label .= "#" .$node->{gid};
    }
    if ($node->{t} eq "tk") {
        my $pt;
        if ($node->{p}) {
            ($pt = $node->{p}) =~ s/\\/\\\\/g;
            $pt =~ s/^\(\?\^:(.*)\)$/$1/;
            $pt =~ s/\\b$//;
            $pt = "/$pt/";
        } else {
            $pt = $node->{n};
        }
        $label .= " " . $pt;
    }

    my %config;
    if (&is_rule_node($node->{t})) {
        %config = (shape=>"box", style=>"bold");
    }
    $dot->add_node($node->{id}, label => $label, %config);
    print "Node: $node->{id}, $label\n" if $debug;

    %config = ();
    my %line_config = (style=>"bold", minlen=>3, weight=>10);
    foreach ('next', 'alt', 'ctx', 'left') {
        if (exists $node->{$_}) {
            if ($_ ne 'next') {
                %config = (label => $_);
            }
            if (&is_rule_node($node->{$_}->{t})) {
                %config = (%config, %line_config);
            }
            if ($_ eq "next" && $node->{t} =~ /^if_b$/) {
                $config{weight} = 1;
            } elsif ($_ eq "alt" && $node->{t} =~ /^(ctxb|rep_e)/) {
                $config{weight} = 1;
            } elsif ($_ eq "alt") {
                $config{weight} = 10;
            }
            $dot->add_edge($node->{id}, $node->{$_}->{id}, %config);
            print "Edge($_): $node->{id}, $node->{$_}->{id}\n" if $debug;
            &nfa2dot_trv($node->{$_}, $dot, $visit)
                unless exists $visit->{$node->{$_}->{id}};

        }
    }
}

sub is_rule_node {
    return $_[0] =~ /^(rule|rep|if|ctxb|stop)/;
}

sub nfa_optimize {
    my $node = shift;
    my @alt;
    my %visited;

    #    return;
    push(@alt, $node);
    while (@alt) {
        $node = shift @alt;
        &nfa_find_next_var($node, \@alt, \%visited);
    }
}

sub nfa_find_next_var {
    my $node = shift;
    my $alt = shift;
    my $visited = shift;

    my $next;

    if (exists $visited->{$node}) {
        if (exists $node->{p}) {
            return $node;
        } elsif (exists $node->{next_node}) {
            return $node->{next_node};
        } else {
            return undef;
        }
    }
    $visited->{$node} = $node;

    if (exists $node->{next}) {
        $next = &nfa_find_next_var($node->{next}, $alt, $visited);
        if ($next) {
            $node->{next_node}  = $next;
            $node->{next_p} = $next->{p} if $next->{p};
        }
    }

    if (exists $node->{alt}) {
        push(@$alt, $node->{alt});
    }

    if ($node->{t} eq "sel_b") { # give up becase there are more than two variables.
        return undef;
    }

    if (exists $node->{p}) {
        return $node;
    } else {
        return $next;
    }
}

sub nfa_match {
    my ($nfa, $start_ind, $end_ind, $t, $VAR) = @_;

    my $ID_TBL = {};
    my %GROUP_START;
    my @STATUS;
    my $ctx;

    my $node = $nfa->{next}; # skip start;
    print "nfa_match: skip the start node.\n" if $debug;
    my $ind = $start_ind;
    while (1) {
        if ($node->{t} eq "tk") {
            my $err = $ind >= $end_ind; # whether reached at the end of tokens.
            print "nfa_match: reached at the end of tokens. start=$start_ind, ind=$ind, end=$end_ind.\n" if ($err && $debug);
            unless ($err) {
                if ($node->{p}) {
                    # $err = ! &match_token($node->{p}, $node->{pair},
                    # 			$t->[$ind], $ID_TBL) unless $first_pass;

                    my ($matched, $begin_id) = &match_token_no_sideeffect($node->{p}, $node->{pair},
                                                                          $t->[$ind], $ID_TBL);
                    $err = !$matched;
                    if ($begin_id) {
                        &id_add($ID_TBL, $node->{pair}, $begin_id);
                    }

                    unless ($err) {
                        print "Matched Variable: $node->{n}\n" if $debug;
                        $VAR->{$node->{n}} = [ $ind, $ind ]; # regions
                        if ($node->{pair}) {
                            if (&id_is_end($node->{pair})) {
                                &cleanup_status_to($node->{pair}, \@STATUS);
                            }  else {
                                push(@STATUS, $node->{pair});
                            }
                        }
                    } else {
                        $VAR->{$node->{n}} = [ ];
                    }
                } else {
                    $err = &match_var($VAR, $node->{n}, $t, $ind);
                }
            }

            if ($err) {
                print "nfa_match: token match failure. ind=$ind.\n" if $debug;
                if (@STATUS) {
                    my $st = &pop_last_status(\@STATUS);
                    if ($st) {
                        ($ind, $node, $ID_TBL) = @$st;  # backtrack to the last brach.
                        print "nfa_match: backtracked. ind=$ind.\n" if $debug;
                        next;
                    }
                }
                print "nfa_match: no trackback status exist: start=$start_ind, ind=$ind, end=$end_ind.\n" if $debug;
                return $start_ind;  # no need to return $ctx
            }
            ++$ind;
        } elsif ($node->{t} eq "grp_b") {
            $GROUP_START{$node->{gid}} = $ind;
        } elsif ($node->{t} eq "grp_e") {
            my $s = $GROUP_START{$node->{gid}};
            $VAR->{$node->{n}} =  [ $s, $ind - 1 ];
        } elsif ($node->{t} eq "grp_en") {
            print "nfa_match: matched negative group. ind=$ind.\n" if $debug;
            return $start_ind;  # ignores backtrack and fails match.
            #	} elsif ($node->{t} eq "grp_x") {
            #	} elsif ($node->{t} eq "sel_b") {
            #	} elsif ($node->{t} eq "sel_e") {
            # should cancel the last status?
        } elsif ($node->{t} eq "ctx_b") {
            $ctx->[0] = $ind;
        } elsif ($node->{t} eq "ctx_e") {
            $ctx->[1] = $ind;
            print "nfa_match: saved the context region: [ $ctx->[0], $ctx->[1] ]\n" if $debug;
        } elsif ($node->{t} eq "end") {
            print "nfa_match: reached at end. start=$start_ind, ind=$ind.\n" if $debug;
            return ($ind, $ctx);
        }

        if ($ind < $end_ind && exists $node->{alt}) {
            print "nfa_match: save a state of group: ind=$ind\n" if $debug;
            print "nfa_match: check the next pattern: $node->{next_p} and $t->[$ind]\n"
                if ($debug and exists $node->{next_p});
            if (exists $node->{next_p} && $t->[$ind] !~ $node->{next_p}) {
                $node = $node->{alt};
                print "nfa_match: move to alt: $node->{t} ",
                    ($node->{p} || ""), "\n" if $debug;
                next;
            }
            push(@STATUS, [ $ind, $node->{alt}, &id_clone($ID_TBL) ]);
        }

        $node = $node->{next};
    }
    # never reached here.
}

# Experimentally optimization, but it does not improved the performance unexpectedly.
sub Xnfa_match {
    my ($nfa, $start_ind, $end_ind, $t, $VAR) = @_;

    my $ID_TBL = {};
    my %GROUP_START;
    my @STATUS;
    my $ctx;
    my $pre_matched = 0;  # already the next token is matched
    my $begin_id;

    my $node = $nfa->{next}; # skip start;
    print "nfa_match: skip the start node.\n" if $debug;
    my $ind = $start_ind;
    while (1) {
        if ($node->{t} eq "tk") {
            my $err = $ind >= $end_ind; # whether reached at the end of tokens.
            print "nfa_match: reached at the end of tokens. start=$start_ind, ind=$ind, end=$end_ind.\n" if ($err && $debug);
            unless ($err) {
                if (exists $node->{p}) {
                    my $matched;
                    if ($pre_matched) {
                        $matched = 1;
                    } else {
                        ($matched, $begin_id) = &match_token_no_sideeffect($node->{p}, $node->{pair},
                                                                           $t->[$ind], $ID_TBL);
                    }
                    $err = !$matched;
                    if ($begin_id) {
                        &id_add($ID_TBL, $node->{pair}, $begin_id);
                        $begin_id = undef;
                    }

                    unless ($err) {
                        print "Matched Variable: $node->{n}\n" if $debug;
                        $VAR->{$node->{n}} = [ $ind, $ind ]; # regions
                        if ($node->{pair}) {
                            if (&id_is_end($node->{pair})) {
                                &cleanup_status_to($node->{pair}, \@STATUS);
                            }  else {
                                push(@STATUS, $node->{pair});
                            }
                        }
                    } else {
                        $VAR->{$node->{n}} = [ ];
                    }
                } else {
                    $err = &match_var($VAR, $node->{n}, $t, $ind);
                }
            }

            if ($err) {
                print "nfa_match: token match failure. ind=$ind.\n" if $debug;
                if (@STATUS) {
                    my $st = &pop_last_status(\@STATUS);
                    if ($st) {
                        ($ind, $node, $ID_TBL) = @$st;  # backtrack to the last brach.
                        print "nfa_match: backtracked. ind=$ind.\n" if $debug;
                        next;
                    }
                }
                print "nfa_match: no trackback status exist: start=$start_ind, ind=$ind, end=$end_ind.\n" if $debug;
                return $start_ind;  # no need to return $ctx
            }
            ++$ind;
        } elsif ($node->{t} eq "grp_b") {
            $GROUP_START{$node->{gid}} = $ind;
        } elsif ($node->{t} eq "grp_e") {
            my $s = $GROUP_START{$node->{gid}};
            $VAR->{$node->{n}} =  [ $s, $ind - 1 ];
        } elsif ($node->{t} eq "grp_en") {
            print "nfa_match: matched negative group. ind=$ind.\n" if $debug;
            return $start_ind;  # ignores backtrack and fails match.
            #	} elsif ($node->{t} eq "grp_x") {
            #	} elsif ($node->{t} eq "sel_b") {
            #	} elsif ($node->{t} eq "sel_e") {
            # should cancel the last status?
        } elsif ($node->{t} eq "ctx_b") {
            $ctx->[0] = $ind;
        } elsif ($node->{t} eq "ctx_e") {
            $ctx->[1] = $ind;
            print "nfa_match: saved the context region: [ $ctx->[0], $ctx->[1] ]\n" if $debug;
        } elsif ($node->{t} eq "end") {
            print "nfa_match: reached at end. start=$start_ind, ind=$ind.\n" if $debug;
            return ($ind, $ctx);
        }

        if (exists $node->{alt}) {
            print "nfa_match: save a state of group: ind=$ind\n" if $debug;
            $pre_matched = 0;
            if ($ind < $end_ind) {
                ($pre_matched, $begin_id) = &match_token_no_sideeffect($node->{next_p}, $node->{next_node}->{pair},
                                                                       $t->[$ind], $ID_TBL);
                print "DEBUG: pre_matched=$pre_matched $node->{next_p} and $t->[$ind]\n" if $debug;
            }
            unless ($pre_matched) {

                #	    if (exists $node->{next_p} && $t->[$ind] !~ $node->{next_p}) {
                #				    print "DEBUG: SKIP the next. (1)\n";
                $node = $node->{alt};
                next;
            }
            push(@STATUS, [ $ind, $node->{alt}, &id_clone($ID_TBL) ]);
        }

        $node = $node->{next};
    }
    # never reached here.
}


sub cleanup_status_to {
    my ($id, $status) = @_;
    print "rewind_status: $id\n" if $debug;
    $id =~ s/^#E/#B/;
    while (@$status && $status->[-1] ne $id) {
        pop(@$status);
    }
}

sub pop_last_status {
    my $status = shift;
    my $s;
    do {
        $s = pop(@$status);
    } while ($s && $s =~ /^#/);
    return $s;
}


################################ NFA ###

sub parse_token_pattern {
    my $pt = shift;
    my $esc_pt = qr/[\{\}\\\(\)\$]/;
    my ($reg, $id, $name);

    if ($pt =~ /^VAR_TYPE\s+<(\$\w*)(#\w+)?:(?:(\w+)|\/([^\/]+)\/)(?:\/((?:\\.|(?<!\\).)*)\/)?>$/) {
        # 通常はありえないが、$x:/\// のように型名のパターンにスラッシュが入ったときに
        # ここの条件に合わず、エラーになる。
        ($name, $id, my $tp, my $tp_pt, my $tk_pt) = ($1, $2, $3, $4, $5);
        if ($tp) {
            $tp_pt = "$tp\\b";
        } else {
            $tp_pt =~ s|\\\\|\\|g;
        }
        if ($tk_pt) {
            $tk_pt =~ s|\\\\|\\|g;
            $reg = qr/^(?:$tp_pt)\s+(?:#\w+\s+)?<(?:$tk_pt)>$/;
        } else {
            $reg = qr/^(?:$tp_pt)/;
        }
    } elsif ($pt =~ /^VAR_TEXT\s+<(\$\w*)(#\w+)?:'(.*)'>$/) {
        ($name, $id, my $tk)  = ($1, $2, $3);
        $tk =~ s/$esc_pt/\\$&/g;
        $reg = qr/\s<$tk>$/;
    } elsif ($pt =~ /^VAR_REF\s+<(\$\w+)>$/) {
        $name = $1;
    } else {
        die "parse_token_pattern: Unknown token pattern: $pt\n";
    }
    $id ||= "";
    return ($reg, $id, $name);
}



####################################################################

sub extract_group_tokens {
    my $self = shift;
    my $tokens = $self->{tokens};
    my $num = 0;
    my @stack = ();
    my @_stack = ();
    foreach (@$tokens) {
        if (/^(\w+\s+<)\(([^:]+)(:.*>)$/) {
            my $id = "g".(++$num);
            $_ = "$1$2#B$id$3";  # begin id: #Bg123
            push(@stack, $id);
            push(@_stack, $_);
        } elsif (/^(\w+\s+<[^:]+)(:.*)\)>$/) {
            my $id = pop(@stack);
            pop(@_stack);
            unless ($id) {
                my $t = &strip_token($_);
                die "No group begin exists for $t.\n";
            }
            $_ = "$1#E$id$2>";  # end id: #Eg123
        }
    }
    if (@stack > 0) {
        my @t = map(&strip_token($_), @_stack);
        s/(\$\w+)#B\w+/\($1/ foreach (@t);
        my $t = join(", ", @t);
        die "No group end exists for: $t\n";
    }

    if ($debug && $num > 0) {
        print "##ExtractGroupTokens:\n";
        foreach (@$tokens) {
            print "$_\n";
            print "\n" if /^C_R/;
        }
    }
}

sub extract_pattern_replacement {
    my $self = shift;
    my $tokens = $self->{tokens};
    my $res = [];
    my %pat_rep;

    while (@$tokens) {
        my $tk = shift(@$tokens);
        if ($tk =~ /^PNAME\s+<(\@\w+)>$/) {
            my $pname = $1;
            shift(@$tokens); # TO;
            shift(@$tokens); # C_L;
            my $replace = [];
            while (@$tokens) {
                last if ($tokens->[0] =~ /^C_R/);
                push(@$replace, &extract_pname(shift @$tokens, \%pat_rep));
            }
            shift(@$tokens); # C_R;
            $pat_rep{$pname} = $replace;
            next;
        }
        push(@$res, &extract_pname($tk, \%pat_rep));
    }

    $self->{tokens} = $res;
    if ($debug) {
        print "## ExtractPatternReplacement:\n", join("\n", @$res), "\n";
    }
}

sub extract_pname {
    my $tk = shift;
    my $pat_rep = shift;
    if ($tk =~ /^VAR_PNAME\s<\$(\w*):(\@\w+)>$/) {
        my $name = $1 || "";
        my $pname = $2;
        unless (exists $pat_rep->{$pname}) {
            die "Undefined pattern replacement name: $pname.";
        }
        return (qq(B_GRP\t<[$name:>), @{$pat_rep->{$pname}}, qq(E_GRP\t<]>));
    }
    return $tk;
}


#########

sub parse_right {
    my $self = shift;
    my $tokens = shift;
    my $type = shift;

    my $rule = &join_token(@$tokens);

    my $ast_right = { json => ($type =~ /^B_GRP/ ? 1 : 0), r => [] };
    my $replacement = $ast_right->{r};
    foreach (@$tokens) {
        if (/^VAR_REF\s+<(.*)>$/) {
            my $var = $1;
            push(@$replacement, $var);
            unless ($self->{vars_left}->{$var}) {
                print Dumper($self->{vars_left}), "\n";
                die "parse_right: Undefined variable reference: $var in { $rule }";
            }
        } elsif (/^TOKEN_TEXT\s+<'(.*)'(#\w+)?:(\w+)>$/) {
            my $id = $2 ? " #$2" : "";
            push(@$replacement, "$3$id\t<$1>");
        } elsif (/^VAR_TYPE\s+<(\$\w+)(#\w+)?:(\w+)>$/) {
            my ($var, $ref, $type) = ($1, $2 || "", $3);
            push(@$replacement, qq($var$ref:$type));
            unless ($self->{vars_left}->{$var}) {
                print Dumper($self->{vars_left}), "\n";
                die "parse_right: Undefined variable reference: $var in { $rule }";
            }
        } elsif (/^VAR_ID\s+<(.*)>$/) {
            push(@$replacement, $1);
        } elsif (/^[BE]_GRP\s+<(.)>$/) {
            push(@$replacement, $1);
        } elsif ($ast_right->{json}) {
            push(@$replacement, $_);
        } else {
            die "parse_right: unknown type of token: $_ in { $rule }\n";
        }
    }
    if ($debug) {
        print "## right rule: $rule\n";
        print "##AST(right):", Dumper($ast_right), "\n";
    }

    return $ast_right;
}

sub add_ref {
    my ($tk, $id) = @_;
    state $_REF_COUNT = 0;
    state %_REF;

    return $tk unless $tk;

    my $ref = $_REF{$id};
    if ($ref) {
        delete $_REF{$id};
    } else {
        $_REF{$id} = ($ref = sprintf("#X%04d", ++$_REF_COUNT))
    }

    if ($tk =~ /^(\w+\s+)##\w+(\s+\<.*)$/) {
        $tk = "$1$ref$2";
    } elsif ($tk =~ /^(\w+)(\s+<.*)$/) {
        $tk = "$1 $ref$2";
    }
    return $tk;
}

sub change_type {
    my $type = pop(@_);
    if (@_ > 1) {
        map(s/^\w+\s+(?:#\w+\s+)?<(.*)>$/$1/, @_);
        return "$type\t<" . join("", @_) . ">";
    } elsif ($_[0]) {
        $_[0] =~ s/^\w+(\s+(?:#\w+\s+)?<.*>)$/$type$1/;
        return $_[0];
    }
    return ();
}

#########

sub parse_context {
    my $self = shift;
    my $tokens = $self->{tokens};

    my @left = ();
    until ($tokens->[0] =~ /^C_R/) {
        push(@left, shift(@$tokens));
    }
    ### left
    return $self->parse_left(\@left);
}

#########

sub isObj {
    return ref $_[0] eq "HASH";
}

# the original implementation of match_token, which has side effects to ID_TBL.
sub match_token {
    my ($pt, $id, $tk, $ID_TBL) = @_;

    print "match_token: $pt, $id =~ $tk " if $debug;

    my $matched = 0;
    if ($tk =~ $pt) {
        $matched = 1;

        if ($id) {
            my ($tk_id) = ($tk =~ m/^\w+\s+(#\w+)\b/);

            if (defined $tk_id && &id_is_end($id)) {
                $matched = &id_test($ID_TBL, $id, $tk_id);
            } else {
                &id_add($ID_TBL, $id, $tk_id);
            }
        }
    }

    print ($matched ? "Matched\n" : "Unmatched\n") if ($debug);
    return $matched;
}

# an alternative version of match_token, which does not have side effect to ID_TBL.
# Isntead, id_add() may be called after calling this rotuines.
sub match_token_no_sideeffect {
    my ($pt, $id, $tk, $ID_TBL) = @_;

    print "match_token: $pt, $id =~ $tk " if $debug;

    my $matched = 0;
    my $begin_id;
    if ($tk =~ $pt) {
        $matched = 1;

        if ($id) {
            my ($tk_id) = ($tk =~ m/^\w+\s+(#\w+)\b/);

            if (&id_is_end($id)) {
                $matched = &id_test($ID_TBL, $id, $tk_id);
            } else {
                $begin_id = $tk_id;
                #		&id_add($ID_TBL, $id, $tk_id);
            }
        }
    }

    print ($matched ? "Matched\n" : "Unmatched\n") if ($debug);
    return ($matched, $begin_id);
}


##################

sub id_new {
    return { id => {} };
}

sub id_clone {
    my $id = shift;
    return { id => { %{$id->{id} || {}} } };
}

sub id_is_end {
    return $_[0] =~ m/^#E/;  # defined in extract_group_tokens.
}

sub id_test {
    my ($ID_TBL, $id, $tk_id) = @_;

    $id =~ s/^#E/#B/;

    return ($ID_TBL->{id}->{$id} || "") eq $tk_id ? 1 : 0;
}

sub id_add {
    my ($ID_TBL, $id, $tk_id) = @_;
    $ID_TBL->{id}->{$id} = $tk_id;
}

sub id_have_matched {
    return $_[0]->{matched};
}

##################

sub match_var {
    my ($VAR, $name, $tk, $ind) = @_;

    print "match_var: $name => $tk->[$ind] " if $debug;

    if ($VAR->{$name} && &is_same_token($VAR->{$name}, $tk, $ind)) {
        print "Matched\n" if $debug;
        return 0;
    }
    print "Unmatched\n" if $debug;
    return 1;
}

sub is_same_token {
    my ($var_reg, $tk, $ind) = @_;
    return 0 unless $var_reg->[0] == $var_reg->[1]; # something wrong

    # the tokens should be compared uniformly.
    return &normalize_token($tk->[$var_reg->[0]])
        eq &normalize_token($tk->[$ind]);
}

sub normalize_token {
    my $t = shift;
    my @t = ($t =~ /^(\w+)\s+(?:(#\w+)\s+)?(<.*>)$/);
    $t[1] ||= "";
    return join("\t", @t);
}

#########

sub tokenizer {
    my $self = shift;
    my $rule = shift;

    my @tokens = $self->{Tokenizer}->set_input($rule)->tokens();
    my @res = ();
    my $in_cmt = 0;
    my $line = 1;
    my $line_skip = 1;
    foreach (@tokens) {
        if (/^SP_(?:NL|C)/) {
            ++$line;
            $line_skip = 1;
        }
        next if /^(SP|UNIT_(BEGIN|END))/;
        if (/^B_CMT/) {
            $in_cmt++;
            next;
        } elsif (/^E_CMT/) {
            $in_cmt--;
            next;
        }
        next if ($in_cmt);
        if (/^_(\w+)\t<.(\*?)>$/) { # syntax sugar, see rule-token2.def.
            my $t = ($2 ? "" : "_");
            $_ = "VAR_PNAME\t<\$:\@$t$1>";
        } elsif (/^(VAR_PNAME\s+<\$\w*:)(>)$/) {
            $_ = $1 . '@ANY' . $2;
        }
        if ($line_skip) {
            $line_skip = 0;
            push(@res, "#L $line");
        }
        chomp;
        push(@res, $_);
    }

    return @res;
}


sub include_path {
    my $self = shift;
    my $path = shift;
    $self->{path} = [ split(":", $path) ];
    return $self;
}

sub include_file {
    my $self = shift;
    my $fname = shift;
    my $path;
    if ($fname =~ m|^/|) { # absolute path
        $path = $fname;
    } else {
        foreach (@{$self->{path}}) {
            next unless (-f "$_/$fname");
            $path = "$_/$fname";
            last;
        }
    }
    die "include_file: No '$fname' exists." unless $path;
    open(my $f, '<', $path) || die "include_file: error for $path: $!";
    my @rules = $self->tokenizer(join("", <$f>));
    close($f);
    return @rules;
}

sub extract_include_files {
    my $self = shift;
    my $tokens = $self->{tokens};
    for (my $i = 0; $i< @$tokens; $i++) {
        if ($tokens->[$i] =~ /INCLUDE\s+<INCLUDE\s"(.*)">/) {
            splice(@$tokens, $i, 1, $self->include_file($1));
        }
    }
    return $self;
}

#########################################################################

sub strip_token {
    my ($t) = ($_[0] =~ m/^[^<]+<(.*)>$/);
    return $t;
}

sub check_syntax {
    my $tkn = shift;
    my (@pair, @grp);
    my $err;
    my $i;
    my $line = 1;
    my $in_right = 0;
    my $in_macro = 0;
    #    print Dumper($tkn);
    for ($i = 0; $i < @$tkn; $i++) {
        $_ = $tkn->[$i];

	if (/^PNAME/) {
	    $in_macro = 1;
	} elsif (!$in_macro && /^TO\b/) {
            $in_right = 1;
        } elsif (/^C_R/) {
            $in_right = 0 if $in_right;
            $in_macro = 0 if $in_macro;
        }

        if ($in_right && /^VAR_PNAME\b/) {
            $err = 1;
            last;
        }

        if (/^UNKNOWN\s+<(.*)>$/) {
            $err = 1;
            print "Unknown token: $1\n";
            last;
        } elsif (/^#L\s+(\d+)/) {
            $line = $1;
            next;
        }

        # check the the balance of pair tokens and curly braces.
        if (/^(?:C_L|(?:VAR_TYPE|TOKEN_TEXT)\s+<\()/) {
            push(@pair, [ $i, $_]);
        } elsif (/^(?:VAR_TYPE|TOKEN_TEXT)\s+<.*\)>$/) {
            unless (@pair && $pair[-1]->[1] =~ /^(VAR_TYPE|TOKEN_TEXT)\s+<\(/) {
                $err = 1;
                last;
            }
            pop(@pair);
        } elsif (/^C_R/) {
            unless (@pair && $pair[-1]->[1] =~ /^C_L/) {
                $err = 1;
                last;
            }
            pop(@pair);
        }

        # check the balance of group tokens, context parentheses and curly braces.
        if (/^(?:C_L|P_L|B_GRP)/) {
            push(@grp, [$i, $_]);
        } elsif (/^E_GRP/) {
            unless (@grp  && $grp[-1]->[1] =~ /^B_GRP/) {
                $err = 1;
                last;
            }
            pop(@grp);
        } elsif (/^P_R/) {
            unless (@grp && $grp[-1]->[1] =~ /^P_L/) {
                $err = 1;
                last;
            }
            pop(@grp);
        }  elsif (/^C_R/) {
            unless (@grp && $grp[-1]->[1] =~ /^C_L/) {
                $err = 1;
                last;
            }
            pop(@grp);
        }

    }

    if (!$err) {
        if (@pair) {
            $err = 1;
            $i = $pair[-1]->[0];
        } elsif (@grp) {
            $err = 1;
            $i = $grp[-1]->[0];
        }
    }

    if ($err) {
        my $j;
        for ($j = $i; $j >= 0; $j--) {
            if ($tkn->[$j] =~ /^#/) {
                $j++;
                last;
            }
        }
        my @rule;
        for (; $j < @$tkn; $j++) {
            last if ($tkn->[$j] =~ /^#/);
            (my $t = $tkn->[$j]) =~ s/^\w+\s+<(.*)>$/$1/;
            $t = "<< $t >>" if $j == $i;
            push(@rule, $t);
        }
        die "Error: Unbalanced tokens or unknown tokens at line $line:\n  ",
            join(" ", @rule[($#rule-10 > 0 ? $#rule-10 : 0) .. $#rule]), "\n\n";
    }
}

1;
