#!/usr/bin/perl
# generate random strings in a given grammar
# create a file with one rule per line in the form:
#  S -> aB | A | epsilon
# and run the program like this:
#  ./cfgrunner.pl [number of strings to generate] [max length] [-v] < rules-file.txt
#
# If specifying the -v switch (verbosity), you must include the first two 
# arguments (number of strings, max length). The verbosity flag will
# instruct the program to output the strings as they are found, as well as any
# ambiguous derivations.
# 
# Matt Sparks, 2007-03-07
use strict;
use Data::Dumper;

my %rules;

# read input
read_rules();

my $num_to_generate = shift @ARGV;
$num_to_generate ||= 100;

my $max_length = shift @ARGV;
$max_length ||= 50;

# print settings
print STDERR ">>> Using rules:\n";
print_rules();

printf STDERR ">>> Strings to generate: %d\n", $num_to_generate;
printf STDERR ">>> Max string length: %d\n", $max_length;

my %strings;
my %ambig;

while (keys %strings < $num_to_generate) {
    my($str,$deriv) = derive("S", ["S"], $max_length);
    next if $str == -1;

    if ($ARGV[0] eq "-v" && !$strings{$str}) { 
        printf STDERR "%d: %s\n", scalar(keys(%strings))+1, $str;
        #print Dumper $deriv;
    } elsif ($ARGV[0] eq "-v" && $strings{$str}) {
        if (acmp($deriv,$strings{$str}) != 0 && !$ambig{$str}) {
            printf STDERR "Grammar is ambiguous (%d)\n", scalar(keys(%ambig))+1;
            print STDERR return_derivation($deriv)."\n";
            print STDERR return_derivation($strings{$str})."\n";
            $ambig{$str} = 1;
            #exit;
        }
    }       

    $strings{$str} = $deriv;
}

for (sort keys %strings) {
    print "$_\n";
}

sub add_rule
{
    my($variable,@deriv) = @_;

    $rules{$variable}->{$_} = 1 for @deriv;
}

sub read_rules
{
    while (<STDIN>) {
        s/\s//g; # remove all whitespace
        s/\#.*?$//;
        my($variable,$deriv_str) = split /\-\>/;
        my @rules = split /\|/, $deriv_str;

        for (@rules) {
            s/epsilon//;
            add_rule($variable, $_);
        }
    }
}

sub print_rules
{
    for my $var (keys %rules) {
        my @var_rules = keys %{$rules{$var}};
        my $str = join(" | ", @var_rules);
        printf STDERR "$var -> %s\n", $str;
    }    
}

sub derive
{
    my($string, $derivsteps, $max_length) = @_;

    if ($string !~ /[A-Z]/) {
        #print STDERR "\n";
        return -1 if ($max_length > 0 && length $string > $max_length);

        # no variables left to derive
        return ($string, $derivsteps);
    }

    # still not derived and we're at max length (or so...)
    if ($max_length > 0 && length $string > $max_length**2) {
        #print STDERR "\n";
        return -1;
    }

    # find left-most variable
    my($var) = $string =~ /([A-Z])/;
    my $deriv = rand_deriv($var);
    #print "picked random rule: $var -> '$deriv'\n";

    my $new = $string;
    $new =~ s/$var/$deriv/; # replace it.
    #print STDERR "=> $new \n";

    push @$derivsteps, $new;

    return derive($new, $derivsteps, $max_length);
}

sub rand_deriv
{
    my($variable) = @_;

    my @derivs = keys %{$rules{$variable}};
    return $derivs[int(rand(scalar @derivs))];
}

sub acmp
{
    my($a1,$a2) = @_;
    my $a1_size = @$a1;

    return -1 if @$a1 < @$a2;
    return 1 if @$a2 > @$a1;

    for (0..$a1_size-1) {
        return -1 if $a2->[$_] ne $a1->[$_];
    }

    return 0;
}

sub return_derivation
{
    my($steps) = @_;
    return join(" => ",@$steps);
}
