#!/usr/bin/perl
#
#	cpm-v5:	a prototype "C with pattern matching" to C translator..
#
#	This fifth version of our tool reads every line of input,
#	identifies %when, %assert, %while and %foreach lines,
#	copies all other lines to stdout unchanged,
#	and then parses each % line into it's pieces.
#	It reads the Datadec metadata file (that datadec-1.3 can now
#	can now generate via datadec -m) in order to determine the
#	types of shape parameters.
#
#	So all shape parameter types in the input .cpm file MUST BE REMOVED.
#
#	(C) June 2018, Duncan C. White
#

use strict;
use warnings;
use 5.010;
use Data::Dumper;
use Function::Parameters;


#
# my %types = read_datadec_metadata( $filename );
#	Read the datadec-generated metadata file $filename, listing types,
#	shapes and a list of the parameter types that shape takes.
#	Build and return %types, a mapping from type_shape to the comma
#	separated list of typenames.
#
fun read_datadec_metadata( $filename )
{
	open( my $fh, '<', $filename ) ||
		die "can't open datadec-generated $filename\n";
	my %result;
	while( <$fh> )
	{
		chomp;
		s/\s+/_/;
		my( $ts, $paramtypes ) = split( /\s+/ );
		$result{$ts} = $paramtypes;
		#print "ts=$ts, types=$paramtypes\n";
	}
	close( $fh );
	return %result;
}


my %istype;		# set of datadec types that exist
my %shapetypes;		# mapping from type_shape -> list of parameter types
my $infh;		# fd of current C+PM file we're translating
my $lineno = 0;		# current line no inside $infh
my $currline;		# current line (set by nextline(), used in fatal())


#
# my $line = nextline();
#	Read the next line from $infh, incrementing $lineno afterwards,
#	and return it.
#
fun nextline()
{
	my $line = <$infh>;
	$currline = $line;
	$lineno++;
	return $line;
}


#
# fatal( $whatsleft, $msg );
#	Given $whatsleft (a suffix of $currline) and a message $msg, print
#	a standard-formatted fatal error, pointing to the correct place in
#	the line (using length(currline) - length(whatsleft) as the basic
#	source of indentation information), and die.
#
fun fatal( $whatsleft, $msg )
{
	$currline =~ s/^\t/        /;		# expand tabs to spaces
	$whatsleft =~ s/^\t/        /;
	my $pos = length($currline) - length($whatsleft) - 1;
	my $indent = ' ' x $pos;
	my $err = "$currline$indent^ Error at line $lineno: $msg\n";
	die "\n$err\n";
}


#
# my( $command, $type, $var, $shape, $arglist ) = parse_match( $line );
#	After checking that $line starts with a %when|%assert|%while, parse
#	the rest of the line.
#	If it parses return (command, type, var, shape, arglist)
#	otherwise die via fatal()
#
#	'%when|%assert|%while' TYPE(ID) VAR(ID) 'is' SHAPE(ID) ( '(' ARGLIST ')' )
#	where ARGLIST is a comma separated list of ['-'] paramnames,
#	where each parameter name is an ID.
#
fun parse_match( $line )
{
	$line =~ s/^(%\S+)\s*//;
	my $command = $1;
	$command =~ s/shape$//;	# %when or %whenshape etc.. reduce to %when
	my $sofar = $command;

	my $pretypeline = $line;
	fatal( $line, "ID (type name) expected after <<$sofar>>" )
		unless $line =~ s/^(\w+)\s+//;
	my $type = $1;
	$sofar .= " $type";

	# now check that the type actually exists:-)
	fatal( $pretypeline, "No such type <<$type>>" ) unless $istype{$type};

	fatal( $line, "ID (var name) expected after <<$sofar>>" )
		unless $line =~ s/^(\w+)\s+//;
	my $var = $1;
	$sofar .= " $var";

	fatal( $line, "'is' expected after <<$sofar>>" )
		unless $line =~ s/^is\s+//;
	$sofar .= " is";

	my $preshapeline = $line;
	fatal( $line, "ID (constructor name) expected after <<$sofar>>" )
		unless $line =~ s/^(\w+)\s*//;
	my $shape = $1;
	$sofar .= " $shape";

	# now check that the shape actually exists:-)
	fatal( $preshapeline, "No shape <<$shape>> in <<$type>>" )
		unless defined $shapetypes{"${type}_$shape"};

	# that may be all..
	return( $command, $type, $var, $shape, "" ) if $line =~ /^\s*$/;

	# or we need '(' arglist ')'
	fatal( $line, "'(' expected after <<$sofar>>" )
		unless $line =~ s/^\(\s*//;

	fatal( substr($line,-1,1), "')' expected at end of line" ) unless
		$line =~ s/\s*\)$//;

	# have an arglist left now.  should really check it's
	# syntactically valid but that's not really our business.
	return( $command, $type, $var, $shape, $line );
}


#
# my( $command, $type, $var, $paramvar ) = parse_foreach( $line );
#	After checking that $line starts with a %foreach, parse
#	the rest of the line.
#	If it parses return (command, type, var, paramvar)
#	otherwise die via fatal()
#
#	'%foreach' PARAMVAR 'in' TYPE VAR
#
fun parse_foreach( $line )
{
	$line =~ s/^(%\S+)\s*//;
	my $command = $1;
	$command =~ s/shape$//;	# reduce %foreachshape etc.. to %foreach

	my $sofar = $command;
	fatal( $line, "ID (var name) expected after <<$sofar>>" )
		unless $line =~ s/^(\w+)\s+//;
	my $paramvar = $1;
	$sofar .= " $paramvar";

	fatal( $line, "'in' expected after <<$sofar>>" )
		unless $line =~ s/^in\s+//;
	$sofar .= " in";

	my $pretypeline = $line;
	fatal( $line, "ID (type name) expected after <<$sofar>>" )
		unless $line =~ s/^(\w+)\s+//;
	my $type = $1;
	$sofar .= " $type";

	# now check that the type $type exists
	fatal( $pretypeline, "No such type <<$type>>" ) unless $istype{$type};

	# now check that $type is a list, ie. has a "cons" shape
	fatal( $pretypeline, "No such <<$type>> shape <<cons>>" )
		unless defined $shapetypes{"${type}_cons"};

	fatal( $line, "ID (var name) expected after <<$sofar>>" )
		unless $line =~ s/^(\w+)//;
	my $var = $1;

	fatal( $line, "end-of-line expected after <<$sofar>>" ) unless
		$line =~ s/^\s*$//;

	return( $command, $type, $var, $paramvar );
}


#
# my $breakdown = take_object_apart( $type, $var, $shape, $arglist );
#	Generate a single long line of C code that will take the object
#	apart, into it's component arguments:
#	- Declare variables for all arguments in $arglist unless they
#	  have a leading '-'.
#	- then call the get_{type}_{shape}() deconstructor with the
#	  addresses of each of the argument variables.
#	Return the take apart string..
#
fun take_object_apart( $type, $var, $shape, $arglist )
{
	# remove any and all whitespace
	$arglist =~ s/\s+//g;

	# if the shape has no arguments, then we don't need to take it apart
	return "" unless $arglist;

	# ok, we have one or more arguments, comma separated:
	my @arg = split(/,/, $arglist );

	# retrieve the shape types from %shapetypes
	my $types = $shapetypes{"${type}_$shape"};
	die "No such shape $shape of type $type\n" unless defined $types;
	my @type = split(/,/, $types );
	my $n = @type;
	my $has = @arg;

	die "type $type, shape $shape($types): should have $n parameters,".
	    " but has $has arguments ($arglist)\n"
	    	unless $n == $has;

	my $declns = "";
	foreach my $i (0..$#type)
	{
		$declns .= "$type[$i] $arg[$i]; " unless $arg[$i] =~ /^-/;
	}
	my $argstr = join( ', ', map { s/^-//; "&$_" } @arg );
	my $decons = "get_${type}_${shape}( $var, $argstr );";
	my $result = "$declns$decons\n";
	return $result;
}


#
# handle_when( $line, $indent, $ofh );
#	Ok, $line starts with a %when (still in the line), and we've already
#	removed any leading indentation (in $indent).  Handle the %when line
#	and it's following '{', printing valid C output to $ofh.
#
fun handle_when( $line, $indent, $ofh )
{
	my( $command, $type, $var, $shape, $arglist ) = parse_match( $line );

	# produce the %when comment line
	print $ofh "$indent// $line:\n";

	# produce the if-line
	my $test = "${type}_kind($var) == ${type}_is_${shape}";
	print $ofh "${indent}if( $test )\n";

	# get the next line, check that it's a bare '{', and print it out
	my $line = nextline();
	fatal( $line, "$command: { expected at eof" ) unless defined $line;
	fatal( $line, "$command: bare { expected at same indentation, " )
		unless $line =~ /^$indent\s*\{\s*$/;
	print $ofh $line;

	# then print out code to take the object apart
	my $takeapart = take_object_apart( $type, $var, $shape, $arglist );
	print $ofh "${indent}\t$takeapart" if $takeapart;
}


#
# handle_while( $line, $indent, $ofh );
#	Ok, $line starts with a %while (still in the line), and we've already
#	removed any leading indentation (in $indent).  Handle the %while line
#	and it's following '{', printing valid C output to $ofh.
#
fun handle_while( $line, $indent, $ofh )
{
	my( $command, $type, $var, $shape, $arglist ) = parse_match( $line );

	# produce the %while comment line
	print $ofh "$indent// $line:\n";

	# produce the while-line
	my $test = "${type}_kind($var) == ${type}_is_${shape}";
	print $ofh "${indent}while( $test )\n";

	# get the next line, check that it's a bare '{', and print it out
	my $line = nextline();
	fatal( $line, "$command: { expected at eof" ) unless defined $line;
	fatal( $line, "$command: bare { expected at same indentation, " )
		unless $line =~ /^$indent\s*\{\s*$/;
	print $ofh $line;

	# then print out code to take the object apart
	my $takeapart = take_object_apart( $type, $var, $shape, $arglist );
	print $ofh "${indent}\t$takeapart" if $takeapart;
}


#
# handle_foreach( $line, $indent, $ofh );
#	Ok, $line starts with a %foreach (still in the line), and we've already
#	removed any leading indentation (in $indent).  Handle the %foreach line
#	and it's following '{', printing valid C output to $ofh.
#
fun handle_foreach( $line, $indent, $ofh )
{
	my( $command, $type, $var, $paramvar ) = parse_foreach( $line );

	# produce the %foreach comment line
	print $ofh "$indent// $line:\n";

	# produce the while-line
	my $test = "${type}_kind($var) == ${type}_is_cons";
	print $ofh "${indent}while( $test )\n";

	# get the next line, check that it's a bare '{', and print it out
	my $line = nextline();
	fatal( $line, "$command: { expected at eof" ) unless defined $line;
	fatal( $line, "$command: bare { expected at same indentation, " )
		unless $line =~ /^$indent\s*\{\s*$/;
	print $ofh $line;

	# then print out code to take the object apart
	my $takeapart = take_object_apart( $type, $var,
		"cons", "$paramvar, -$var" );
	print $ofh "${indent}\t$takeapart" if $takeapart;
}


#
# handle_assert( $line, $indent, $ofh );
#	Ok, $line starts with a %assert (still in the line), and we've already
#	removed any leading indentation (in $indent).  Handle the %assert line,
#	printing valid C output to $ofh.
#
fun handle_assert( $line, $indent, $ofh )
{
	my( $command, $type, $var, $shape, $arglist ) = parse_match( $line );

	# produce the %assert comment line
	print $ofh "$indent// $line:\n";

	# produce the assert-line
	my $test = "${type}_kind($var) == ${type}_is_${shape}";
	print $ofh "${indent}assert( $test );\n";

	# then print out code to take the object apart
	my $takeapart = take_object_apart( $type, $var, $shape, $arglist );
	print $ofh "${indent}$takeapart" if $takeapart;
}


#
# handle_line( $line, $ofh );
#	handle $line [and if necessary, any subsequent lines of input],
#	print out whatever text is generated (or copied) to $ofh
#
fun handle_line( $line, $ofh )
{
	unless( $line =~ /^\s*%/ )
	{
		print $ofh "$line\n";
		return;
	}
	$line =~ s/^(\s*)//;
	my $indent = $1;
	print "debug: line is <<$line>> at line $lineno\n";

	if( $line =~ /^%when/ )
	{
		handle_when( $line, $indent, $ofh );
	} elsif( $line =~ /^%while/ )
	{
		handle_while( $line, $indent, $ofh );
	} elsif( $line =~ /^%assert/ )
	{
		handle_assert( $line, $indent, $ofh );
	} elsif( $line =~ /^%foreach/ )
	{
		handle_foreach( $line, $indent, $ofh );
	} else
	{
		fatal( $line, "%when/%while/%assert/%foreach expected" );
	}
}


die "Usage: cpm-v5 datadec-metadata-file filename\n" unless @ARGV == 2;
my $mdfilename = shift;
die "cpm: no datadec-generated metadata file $mdfilename, ".
    "run datadec -m to generate it\n" unless -f $mdfilename;

my $inputfilename = shift;
my $cfilename = $inputfilename;
$cfilename =~ s/pm$//;

open( $infh, '<', $inputfilename ) || die "cpm: can't open $inputfilename\n";

%shapetypes = read_datadec_metadata( $mdfilename );
#print Dumper \%shapetypes;
%istype = map { s/_.*$//; $_ => 1 } keys %shapetypes;
#print Dumper \%istype;

unlink( $cfilename );
open( my $cfh, '>', $cfilename ) || die "cpm: can't create $cfilename\n";

while( defined( $_ = nextline() ) )
{
	chomp;
	handle_line( $_, $cfh );
}
close( $infh );
close( $cfh );
