#!/usr/bin/perl
#
# Script to estimate marker allele frequencies
# from the loki output.  Reuires the use of the
#   output frequency "freqfile" 
# command in the loki parameter file.  
#
# Usage: freq.pl -l -p ppoint -s spacing freqfile
#
# -l option outputs in format suitable for LINKAGE data files
# -p option sets calculation of confidence limits.
# e.g. -p 0.025 will give the 2.5% and 97.5% confidence limits.
# This requires more memory and will slow down the script.
# This option is ignored if -l option also set.
# -s sets the spacing of samples considered (default=1)
# -i sets the range of iterations to consider
#
# Works with loki_2.2 and above
# 
# Simon Heath - September 2000
#
use strict;
use Getopt::Std;
my($flag,@mkname,$nmk,%opt,@freq,@fd,$i,$j,$k,$n,$mk,$ngen,$line,%al);
my($idx,@ff,$ffr,@nall,@all,$nflag,$est_aff,$ct,$start,$stop);
getopts('lp:s:i:h?',\%opt);
if($opt{h} || $opt{'?'}) {
	 print "usage: freq.pl -l -p ppoint -s spacing freqfile\n";
	 exit(0);
}
undef $opt{p} if($opt{l});
die "percentage point for confidence limits must be between 0 and 1\n" if($opt{p}<0.0 || $opt{p}>1.0);
$opt{s}=1 if($opt{s}<1);
# -i option sets a range of iterations to consider
if($opt{i}) {
  my $tmp=$opt{i};
  if($tmp=~/^([1-9][0-9]*)(.*)/) {
    $start=$1;
	 $tmp=$2;
  }
  if($tmp=~/^[,-:](.*)/) {
    $tmp=$1;
	 if($tmp=~/^([1-9][0-9]*)/) {
	   $stop=$1;
	 }
  }
  die "Bad -i option\n" if(!defined($start) && !defined($stop));
  if(defined($start) && defined($stop) && $start>$stop) {
    $tmp=$start;
	 $start=$stop;
	 $stop=$tmp;
  }
  $start=1 if(!defined($start));
}
while(<>) {
	 $line++;
	 if($flag) {
		  split;
		  $j=@_;
		  next if(!$j);
		  $ct++;
		  next if(($ct-1)%$opt{s});
		  next if($_[0]<$start);
		  last if(defined($stop) && $_[0]>$stop);
		  $idx=1;
		  for($mk=0;$mk<$nmk;$mk++) {
				my @tmp;
				for($i=0;$i<$nall[$mk]-$nflag;$i++) {
					 for($k=0;$k<$ngen+$est_aff;$k++) {
						  die "Not enough columns at line $line\n" if($idx>=$j);
						  $tmp[$k]+=$_[$idx];
						  if($opt{p}) {
								$ff[$mk][$i][$k][$n]=$_[$idx];
						  }
						  $freq[$mk][$i][$k]+=$_[$idx++];
					 }
				}
				if($nflag) {
					 for($k=0;$k<$ngen+$est_aff;$k++) {
						  if($opt{p}) {
								$ff[$mk][$i][$k][$n]=1.0-$tmp[$k];
						  }
						  $freq[$mk][$i][$k]+=1.0-$tmp[$k];
					 }
				}
		  }
		  $n++;
	 } else {
		  if(/^---*$/) {$flag=1;}
		  elsif(/^\d+ (\S+): (.*)$/) {
				$mkname[$nmk]=$1;
				@fd=split ' ',$2;
				$nall[$nmk]=@fd;
				for($i=0;$i<$nall[$nmk];$i++) {
					 $all[$nmk]{$i}=$fd[$i];
				}
				if($fd[$i-1]=~/\((.*)\)/) {
					 $nflag=1;
					 $all[$nmk]{$i-1}=$1;
				}
				$nmk++;
		  } elsif(/No. genetic groups: (\d+)/) {
				$ngen=$1;
		  } elsif(/Estimating allele frequencies amongst affecteds/) {
				$est_aff=1;
		  }
	 }
}
if($n) {
	 for($mk=0;$mk<$nmk;$mk++) {
		  print "3 $nall[$mk] # $mkname[$mk]" if($opt{l});
		  print "\n";
		  $j=$all[$mk];
		  %al=%$j;
		  foreach $i(sort {$al{$a}<=>$al{$b}} keys %al) {
				if($opt{l}) {
					 printf "%.6f ",$freq[$mk][$i][0]/$n;
				} else {
					 printf "%-10s",$mkname[$mk];
					 printf " %-8s",$al{$i};
					 for($k=0;$k<$ngen+$est_aff;$k++) {
						  printf " %.4f",$freq[$mk][$i][$k]/$n;
						  if($opt{p}) {
								$ffr=$ff[$mk][$i][$k];
								my @tmp=sort{$a<=>$b}@$ffr;
								printf " (%.4f-%.4f) ",$tmp[$n*$opt{p}],$tmp[$n*(1.0-$opt{p})];
						  }
					 }
				}
				print "\n" unless $opt{l};
		  }
		  print "\n\n" if($opt{l});
	 }
}
