use strict;
use warnings;
use Compress::Zlib;

my $field = $ARGV[0];
my $file = gzopen ("skydotID_$field.sql.gz", "rb");
my ($wtFactor, $minPoints, $percentile, $BadFlags) = (0.1, 50, 2, 15636);
my $statsid = 0;
my (@data, @sorted);

printf "#ID	Max	Avg	Min	N	Pct%02d	Median	Pct%02d	Stdev	Skewness	Kurtosis	MSSD	EA1	EA2	EA3\n", $percentile, 100 - $percentile;


# Open the file with the HJD for each frame
my $FrameFile = "Skydot-Frames.bin";
(open FRAME, $FrameFile) || (die "File $FrameFile not found!");
binmode FRAME;


sub HJD
{	# Returns HJD - 2450000
	my $frame = $_[0];
	seek FRAME, 3 * ($frame - 1), 0;
	my $buffer;
	read FRAME, $buffer, 3;
	my ($b2, $b1) = unpack("SC", $buffer);
	my $x = ($b2 << 8) | $b1;
	return 1000 + $x/10000;
}


sub createdatafile
{	# Create a data file
	open DAT, ">NSVS$statsid.dat";

	for my $rec (@data)
	{
		printf DAT "%.4f\t%.3f\t%.3f\t%s\n", HJD($rec->{frame}), $rec->{mag}, $rec->{err}/1000, $rec->{flags};
	}
	close DAT;
}


sub stdevrange
{	# Calculate standard deviation on a given range
	my ($from, $to) = @_;
	my ($sumwt, $sumx, $sumx2) = (0, 0, 0);
	for my $i (0..@data-1)
	{
		my ($w, $m) = ($data[$i]->{wt}, $data[$i]->{mag});
		next if ($m > $to);
		next if ($m < $from);
		$sumwt += $w;
		$sumx += $w * $m;
		$sumx2 += $w * $m * $m;
	}
	return sqrt($sumx2/$sumwt - ($sumx/$sumwt)**2);
}


sub medianerr
{	# Returns the median uncertainty in a given range
	my ($from, $to) = @_;
	my @errsorted = sort { $a->{err} <=> $b->{err} } map { $sorted[$_] } ($from..$to);
	return $errsorted[int(($to - $from)/2)]->{err};
}

sub stats
{
	my $count = @data;

	# Only do the calculations when there are enough data points
	return if ($count < $minPoints);

	@sorted = sort { $a->{mag} <=> $b->{mag} } @data;  # Sort the series on magnitude
	my ($max, $min) = ($sorted[0]->{mag}, $sorted[$count-1]->{mag});
	my $pct = int($count * $percentile/100 + 0.5);
	my ($pctfirst, $pctlast) = ($sorted[$pct]->{mag}, $sorted[$count-1-$pct]->{mag});
	my ($median, $quartile) = ($sorted[int($count/2)]->{mag}, $sorted[int(3*$count/4)]->{mag});

	# Calculate average etc., but skip everything that falls outside of ($pctfirst, $pctlast)
	my ($cnt, $summag, $sumwt, $ssd, $sumssdwt) = (0, 0, 0, 0, 0, 0);
	my $prev;
	for my $i (0..$count-1)
	{
		my ($w, $m) = ($data[$i]->{wt}, $data[$i]->{mag});
		next unless (($m >= $pctfirst) && ($m <= $pctlast));
		$cnt++;
		$summag += $w * $m;
		$sumwt += $w;
		if (defined $prev)
		{
			my $ssdwt = sqrt($w * $w + ($data[$prev]->{wt})**2);
			$sumssdwt += $ssdwt;
			$ssd += $ssdwt * ($m - $data[$prev]->{mag})**2;
		}
		$prev = $i;
	}

	my $avg = $summag/$sumwt;

	my ($sum2, $sum3, $sum4) = (0, 0, 0);
	for my $i (0..$count-1)
	{
		my ($w, $m) = ($data[$i]->{wt}, $data[$i]->{mag});
		next unless (($m >= $pctfirst) && ($m <= $pctlast));
		$m -= $avg;
		$sum2 += $w * $m**2;
		$sum3 += $w * $m**3;
		$sum4 += $w * $m**4;
	}

	my $variance = $cnt * $sum2/$sumwt/($cnt - 1);
	my $stdev = sqrt($variance);
	my $skewness = $sum3/$sumwt/$stdev/$variance;
	my $kurtosis = $sum4/$sumwt/$variance/$variance;
	my $mssd = 1 - $ssd/$sumssdwt/$variance/2;

	my $eaindex1 = ($pctlast - $median)/($median - $pctfirst);
	my $eaindex2 = stdevrange($median, $pctlast)/stdevrange($pctfirst, $median);
	my $eaindex3 = $eaindex2 * medianerr(int($count/10), int($count/5))/medianerr($count - int($count/10) - 1, $count - 1);

	printf "$statsid\t%.2f\t%.2f\t%.2f\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", 
			$max, $avg, $min, 
			$count, $pctfirst, $median, $pctlast, $stdev, $skewness, $kurtosis, 
			$mssd, $eaindex1, $eaindex2, $eaindex3;

	# Now apply all the cuts
	return if ($eaindex1 < 2);
	return if ($eaindex2 < 2);
	return if ($eaindex3 < 0.8);
	return if ($mssd > 0.75);
	return if ($skewness < 0.8);

	createdatafile;
}


while ($file->gzreadline(my $line))
{
	chomp($line);
	my ($id, $frame, $x, $y, $mag, $emag, $flags) = split(';', $line);
	if ($id != $statsid)
	{
		stats if ($statsid);
		undef @data;
		$statsid = $id;
		print STDERR "$id " if ($id % 100 == 0);
	}
	next if ($flags & $BadFlags);

	my %rec = (mag => $mag/1000, wt => 1/($emag * $emag + $wtFactor), err => $emag, frame => $frame, flags => $flags);
	push @data, \%rec;
}

print STDERR "\n";

stats if ($statsid);

