#!/usr/bin/perl -w

# Kassandra v0.1.0b
# Copyright (C) 2001  Gunter Kuhnle

# Script to analyse a set of mass spectra generated by a SampleControl
# run. Kassandra distributes the spectra files according to the
# information obtained by the logfile generated by SampleControl

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.

# Author contact: gunter.kuhnle@web.de



# 
# Instructions 
#
# kassandra.pl [filenames]
#
# [filenames] is the list of mass spectra files. Furthermore,
# kassandra requires the logfile of the SampleControl run (named
# '00logfile.txt') and a mass table named "masses.dat" with name and
# mass of each compound.
#
# Kassandra creates directories for each assay (according to the
# sample name in the logfile) and distributes the data in the
# appropriate directory. The peak heights are determined applying a
# simple peak searching algorithm with the masses provided by the mass
# table and the relative intensity of each compound is calculated.
#
# Time/intensity-tables for each compound are generated and written in
# the respective directory. 
#
# Kassandra requires mass spectra and logfiles in the unix text
# format. The mac2unix perlscript may be used to convert the Mac
# textfiles generated by Multiview in the appropriate format.



# Declaration of Subroutines

sub Welcome;
sub ReadLogfile;
sub ScanLogfile;
sub CreateResultfiles;
sub DistributeRawdata;
sub CreatePlotfiles;
sub ReadMassfile;
sub GeneratePlotdata;

my $debug_info;	

$debug_info = 9; # 9: verbose, 0: none

my $logfile;
$logfile = "00logfile.txt";


my @acq_data;	# Sample name of each datafile
my @time_data;	# Time of each datafile
my @data_type; 

my $deviation;
 $deviation = 0.001; # Deviation from calc. Mass
 		     # Must be adjusted!

my @datafile_names;

my %converter;
my $mass_datafile;
 $mass_datafile = "masses.dat"; # Mass table filename

@datafile_names = @ARGV;



Welcome;		# Welcome message
ReadMassfile; 		# Read masstable
ReadLogfile;	        # Read logfile
ScanLogfile;		# Process logfile
CreateResultfiles;	# Create the Resultfile
DistributeRawdata; 	# Distribute the raw data files in the
			# appropriate directories
CreatePlotfiles;        # Create plotfiles
GeneratePlotdata;	# Write plotfiles

# -------------------------------------------------------------------
# Declaration of Subroutines
# -------------------------------------------------------------------
# -------------------------------------------------------------------

sub Welcome
{
    # Print Welcome Message

    print "\nKassandra v0.0.1\n";
    print "================\n\n";

    print "Programm to analyse a set of mass spectra generated\n";
    print "by an SampleControl run. (c)2001 gk\n\n";
    print "# Debug Level $debug_info\n" if ($debug_info);
}


sub ReadLogfile
{
    # Read the SampleControl logfile and store the data in @acq_data
    # and in @time_data.

    print "# Call of ReadLogFile with $logfile\n"
             if ($debug_info > 5);

    local *LOGFILE;
    local $seconds; local $h; local $m; local $s;
    local $dummy;
    local $noon_passed;
    local @input_line;
    local $_;
    local $firsttime; local $lasttime;
  
    $#acq_data = -1;
    $#time_data = -1;
    $firsttime = 0;
    $lasttime = 0;
    $noon_passed = 0;
  
    open(LOGFILE, $logfile) or die "Can't open logfile $logfile\n";
    while (<LOGFILE>)
    {
     	chop;
     	if ($_ =~ /Started/) 
     	{ 
       	    @input_line = split(/ /, $_);
       	    
	    push @acq_data, $input_line[10];
    
            ($h,$m,$s) = split(/:/, $input_line[5]);
            ($s,$dummy) = split(/\./, $s);
            $seconds = $h*3600+$m*60+$s;

            if ($firsttime == 0) { $firsttime = $seconds };

            $seconds = $seconds - $firsttime;
    
            # As the time format is in 12h format, we have to look 
	    # wether we went over 12/24h
    
            if ($lasttime > $seconds) {$noon_passed=1};
            
	    $seconds = $seconds + $noon_passed*12*3600;
            $lasttime = $seconds;
	    
            push @time_data, $seconds;
    	}
    }
    close (LOGFILE);
}
# End of Sub ReadLogfile

sub ScanLogfile
{
    # Scan Logfile for the reaction/data types acquired

    print "# Call of ScanLogfile with $#acq_data+1 entries!\n"
             if ($debug_info > 5);
    
    local $read_data;
    local %already_seen;
    
    $#data_type = -1;

    foreach $read_data (@acq_data)
    { 
        undef %already_seen;
    	for (@data_type) {$already_seen{$_} = 1;}
 
        if ($already_seen{$read_data}) {} 
        else 
	{
	    push @data_type, $read_data;
	    print "# $read_data pushed\n" if ($debug_info > 8);
	} 
    }
    
    print "# $#data_type Data Types found!\n" if ($debug_info);
}



sub DistributeRawdata
{
    # Distribute raw data files to specific directories

    local $iterator;
    local $read_data;
    local $_;
    local *TIMEFILE;

    # Create Directories
    foreach (@data_type) 
    {
    	mkdir $_, 0750; 
	mkdir "$_/rawdata", 0750;
	mkdir "$_/process", 0750
    }
    
    $iterator = 1;

    foreach $read_data (@acq_data)
    {
  	if ($iterator < 10) 
  	{ rename ("0$iterator.txt", "$read_data/rawdata/0$iterator.txt");
	  rename ("0$iterator.result", "$read_data/process/0$iterator.result");
	  open(TIMEFILE, "> $read_data/process/0$iterator.time");
	  print TIMEFILE "$time_data[$iterator-1]\n";
	  close(TIMEFILE);
	}
        else 
  	{ rename ("$iterator.txt", "$read_data/rawdata/$iterator.txt");
	  rename ("$iterator.result", "$read_data/process/$iterator.result");
	  open(TIMEFILE, "> $read_data/process/$iterator.time");
	  print TIMEFILE "$time_data[$iterator-1]\n";
	  close(TIMEFILE);
	}
    
  	$iterator++;
    }
}

# CreateResultfiles

sub CreateResultfiles
{
    local $olddata;
    local $_;
    local $output_filename;
    local $fn;
    local $filecounter;

    local $iterator;
    local $iterator_end;

    local $dummy;
    local $guess;

    local @datafile;
    local $datafile_mass;

    local $start_mass;
    local $end_mass;

    local $increment;

    local $high_intensity;
    local $intensity;
    local $high_intensity_mass;

    print "# Call of create result files\n" if ($debug_info > 5);

    if ($#datafile_names < 0)
     { die "Insufficient information. Please, specify filename!\n";}

    print "# Called with $#datafile_names files.\n" if ($debug_info > 5);


    $filecounter=0;

    print "# Starting to process $#datafile_names files.\n"
        if ($debug_info > 9);

    # Start to process file(s) given by command line parameters

    foreach $fn (@datafile_names)
    {
     	$filecounter++;
  	
	print "# Processing file $fn, $filecounter/$#datafile_names ... "
	   if ($debug_info);

  	($output_filename,$dummy) = split(/\./,$fn);
	unlink ("$output_filename.result");

  	# Read file in @datafile
  	open(DATAFILE, "$fn") or die "Can't open data file $fn!\n";
 	undef @datafile;
  	   while (<DATAFILE>) { push(@datafile,$_);}
  	close(DATAFILE);


        ($start_mass,$dummy) = split(/\s/,$datafile[0]);
        ($end_mass,$dummy) = split(/\s/,$datafile[$#datafile]);

  	# Find the array-index near the compound mass
  	foreach $compound (@keytable)
  	{
	    print "# Processing $compound\n" if ($debug_info > 9);
	    
    	    $guess = int( $#datafile/(($end_mass - $start_mass)/
                          ($converter{$compound} - $start_mass)));
           
	    ($datafile_mass, $dummy) = split(/\s/, $datafile[$guess]);

	    if ($datafile_mass < $converter{$compound})
                { $increment = 1; } else { $increment = -1; }
    
    	    print "# $compound, $guess, $increment\n" if ($debug_info > 9);
	    
    	    while ($increment != 0)
            {  
        	$olddata = $datafile_mass;

        	($datafile_mass, $dummy) = 
		      split(/\s/, $datafile[$guess+$increment]);

        	if (($olddata/$converter{$compound}) <
	    	    ($datafile_mass/$converter{$compound}))
	           { $increment = 0; } else
	           { 
		       $guess = $guess + $increment;
	  
	               if ($datafile_mass > $converter{$compound})
	   	       { $increment = -1;} else { $increment = +1;}
		   }
	    }
           
	    # Move index to $deviation below compound mass

    	    $iterator=$guess;

    	    while (abs($datafile_mass-$converter{$compound})/
                       $converter{$compound} < $deviation)
    	    { 
	        $iterator--;
                ($datafile_mass, $dummy) = split(/\s/, $datafile[$iterator]);
            }

            $iterator_end = ($guess-$iterator)+$guess;
            $high_intensity = 0;
    
            while ($iterator <= $iterator_end)
            {
                ($datafile_mass, $intensity) = 
			split(/\s/, $datafile[$iterator]);
                if ($intensity > $high_intensity)
                {
        	    $high_intensity = $intensity;
		    $high_intensity_mass = $datafile_mass;
                }
                $iterator++
            }
  
            
            open(OUTPUT, ">> $output_filename.result");
            print OUTPUT "$compound\t$high_intensity_mass\t$high_intensity\n";
            close(OUTPUT);
        }   
        print "done!\n" if ($debug_info);
    }
}


sub CreatePlotfiles
{
    # Create Plotfiles

    local $iterator;
    local $comp; local $mass; local $intensity;
    local %result_data;
    local $_;
    local $time;
    local *TIMEFILE;
    local *RESULT;
    local *PLOTFILE;
    local $sum;

    # Create Directories
    foreach (@data_type) 
    { 
        open (PLOTFILE, "> $_/$_.data") or die "Can't open $_/$_.data!\n";
	print PLOTFILE "# Plotfile for $_\n# Created by Kassandra\n";

        print PLOTFILE "# time\t";

        foreach $compound (@keytable)
	{
	   print PLOTFILE "$compound\t";
	}

	print PLOTFILE "\n";
	
	close (PLOTFILE);
    }
    
    $iterator = 1;

    foreach $read_data (@acq_data)
    {
        open(PLOTFILE, ">> $read_data/$read_data.data") 
	   or die "Can't open $read_data.data\n";
    
        undef %result_data;
	
  	if ($iterator < 10) 
  	{ 
          open(TIMEFILE, "$read_data/process/0$iterator.time") 
	    or die "Can't open $read_data/process/0$iterator.time\n";
	  open(RESULT, "$read_data/process/0$iterator.result")
	    or die "Can't open $read_data/process/0$iterator.result\n";
	}
        else 
  	{
          open(TIMEFILE, "$read_data/process/$iterator.time") 
	    or die "Can't open $read_data/process/$iterator.time\n";
	  open(RESULT, "$read_data/process/$iterator.result")
	    or die "Can't open $read_data/process/$iterator.result\n";
	}

        while (<RESULT>)
	{
	  chop;
	  ($comp,$mass,$intensity) = split(/\s/, $_);
	  $result_data{$comp} = $intensity;
	}
       
        $sum = 0;

	foreach (keys %result_data)
	{
	  $sum = $sum + $result_data{$_};
	}

        while (<TIMEFILE>)
	{
	  chop;
	  $time = $_;
	}
 	
	print PLOTFILE "$time\t";

	foreach $compound (@keytable)
	  { printf PLOTFILE "%.5f\t",$result_data{$compound}/$sum; }
	  
        close(RESULT);
	close(TIMEFILE);

	print PLOTFILE "\n";
        close(PLOTFILE);
  	$iterator++;
    }

}

sub ReadMassfile
{
    local $compound;
    local $mass;

    local *MASSFILE;
    open(MASSFILE, $mass_datafile) or 
          die "Can't open Mass Data file $mass_datafile\n";
    while (<MASSFILE>)
    {
  	($compound, $mass) = split(/\s/, $_);
  	$converter{$compound} = $mass;

	print "# Read $compound,$mass from $mass_datafile.\n" 
	    if ($debug_info > 5);
    }
    # Generate Keytable, sorted by increasing mass
    @keytable = sort {$converter{$a} cmp $converter{$b}} keys %converter;
    print "# Keytable generated\n" if ($debug_info > 9);
}

# GeneratePlotdata

sub GeneratePlotdata
{
    local $sample;
    local $compound;
    local @plotfile;
    local $_;
    local *PLOTFILE;
    local *PLOTDATA;
    local $readline;
    local $outputline;
    local $pointer;

    foreach $sample (@data_type)
    {
        $#plotfile = -1;
	
    	open(PLOTFILE, "$sample/$sample.data")
	   or die "Can't open $sample/$sample.data!\n";
        while (<PLOTFILE>)
	{ if ($_ =~ /^#/) {} else { push @plotfile, $_; } }
	close(PLOTFILE);

	$pointer = 1;

	foreach $compound (@keytable)
	{
	    open(PLOTDATA, "> $sample/$compound")
	          or die "Can't open $sample/$compound!\n";
           
	    
	   
	    foreach $readline (@plotfile)
	    {
		@outputline = split(/\s/, $readline);
	        print PLOTDATA "$outputline[0]\t$outputline[$pointer]\n";
	    }
	    close(PLOTDATA);
            $pointer++;
        }
    }
}