#!/usr/bin/perl -w
# Kassandra v0.1.0b
# Copyright (C) 2001 Gunter Kuhnle
# Script to analyse a set of mass spectra generated by a SampleControl
# run. Kassandra distributes the spectra files according to the
# information obtained by the logfile generated by SampleControl
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.
# Author contact: gunter.kuhnle@web.de
#
# Instructions
#
# kassandra.pl [filenames]
#
# [filenames] is the list of mass spectra files. Furthermore,
# kassandra requires the logfile of the SampleControl run (named
# '00logfile.txt') and a mass table named "masses.dat" with name and
# mass of each compound.
#
# Kassandra creates directories for each assay (according to the
# sample name in the logfile) and distributes the data in the
# appropriate directory. The peak heights are determined applying a
# simple peak searching algorithm with the masses provided by the mass
# table and the relative intensity of each compound is calculated.
#
# Time/intensity-tables for each compound are generated and written in
# the respective directory.
#
# Kassandra requires mass spectra and logfiles in the unix text
# format. The mac2unix perlscript may be used to convert the Mac
# textfiles generated by Multiview in the appropriate format.
# Declaration of Subroutines
sub Welcome;
sub ReadLogfile;
sub ScanLogfile;
sub CreateResultfiles;
sub DistributeRawdata;
sub CreatePlotfiles;
sub ReadMassfile;
sub GeneratePlotdata;
my $debug_info;
$debug_info = 9; # 9: verbose, 0: none
my $logfile;
$logfile = "00logfile.txt";
my @acq_data; # Sample name of each datafile
my @time_data; # Time of each datafile
my @data_type;
my $deviation;
$deviation = 0.001; # Deviation from calc. Mass
# Must be adjusted!
my @datafile_names;
my %converter;
my $mass_datafile;
$mass_datafile = "masses.dat"; # Mass table filename
@datafile_names = @ARGV;
Welcome; # Welcome message
ReadMassfile; # Read masstable
ReadLogfile; # Read logfile
ScanLogfile; # Process logfile
CreateResultfiles; # Create the Resultfile
DistributeRawdata; # Distribute the raw data files in the
# appropriate directories
CreatePlotfiles; # Create plotfiles
GeneratePlotdata; # Write plotfiles
# -------------------------------------------------------------------
# Declaration of Subroutines
# -------------------------------------------------------------------
# -------------------------------------------------------------------
sub Welcome
{
# Print Welcome Message
print "\nKassandra v0.0.1\n";
print "================\n\n";
print "Programm to analyse a set of mass spectra generated\n";
print "by an SampleControl run. (c)2001 gk\n\n";
print "# Debug Level $debug_info\n" if ($debug_info);
}
sub ReadLogfile
{
# Read the SampleControl logfile and store the data in @acq_data
# and in @time_data.
print "# Call of ReadLogFile with $logfile\n"
if ($debug_info > 5);
local *LOGFILE;
local $seconds; local $h; local $m; local $s;
local $dummy;
local $noon_passed;
local @input_line;
local $_;
local $firsttime; local $lasttime;
$#acq_data = -1;
$#time_data = -1;
$firsttime = 0;
$lasttime = 0;
$noon_passed = 0;
open(LOGFILE, $logfile) or die "Can't open logfile $logfile\n";
while (<LOGFILE>)
{
chop;
if ($_ =~ /Started/)
{
@input_line = split(/ /, $_);
push @acq_data, $input_line[10];
($h,$m,$s) = split(/:/, $input_line[5]);
($s,$dummy) = split(/\./, $s);
$seconds = $h*3600+$m*60+$s;
if ($firsttime == 0) { $firsttime = $seconds };
$seconds = $seconds - $firsttime;
# As the time format is in 12h format, we have to look
# wether we went over 12/24h
if ($lasttime > $seconds) {$noon_passed=1};
$seconds = $seconds + $noon_passed*12*3600;
$lasttime = $seconds;
push @time_data, $seconds;
}
}
close (LOGFILE);
}
# End of Sub ReadLogfile
sub ScanLogfile
{
# Scan Logfile for the reaction/data types acquired
print "# Call of ScanLogfile with $#acq_data+1 entries!\n"
if ($debug_info > 5);
local $read_data;
local %already_seen;
$#data_type = -1;
foreach $read_data (@acq_data)
{
undef %already_seen;
for (@data_type) {$already_seen{$_} = 1;}
if ($already_seen{$read_data}) {}
else
{
push @data_type, $read_data;
print "# $read_data pushed\n" if ($debug_info > 8);
}
}
print "# $#data_type Data Types found!\n" if ($debug_info);
}
sub DistributeRawdata
{
# Distribute raw data files to specific directories
local $iterator;
local $read_data;
local $_;
local *TIMEFILE;
# Create Directories
foreach (@data_type)
{
mkdir $_, 0750;
mkdir "$_/rawdata", 0750;
mkdir "$_/process", 0750
}
$iterator = 1;
foreach $read_data (@acq_data)
{
if ($iterator < 10)
{ rename ("0$iterator.txt", "$read_data/rawdata/0$iterator.txt");
rename ("0$iterator.result", "$read_data/process/0$iterator.result");
open(TIMEFILE, "> $read_data/process/0$iterator.time");
print TIMEFILE "$time_data[$iterator-1]\n";
close(TIMEFILE);
}
else
{ rename ("$iterator.txt", "$read_data/rawdata/$iterator.txt");
rename ("$iterator.result", "$read_data/process/$iterator.result");
open(TIMEFILE, "> $read_data/process/$iterator.time");
print TIMEFILE "$time_data[$iterator-1]\n";
close(TIMEFILE);
}
$iterator++;
}
}
# CreateResultfiles
sub CreateResultfiles
{
local $olddata;
local $_;
local $output_filename;
local $fn;
local $filecounter;
local $iterator;
local $iterator_end;
local $dummy;
local $guess;
local @datafile;
local $datafile_mass;
local $start_mass;
local $end_mass;
local $increment;
local $high_intensity;
local $intensity;
local $high_intensity_mass;
print "# Call of create result files\n" if ($debug_info > 5);
if ($#datafile_names < 0)
{ die "Insufficient information. Please, specify filename!\n";}
print "# Called with $#datafile_names files.\n" if ($debug_info > 5);
$filecounter=0;
print "# Starting to process $#datafile_names files.\n"
if ($debug_info > 9);
# Start to process file(s) given by command line parameters
foreach $fn (@datafile_names)
{
$filecounter++;
print "# Processing file $fn, $filecounter/$#datafile_names ... "
if ($debug_info);
($output_filename,$dummy) = split(/\./,$fn);
unlink ("$output_filename.result");
# Read file in @datafile
open(DATAFILE, "$fn") or die "Can't open data file $fn!\n";
undef @datafile;
while (<DATAFILE>) { push(@datafile,$_);}
close(DATAFILE);
($start_mass,$dummy) = split(/\s/,$datafile[0]);
($end_mass,$dummy) = split(/\s/,$datafile[$#datafile]);
# Find the array-index near the compound mass
foreach $compound (@keytable)
{
print "# Processing $compound\n" if ($debug_info > 9);
$guess = int( $#datafile/(($end_mass - $start_mass)/
($converter{$compound} - $start_mass)));
($datafile_mass, $dummy) = split(/\s/, $datafile[$guess]);
if ($datafile_mass < $converter{$compound})
{ $increment = 1; } else { $increment = -1; }
print "# $compound, $guess, $increment\n" if ($debug_info > 9);
while ($increment != 0)
{
$olddata = $datafile_mass;
($datafile_mass, $dummy) =
split(/\s/, $datafile[$guess+$increment]);
if (($olddata/$converter{$compound}) <
($datafile_mass/$converter{$compound}))
{ $increment = 0; } else
{
$guess = $guess + $increment;
if ($datafile_mass > $converter{$compound})
{ $increment = -1;} else { $increment = +1;}
}
}
# Move index to $deviation below compound mass
$iterator=$guess;
while (abs($datafile_mass-$converter{$compound})/
$converter{$compound} < $deviation)
{
$iterator--;
($datafile_mass, $dummy) = split(/\s/, $datafile[$iterator]);
}
$iterator_end = ($guess-$iterator)+$guess;
$high_intensity = 0;
while ($iterator <= $iterator_end)
{
($datafile_mass, $intensity) =
split(/\s/, $datafile[$iterator]);
if ($intensity > $high_intensity)
{
$high_intensity = $intensity;
$high_intensity_mass = $datafile_mass;
}
$iterator++
}
open(OUTPUT, ">> $output_filename.result");
print OUTPUT "$compound\t$high_intensity_mass\t$high_intensity\n";
close(OUTPUT);
}
print "done!\n" if ($debug_info);
}
}
sub CreatePlotfiles
{
# Create Plotfiles
local $iterator;
local $comp; local $mass; local $intensity;
local %result_data;
local $_;
local $time;
local *TIMEFILE;
local *RESULT;
local *PLOTFILE;
local $sum;
# Create Directories
foreach (@data_type)
{
open (PLOTFILE, "> $_/$_.data") or die "Can't open $_/$_.data!\n";
print PLOTFILE "# Plotfile for $_\n# Created by Kassandra\n";
print PLOTFILE "# time\t";
foreach $compound (@keytable)
{
print PLOTFILE "$compound\t";
}
print PLOTFILE "\n";
close (PLOTFILE);
}
$iterator = 1;
foreach $read_data (@acq_data)
{
open(PLOTFILE, ">> $read_data/$read_data.data")
or die "Can't open $read_data.data\n";
undef %result_data;
if ($iterator < 10)
{
open(TIMEFILE, "$read_data/process/0$iterator.time")
or die "Can't open $read_data/process/0$iterator.time\n";
open(RESULT, "$read_data/process/0$iterator.result")
or die "Can't open $read_data/process/0$iterator.result\n";
}
else
{
open(TIMEFILE, "$read_data/process/$iterator.time")
or die "Can't open $read_data/process/$iterator.time\n";
open(RESULT, "$read_data/process/$iterator.result")
or die "Can't open $read_data/process/$iterator.result\n";
}
while (<RESULT>)
{
chop;
($comp,$mass,$intensity) = split(/\s/, $_);
$result_data{$comp} = $intensity;
}
$sum = 0;
foreach (keys %result_data)
{
$sum = $sum + $result_data{$_};
}
while (<TIMEFILE>)
{
chop;
$time = $_;
}
print PLOTFILE "$time\t";
foreach $compound (@keytable)
{ printf PLOTFILE "%.5f\t",$result_data{$compound}/$sum; }
close(RESULT);
close(TIMEFILE);
print PLOTFILE "\n";
close(PLOTFILE);
$iterator++;
}
}
sub ReadMassfile
{
local $compound;
local $mass;
local *MASSFILE;
open(MASSFILE, $mass_datafile) or
die "Can't open Mass Data file $mass_datafile\n";
while (<MASSFILE>)
{
($compound, $mass) = split(/\s/, $_);
$converter{$compound} = $mass;
print "# Read $compound,$mass from $mass_datafile.\n"
if ($debug_info > 5);
}
# Generate Keytable, sorted by increasing mass
@keytable = sort {$converter{$a} cmp $converter{$b}} keys %converter;
print "# Keytable generated\n" if ($debug_info > 9);
}
# GeneratePlotdata
sub GeneratePlotdata
{
local $sample;
local $compound;
local @plotfile;
local $_;
local *PLOTFILE;
local *PLOTDATA;
local $readline;
local $outputline;
local $pointer;
foreach $sample (@data_type)
{
$#plotfile = -1;
open(PLOTFILE, "$sample/$sample.data")
or die "Can't open $sample/$sample.data!\n";
while (<PLOTFILE>)
{ if ($_ =~ /^#/) {} else { push @plotfile, $_; } }
close(PLOTFILE);
$pointer = 1;
foreach $compound (@keytable)
{
open(PLOTDATA, "> $sample/$compound")
or die "Can't open $sample/$compound!\n";
foreach $readline (@plotfile)
{
@outputline = split(/\s/, $readline);
print PLOTDATA "$outputline[0]\t$outputline[$pointer]\n";
}
close(PLOTDATA);
$pointer++;
}
}
}