# $Author: mm238 $ $Revision: 1.1 $ $Date: 2003/09/05 10:23:02 $
#
# Copyright (C) 2002 MRC and Martin Madera
#
# This program may be freely redistributed and/or modified 
# under the terms of the GNU General Public License as published
# by the Free Software Foundation; either version 2 of the 
# License, or (at your option) any later version.
#
# --------------------------------------------------------------
#
# READING FILES
# =============
#
#    $model = HiddenMarkovModel->readSAM("file.mod");
#    $model = HiddenMarkovModel->readHMMER("file.hmm");
#    $model = HiddenMarkovModel->readPSIBLAST("file.psiblast");
#
# or 
#
#    $model = HiddenMarkovModel->clever_read("file.ext");
#
# which decides by itself which routine to call.
#
#
# WRITING FILES
# =============
#
#    $model->writeSAM("file.mod");
#    $model->writeHMMER("file.hmm");
#    $model->writePSIBLAST("file.psiblast");
#
# or
#
#    $model->clever_write("file.ext");
#
# which will work it out by itself.
#
#
# NB writePSIBLAST needs to know the seed sequence; you set this by
# doing 
#
#    $model->{seed} = "AASFASDFASDFASDF";
#
# or via
#
#    $model->read_seedseq("file.fasta");    
#
# before calling writePSIBLAST. If you don't do that, writePSIBLAST
# will figure out the highest-scoring sequence and will use that
# instead, and will save the sequence to file.psiblast.fa. Keep the 
# file, you will need it when using the profile.
# 
# WARNING: the PSI-BLAST binary format only works on x86 and alphas
# and architectures that store C floats in the same way
#
#
package HiddenMarkovModel;
use vars qw($DD $MD $ID $DM $MM $IM $DI $MI $II);
use strict;


##
## order of transition probabilities in the @{trans} array
##
($DD, $MD, $ID, $DM, $MM, $IM, $DI, $MI, $II) = (0..8);
 

##
## allocation etc.
##
sub new
{
    my ($pkg) = @_;
    my  $obj  = &clear({});
    bless $obj, $pkg;    
};

sub clear
{
    my ($obj) = @_;

    $obj->{trans}       = [];
    $obj->{matches}     = [];
    $obj->{inserts}     = [];
    $obj->{fims}        = [];
    $obj->{calibration} = "";
    $obj->{M}           = 0;

    return $obj;
};


##
## *** SAM ***
##
sub readBlock
{
    local *HANDLE = shift;  # file handle to read from
    my $filename = shift;   # name of the corresponding file
    my $cols = shift;       # number of columns to read
    my $rows = shift;       # number of rows to read
    my $array = [];         # array to return

    my ($i, @a);

    for $i (1 .. $rows)
    {
	@a = split /\s+/, <HANDLE>;
	die "Parsing error on line $. of $filename!\n..."
	    unless @a == $cols;

	push @{$array}, @a;
    };

    return $array;
};

sub writeBlock
{
    local *HANDLE = shift;  # file handle to write to
    my $array = shift;      # the array to write
    my $cols  = shift;      # ... number of columns
    my $rows  = shift;      # ... number of rows
    
    my ($i, $j);

    for $i (0 .. $rows-1) {
	for $j (0 .. $cols-1) {
	    printf HANDLE "%8.6f ", $array->[$i*$cols+$j];
	};
	print HANDLE "\n";
    };
};

sub pad
{
    my $len  = shift;    # pad $text so that the final length is $len
    my $text = shift;    # the text to pad
    return " "x($len-length $text) . $text;
};

sub readSAM
{
    # two ways of using this method:
    #
    # (a) $obj->readSAM("filename");
    #
    # (b) $obj = PackageXYZ->readSAM("filename");
    #
    my ($obj, $filename) = @_;

    my $m = -2;         # current position in the model
    my $last = 0;       # is this the last position?
    my $fim="";         # the fim line

    my ($array)=[];
    my (@a, $i);


    if( ref($obj) )
    {
	# $obj is a reference => (a) above
	$obj->clear();
    }
    else
    {
	# $obj not a reference => (b) above
	# (assume $obj is a package name)
	$obj = new $obj;	
    };


    open( SAM, "$filename" ) 
	or die "Cannot open $filename: $!";
    
    while(<SAM>)
    {
	$m=-1 if /^Begin/;
	if( /^TYPE/ ) { $fim = $_; next; };
	next unless $m >=-1;

	$m++;
	if( $m >0 )
	{
	    if ( /^End/ ) {
		$last=1;
	    } 
	    elsif( /^\s*(\S+)/ ) {
		die "Parsing error on line $. of $filename!\n..."
		    unless $1 == $m;	    
	    } 
	    else {
		die "Parsing error on line $. of $filename!\n...";
	    };
	};

	push @{$obj->{fims}}, $fim;
	$fim = "";

	push @{$obj->{trans}},   &readBlock( *SAM, $filename, 3, 3 );
	push @{$obj->{matches}}, &readBlock( *SAM, $filename, 5, 4 );
	push @{$obj->{inserts}}, &readBlock( *SAM, $filename, 5, 4 );

	last if $last;
    };
    close SAM;

    $obj->{M} = $m-1;
    return $obj;
};

sub writeSAM
{
    my ($obj, $filename) = @_;  
    my $m;                      # current position in the model

    open( SAM, ">$filename" ) or die "Cannot open $filename: $!\n...";
    print SAM "MODEL\nalphabet protein\n";

    for $m (0 .. $obj->{M}+1)
    {
	print SAM ${$obj->{fims}}[$m]
	    unless ${$obj->{fims}}[$m] eq "";

	if( $m == 0 ) { 
	    print SAM "Begin\n";
	} elsif ( $m == $obj->{M}+1 ) {
	    print SAM "End\n";
	} else {
	    print SAM pad(4,$m)."\n";
	};

	&writeBlock(*SAM, $obj->{trans}[$m],   3, 3);
	&writeBlock(*SAM, $obj->{matches}[$m], 5, 4);
	&writeBlock(*SAM, $obj->{inserts}[$m], 5, 4);
    };

    print SAM "\nENDMODEL\n";
    close SAM;
};


##
## *** HMMER ***
##

my $header1 =
    "ALPH  Amino\n".
    "DATE  Today\n".
    "COM   Converted from Sam model\n".
    "XT      -8455     -4  -1000  -1000  -8455     -4  -8455     -4 \n".
    "NULT      -4  -8455\n".
    "NULE     595  -1558     85    338   -294    453  -1158    197    249    902  -1085   -142    -21   -313     45    531    201    384  -1998   -644 \n";
my $header2 = 
    "HMM        A      C      D      E      F      G      H      I      K      L      M      N      P      Q      R      S      T      V      W      Y    \n".
    "         m->m   m->i   m->d   i->m   i->i   d->m   d->d   b->m   m->e\n";

my @NULE = ( 595, -1558, 85, 338, -294, 453, -1158, 197, 249, 902, 
	     -1085, -142, -21, -313, 45, 531, 201, 384, -1998, -644 );

sub score
{
    my $prob = shift;       # convert this probability to a score
    my $nullProb = shift;   # given this null probability
    
    return $prob != 0 ? int( 0.5+1000*log($prob/$nullProb)/log(2.0) ) : "*";
};

sub prob
{
    my $score = shift;      # convert this score to a probability
    my $nullProb = shift;   # given this null probability

    return $score=~ /\*/ ? 0 : $nullProb*exp(log(2)*$score/1000);
};

sub readLine
{
    local *HANDLE = shift;  # file handle to read from
    my $filename = shift;   # name of the file
    my @numbers = @_;       # possible numbers of data items expected
                            # on the line
    my @array;

    $_ = <HANDLE>;
    @array = split /\s+/;
    shift @array;

    die "Parsing error on line $. of $filename\n..."
	unless grep( scalar(@array) == $_, @numbers );

    return @array;
};

sub writeLine 
{
    local *HANDLE = shift;   # file handle to print to

    print HANDLE &pad(6, shift);
    while( @_ ) { print HANDLE &pad(7, shift); };
    print HANDLE "\n";
};

sub readHMMER
{
    # two ways of using this method:
    #
    # (a) $obj->readHMMER("filename");
    #
    # (b) $obj = PackageXYZ->readHMMER("filename");
    #
    my ($obj, $filename, $global) = @_;

    my $m;              # current position in the model
                        # transition probabilities: $xy = P(X->Y)
    my ($dd, $dm, $md, $mm, $mi, $im, $ii, $bm, $me, $bd );

    my ($i, $total, @a);


    if( ref($obj) )
    {
	# $obj is a reference => (a) above
	$obj->clear();
    }
    else
    {
	# $obj not a reference => (b) above
	# (assume $obj is a package name)
	$obj = new $obj;	
    };


    open( HMMER, "$filename" ) 
	or die "Cannot open $filename: $!\n...";

    ##
    ## header
    ##
    while(<HMMER>) 
    {
	if( /^LENG\s\s(\d+)/ )
	{
	    $obj->{M}=$1;

	    # "allocate" @trans
	    for $i (0 .. $obj->{M}+1)
	    {
		push @{$obj->{trans}},  [0,0,0, 0,0,0, 0,0,0];
	    };

	    # the "Begin" position values
	    push @{$obj->{matches}}, 
	    [ 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0 ];

	    push @{$obj->{inserts}}, 
	    [ 0.077850, 0.016640, 0.057325, 0.065451, 0.039844,
	      0.062177, 0.024314, 0.060479, 0.064418, 0.082109,
	      0.024547, 0.047302, 0.040276, 0.041960, 0.048771,
	      0.067948, 0.059895, 0.072042, 0.011740, 0.034912 ];
	}
	elsif( /^HMM\s/ )
	{
	    <HMMER>;
	    last;
	}
	elsif( /^EVD/ )
	{
	    $obj->{calibration} = $_;
	};
    };

    ##
    ## "3-line"
    ##
    @a = readLine( *HMMER, $filename, 3 );
    $bd = prob( $a[2], 1 );   # Similar FS and LS configs


    ##
    ## data
    ##
    for $m (1 .. $obj->{M})
    {
	##
	## match emissions
	##
	@a = readLine( *HMMER, $filename, 21, 22 );
	pop @a if scalar(@a)==22;   # what's this last number ??!
	shift @a;

	for $i (0 .. 19) { $a[$i] = prob( $a[$i], prob($NULE[$i],1/20) ); };
	push @{$obj->{matches}}, [@a];

	##
	## insert emissions
	##
	@a = readLine( *HMMER, $filename, 21 );
	shift @a;
	for $i (0 .. 19) { $a[$i] = prob( $a[$i], prob($NULE[$i],1/20) ); };
	push @{$obj->{inserts}}, [@a];

	##
	## state transitions
	##
	@a = readLine( *HMMER, $filename, 10 );
	shift @a;

	for $i (0 .. 8) { $a[$i] = prob( $a[$i], 1 ); };
	($mm, $mi, $md, $im, $ii, $dm, $dd, $bm, $me) = @a;

	if( $m == $obj->{M} )  # Very end
	{
	    $dd=0; $md=0; 
	    $dm=1; $mm=1; $im=1;
	           $mi=0; $ii=0;
	} 
	elsif ( $m == 1 )      # First node
	{
	  # Here Madera & Gough's script is failing, or at least not correct, I think..
	  # HMMer LS config allow b->m1 and b->d1 transitions which is similar to Sam arch.
	  # b->d1 is stored in "3-line" only, while b->m1 is stored in this line.
	  # HMMer FS config allows b->mX and b->d1 transitions. This is done by
	  # distributing half of the b->m1 probs. to the other b->mX transitions
	  # 
	  # Sam actually corresponds better to the LS version as this is the one closest to 
	  # the underlying alignment. 
	  
	  # What is happening to the mX->e transition!? Madera & Gough just leave them here although they have
	  # to deal with them in Sam -> HMMer conversion
	  

	  if ($global==0) {# Here we're giving back the probs HMMer gives to b->mX to get LS config style.
	    $bm = 2*$bm;}  # This is not done by Madera & Gough.

	  $total = $bm + $bd;
	  $bm /= $total;
	  $bd /= $total;
	  
	  $obj->{trans}[1][$MM] = $bm; 
	  $obj->{trans}[1][$MD] = $bd;
	  $obj->{trans}[1][$IM] = 1;  # No HMMer counterpart
	};

	# renormalisation
	# We are including (giving back) the m->e probs (Madera & Gough aren't)
	# However, special case at the very last node due to HMMer having P(m->e)=1 here
	if ($global != 0 && $m != $obj->{M}) {	
	  $mm += $me;}
	$total = $mi + $md + $mm;

	if( $total ) {
	    $mi /= $total;
	    $md /= $total;
	    $mm /= $total;
	};
	$total = $dd + $dm;
	if( $total ) {
	    $dd /= $total;
	    $dm /= $total;
	};
	$total = $ii + $im;
	if( $total ) {
	    $ii /= $total;
	    $im /= $total;
	};

	# save the probabilities
	$obj->{trans}[$m+1][$MM] = $mm;
	$obj->{trans}[$m  ][$MI] = $mi;
	$obj->{trans}[$m+1][$MD] = $md;
	$obj->{trans}[$m+1][$IM] = $im;
	$obj->{trans}[$m  ][$II] = $ii;
	$obj->{trans}[$m+1][$DM] = $dm;
	$obj->{trans}[$m+1][$DD] = $dd;
    };
    close HMMER;

    ##
    ## the "End" + last position values
    ##
    push @{$obj->{matches}}, 
    [ 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0 ];

    pop  @{$obj->{inserts}};

    push @{$obj->{inserts}}, 
    [ 0.077850, 0.016640, 0.057325, 0.065451, 0.039844,
      0.062177, 0.024314, 0.060479, 0.064418, 0.082109,
      0.024547, 0.047302, 0.040276, 0.041960, 0.048771,
      0.067948, 0.059895, 0.072042, 0.011740, 0.034912 ];

    push @{$obj->{inserts}}, 
    [ 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0 ];

    return $obj;
};

sub writeHMMER
{
    my ($obj, $filename, $global) = @_;
    
    # transition probabilities: $xy = P(X->Y)
    my ($dd, $dm, $md, $mm, $mi, $im, $ii, $id, $bm, $me, $bd);

    my ($denom, $total, $i, $j, @a);

    open( HMMER, ">$filename" ) or die "Cannot open $filename: $!\n...";

    ##
    ## header
    ##
    print HMMER "HMMER2.0\n";
    print HMMER "NAME  $filename\n";
    print HMMER "LENG  $obj->{M}\n";
    print HMMER $header1;
    print HMMER $obj->{calibration};
    print HMMER $header2;


    ##
    ## B->D1 line
    ##

    # Convert to Fs or Ls HMMer mode?
    # 
    # The Sam mode is LS-ish (as I see it)
    $bd    = $obj->{trans}[1][$MD];
    $denom = $bd + $obj->{trans}[1][$MM];
    writeLine( *HMMER, "", score(1-$bd/$denom,1), "*", score($bd/$denom,1) );

    for $i (1 ... $obj->{M})
    {
	##
	## match emissions
	##
	@a=();       
	for $j (0 .. 19) {
	    push @a, score( $obj->{matches}[$i][$j], prob($NULE[$j],1/20) );
	};
	writeLine( *HMMER, $i, @a, $i );

	##
	## insert emissions
	##
	@a=();
	for $j (0 .. 19) {
	    push @a,  $i==$obj->{M} ? "*" : 
		score( $obj->{inserts}[$i][$j], prob($NULE[$j],1/20) );
	};
	writeLine( *HMMER, "-", @a );

	##
	## state transitions
	##
	if( $i == $obj->{M} )
	{
	    $mm=$mi=$md=$im=$ii=$dm=$dd=0;
	    $me=1;
	}
	else 
	{
	    $mm = $obj->{trans}[$i+1][$MM];
	    $mi = $obj->{trans}[ $i ][$MI];
	    $md = $obj->{trans}[$i+1][$MD];
	    $im = $obj->{trans}[$i+1][$IM];
	    $ii = $obj->{trans}[ $i ][$II];
	    $dm = $obj->{trans}[$i+1][$DM];
	    $dd = $obj->{trans}[$i+1][$DD];
	    $me = ($global==1) ? 0 : 1/(2*($obj->{M}-1)-$i);   # Exit state 

	    # renormalization
	    $total = $dd + $dm;
	    $dd   /= $total;
	    $dm   /= $total;
	    $total = $ii + $im;
	    $id   /= $total;
	    $im   /= $total;
	    $total = $me + $mi + $md + $mm;
	    $me =  ($global == 1) ? "*" : $me / $total;
	    $mi   /= $total;
	    $md   /= $total;
	    $mm   /= $total;
	};
	
	# this seems to be the b->m transition into this match state...
	  if ($global == 1) {
	    $bm = ($i==1) ? 1-$bd/$denom 
	      : "*";
	  }
	
	else {  # Converting to HMMer FS-config. As Sam is LS-ish it is having a b->m1 prob. 2 times
	        # larger than we want to see in FS-config. We compensate by division 1/2.
	  $bm = ($i==1)  ?  ($obj->{trans}[1][$MM])/2  
	    :  ($obj->{trans}[1][$MM]/($obj->{M}-1))/2;
	  $bm = $bm / $denom;
	}
	
	writeLine( *HMMER, "-", 
		   score($mm, 1), score($mi, 1), score($md, 1),
		   score($im, 1), score($ii, 1), score($dm, 1),
		   score($dd, 1), score($bm, 1), score($me, 1) );
    };

    print HMMER "//\n";
    close HMMER;

    warn "NB HMMER models need to be calibrated for best performance!\n";
};


##
## *** PSI-BLAST *** 
##

# the order in which PSI-BLAST saves its 20 emission probabilities
# apears to be ARNDCQEGHILKMFPSTWYV
my %order = (    0 => 0,   4 => 1,   3 => 2,   6 => 3,  13 => 4, 
		 7 => 5,   8 => 6,   9 => 7,  11 => 8,  10 => 9,
		12 => 10,  2 => 11, 14 => 12,  5 => 13,  1 => 14,
		15 => 15, 16 => 16, 19 => 17, 17 => 18, 18 => 19  );

sub readPSIBLAST
{
    # two ways of using this method:
    #
    # (a) $obj->readPSIBLAST("filename");
    #
    # (b) $obj = PackageXYZ->readPSIBLAST("filename");
    #
    my ($obj, $filename) = @_;

    if( ref($obj) )
    {
	# $obj is a reference => (a) above
	$obj->clear();
    }
    else
    {
	# $obj not a reference => (b) above
	# (assume $obj is a package name)
	$obj = new $obj;	
    };

    die "File $filename doesn't exist!\n... " unless -e $filename;
    die "Can't read $filename!\n... " unless -r $filename;

    my ($binM, $M, $seed, $m, $i, $binP, @inserts, $sum);

    open PSI, $filename	or die "Error reading parsed $filename: $!\n... ";
    
    binmode PSI;
 
    # number of nodes, int
    read PSI, $binM, 4;
    $M = unpack "I", $binM;
    $obj->{M} = $M;

    # the seed sequence
    read PSI, $seed, $M;
    $obj->{seed} = $seed;

    # now the probabilities
    for $m (1 .. $obj->{M})
    {
	for $i (0 .. 19)
	{
	    read PSI, $binP, 8;
	    $obj->{matches}[$m][$order{$i}] = unpack "d", $binP;
	};
    };
    close PSI;

    # calculate inserts: arithmetic average of match state emmissions
    for $i (0..19)
    {
	for $m (1..$M)
	{
	    $inserts[$i] += $obj->{matches}[$m][$i];
	};
	$inserts[$i] /= $M;
	$sum += $inserts[$i];
    };
    for $i (0..19)
    {
	$inserts[$i] /= $sum;
    };
    
    # now sort out the rest of the model
    for $m (1..$M)
    {
	$obj->{ trans }[$m] = [ # DD     MD      ID     DM     MM     IM 
				0.850, 0.020,  0.000 , 0.150, 0.965, 0.350,
				# DI     MI      II
				0.000, 0.015,  0.650 ];
	$obj->{inserts}[$m] = [@inserts];
    };

    return $obj;
};

sub writePSIBLAST
{
    my ($obj, $filename) = @_; 
    my ($seed, $m, $i, $max, $aa);

    
    # sort out what to do about the seed sequence
    if( defined $obj->{seed} )
    {
	# splendid, we have the seed
	# (get this e.g. by running read_seedseq above)
	#
	# but need to check it's OK !
	#
	die "Seed sequence and model length do not match!\n... " 
	    unless length($seed) == $obj->{M};
    }
    else
    {
	# uh-oh, no seed sequence
	# better create one
	warn "No seed sequence given for $filename!\n";
	warn "Using most likely sequence, dumping it into $filename.fa ...\n";
	
	$obj->{seed} = $obj->most_likely_seq;

	open F, ">$filename.fa" 
	    or die "Cannot open $filename.fa for writing: $!\n... ";
	print F ">$filename seed sequence\n";
	print F "$obj->{seed}\n";
	close F;
    };

    # write the PSI-BLAST profile
    open PSI, ">$filename" or die "Cannot open $filename: $!\n...";

    binmode PSI;

    # header
    syswrite PSI, pack "I", $obj->{M}, 4;
    syswrite PSI, $obj->{seed}, $obj->{M};

    # print the match emission probabilities
    for $m (1 .. $obj->{M})
    {
	for $i (0 .. 19)
	{
	    syswrite PSI, pack "d", $obj->{matches}[$m][$order{$i}], 8;
	};
    };
    close PSI;
};

# rudimentary hack to read a PSI-BLAST seed sequence from a FASTA file
sub read_seedseq
{
    my ($obj, $filename) = @_;
    my ($seed);
    
    open SEED, "$filename" or die "Cannot open $filename: $!\n... ";
    while(<SEED>)
    {
	if( /^>/ )
	{
	    defined($seed) ? last : next;
	};

	next unless /^(\S+)/;
	$seed .= $1;
    };
    close SEED;

    $obj->{seed} = $seed;
};


# generate the most likely sequence from a model
# WARNING: ignores transition probabilities, only looks at match emmissions

my @num2let = (
	       'A', 'C', 'D', 'E', 'F', 
	       'G', 'H', 'I', 'K', 'L', 
	       'M', 'N', 'P', 'Q', 'R', 
	       'S', 'T', 'V', 'W', 'Y'
	      );

sub most_likely_seq
{
    my ($obj) = @_;
    my ($m, $i, $best_i, $max, $seq);

    for $m (1 .. $obj->{M})
    {
        $max = 0;
        for $i (0 .. 19)
        {
            next unless $obj->{matches}[$m][$i] > $max;
            $max = $obj->{matches}[$m][$i];
            $best_i = $i;
        };

        $seq .= $num2let[$best_i];
    };
    
    return $seq;
};


##
## *** CLEVER READ & WRITE ***
##
sub clever_read
{
    # two ways of using this method:
    #
    # (a) $obj->clever_read("filename");
    #
    # (b) $obj = PackageXYZ->clever_read("filename");
    #
    my ($obj, $filename, $global) = @_;

    if( ref($obj) )
    {
	# $obj is a reference => (a) above
	$obj->clear();
    }
    else
    {
	# $obj not a reference => (b) above
	# (assume $obj is a package name)
	$obj = new $obj;	
    };

    if( $filename =~ /\.mod$/ )
    {
	die "File $filename doesn't exist!\n... " unless -e $filename;
	die "Can't read $filename!\n... " unless -r $filename;
	
	if( $filename !~ /\.asc\.mod/ and
	    `grep '^BINARY' $filename`      )
	{
	    # hmm, looks like a binary file => get the ASCII version

	    my $f = $filename;

	    $f =~ s/\.mod$//;
	    print `hmmconvert $f.asc -model_file $f.mod -binary_output 0`;
	    wait;
	    
	    $filename = "$f.asc.mod";
	};
	    
	$obj->readSAM($filename);
    }
    elsif( $filename =~ /\.hmm$/ )
    {
	$obj->readHMMER($filename, $global);
    }
    elsif( $filename =~ /\.pbla$/ or 
	   $filename =~ /\.profile$/ or
	   $filename =~ /\.psiblast$/ or
	   $filename =~ /\.psi$/         )
    {
	$obj->readPSIBLAST($filename);
    }
    else
    {
	print STDERR "Uknown file extension for file $filename!\n\n";
	print STDERR "Can only read the following:\n\n";
	print STDERR "\t.mod or .asc.mod (SAM 3.0 ASCII)\n";
	print STDERR "\t.hmm (HMMER 2.x)\n";
	print STDERR "\t.profile, .pbla or .psiblast (PSI-BLAST)\n\n";
	die "... ";
    };

    return $obj;
};

sub clever_write
{
    my ($obj, $filename, $global) = @_;

    die "File $filename exists, remove it first!\n... "
	if -e $filename;

    if( $filename =~ /\.mod$/ )
    {
	$obj->writeSAM($filename);
    }
    elsif( $filename =~ /\.hmm$/ )
    {
	$obj->writeHMMER($filename, $global);
    }
    elsif( $filename =~ /\.pbla$/ or 
	   $filename =~ /\.profile$/ or
	   $filename =~ /\.psiblast$/ or
	   $filename =~ /\.psi$/         )
    {
	$obj->writePSIBLAST($filename);
    }
    else
    {
	print STDERR "Uknown file extension for file $filename!\n\n";
	print STDERR "Can only read the following:\n\n";
	print STDERR "\t.mod or .asc.mod (SAM ASCII)\n";
	print STDERR "\t.hmm (HMMER 2.x)\n";
	print STDERR "\t.profile, .pbla or .psiblast (PSI-BLAST)\n\n";
	die "... ";
    };

    return $obj;
};


return 1;