# 2004.10.18
# jdwang@thmu.edu.tw 
#

use strict;

#print "Please type the filename of the DNA sequence data: ";
#my $InputFile = <STDIN>;
# Input file to parse.  The interactive prompt above is disabled, so the
# name is fixed here.
my $InputFile = "PartialSrc.txt";

# Split "name.ext" into base name and extension; the base name is used
# further down to build the name of the output file.
my ($OutputFileName, $Ext) = $InputFile =~ /(.+)\.(.+)/;
# A name without any "." does not match the pattern and would leave both
# variables undefined, so fall back to the complete name and no extension.
$OutputFileName = $InputFile unless defined $OutputFileName;
$Ext            = ''         unless defined $Ext;

#
# First pass: remember on which line every record separator occurs.
#
# Three-argument open with a lexical handle; the handle is private to this
# pass because the file is reopened for the second pass.
open(my $scan_fh, '<', $InputFile)
    or die "Cannot open $InputFile for reading: $!";

my $LineCnt = 0;          # lines read so far, not counting the two header lines
my $SeqCnt = 0;           # number of separator lines found
my @SeqStartLine = ();    # value of $LineCnt at each separator line

# Skip the title line (column headings: form number, medical record number,
# patient name, birthday) ...
my $line = <$scan_fh>;
# ... and the blank line that follows it.
$line = <$scan_fh>;

while ( defined( $line = <$scan_fh> ) ) {
    $LineCnt++;
    # A run of dashes marks the start of a record.
    if ( $line =~ /----------------------/ ) {
        push @SeqStartLine, $LineCnt;
        $SeqCnt++;
    }
}

# End mark: one past the last line, so the final record also has an end.
push @SeqStartLine, $LineCnt + 1;
close($scan_fh);

#exit;

# Name of the result file: base name of the input plus a fixed suffix.
my $outputfile = $OutputFileName."_ParsedData";

# Rows are always appended.  The column header is written only when this run
# creates the file, so test for existence BEFORE opening (opening in append
# mode creates a missing file).
my $outputfile_is_new = !( -e $outputfile );

# Three-argument open keeps the mode separate from the file name.  COUNTBASE
# stays a bareword handle because the parsing code further down prints to it
# and closes it.  A failure is fatal: die reports on STDERR, includes the
# system error, and ends the script with a non-zero status.
open(COUNTBASE, '>>', $outputfile)
    or die "Cannot open file \"$outputfile\" to write to: $!\n";

print COUNTBASE "Num#P1#P2#P_Name#P_Sex#P_Birthday\n" if $outputfile_is_new;
#
# Second pass: reopen the input file and parse it record by record
#
# Lexical handle and three-argument open; the system error is part of the
# message so a failure can be diagnosed.
open(my $data_fh, '<', $InputFile)
    or die "Cannot open $InputFile for reading: $!";

# Skip the title line (column headings) and the blank line below it.
$line = <$data_fh>;
$line = <$data_fh>;

my $SeqID = 0;    # number of the record currently being read

# @SeqStartLine holds the line number of every separator line followed by a
# final end mark; two consecutive entries bracket one record.
my $SeqStartline = shift @SeqStartLine;
my $SeqEndLine;
my $Cnt = 0;      # running number of the rows written during this run

# The recorded line numbers are counted from the first line after the
# header.  Discard whatever precedes the first separator so that the reads
# below stay lined up with those numbers.
if ($SeqCnt > 0) {
    for (my $skip = 1; $skip < $SeqStartline; ++$skip) {
        $line = <$data_fh>;
    }
}

for (my $index = 0; $index < $SeqCnt; ++$index) {
    $SeqID++;

    # First line of the record: the separator line itself.
    my $line = <$data_fh>;
    last unless defined $line;    # input ended early
    chomp($line);

    # The next entry marks where this record ends.
    $SeqEndLine = shift @SeqStartLine;

    # Read the remaining lines of the record, up to but not including the
    # next separator line.
    for (my $LineCnt = $SeqStartline; $LineCnt < $SeqEndLine - 1; ++$LineCnt) {
        $line = <$data_fh>;
        last unless defined $line;    # input ended early

        # Only patient lines are handled.  Other lines of a record (for
        # example an organism name such as "Klebsiella pneumoniae" or a
        # susceptibility result such as "S :Gentamicin") are ignored.
        next unless $line =~ /CM-T250/;

        # Patient line: "CM-T250", two numeric fields, name, sex, birthday
        # and the rest of the line.
        my ($P1,$P2,$P_Name,$P_Sex,$P_Birthday,$RemainInfo) = $line =~ /CM-T250\s+(\d+)\s(\d+)\s+(\D+)\s+(\D+)\s+(\d+)\s+(.+)/;

        # A line that mentions CM-T250 but does not have the full layout
        # yields no fields; report it instead of writing an empty row.
        unless (defined $P1) {
            warn "Skipping unparsable CM-T250 line: $line";
            next;
        }

        $Cnt++;    # one more row

        # Echo the extracted fields on standard output.
        print $P1."\n";
        print $P2."\n";
        print $P_Name."\n";
        print $P_Sex."\n";
        print $P_Birthday."\n";
        print "\n\n".$RemainInfo."\n";

        # One '#'-separated row per patient line.
        print COUNTBASE $Cnt."#".$P1."#".$P2."#".$P_Name."#".$P_Sex."#".$P_Birthday."\n";
    }

    # The end of this record is the start of the next one.
    $SeqStartline = $SeqEndLine;
}

close($data_fh);
# Buffered write errors only surface when the handle is closed.
close(COUNTBASE) or die "Cannot close \"$outputfile\": $!";