# 2004.10.18
# jdwang@thmu.edu.tw
#
# Scans a text report for patient lines tagged "CM-T250" and appends the
# parsed fields to "<input name without extension>_ParsedData" as
# '#'-separated records.  Each parsed record is also echoed to STDOUT.
#
# Usage: perl <this script> [input_file]     (default: PartialSrc.txt)
#
# Input layout, as consumed by this script:
#   line 1     title line (skipped)
#   line 2     blank line (skipped)
#   the rest   blocks, each introduced by a line containing a long run of
#              dashes; lines before the first such separator are ignored.
#
use strict;
use warnings;

# A line containing this run of dashes starts a new block.
my $SEPARATOR_RE = qr/----------------------/;

# parse_patient_line($line)
#
# Splits one "CM-T250" line into its fields.  Expected shape:
#   CM-T250 <digits> <digits> <non-digits> <non-digits> <digits> <rest>
# Exactly one whitespace character is required between the first two
# numeric fields; any amount of whitespace separates the others.
#
# Returns the list (P1, P2, P_Name, P_Sex, P_Birthday, RemainInfo), or an
# empty list when the line does not have that shape.
sub parse_patient_line {
    my ($line) = @_;
    my @fields = $line
        =~ /CM-T250\s+(\d+)\s(\d+)\s+(\D+)\s+(\D+)\s+(\d+)\s+(.+)/;
    return @fields;
}

# The input file may be given on the command line; the historical default
# is kept so existing invocations behave as before.
my $InputFile = @ARGV ? $ARGV[0] : "PartialSrc.txt";

# Output name: everything before the last '.' plus "_ParsedData".  Fall
# back to the full input name when it has no extension.
my ($OutputFileName) = $InputFile =~ /(.+)\.(.+)/;
$OutputFileName = $InputFile unless defined $OutputFileName;
my $outputfile = $OutputFileName . "_ParsedData";

open(my $in, '<', $InputFile)
    or die "Cannot open $InputFile: $!\n";

# The column header is written only when the output file is being created;
# an existing file is appended to without repeating the header.  The
# existence check must happen before the append-open creates the file.
my $is_new_file = !-e $outputfile;
open(my $out, '>>', $outputfile)
    or die "Cannot open file \"$outputfile\" to write to: $!\n";
print {$out} "Num#P1#P2#P_Name#P_Sex#P_Birthday\n" if $is_new_file;

# Skip the title line and the blank line that follows it.
for (1 .. 2) {
    my $skipped = <$in>;
}

my $Cnt      = 0;    # number of records written during this run
my $in_block = 0;    # true once the first separator line has been seen

# Single pass: separator lines only switch block processing on; every
# other line inside a block is checked for a CM-T250 patient record.
# Lines of other kinds (e.g. organism names or antibiotic "S :" / "R :"
# result lines) are currently ignored.
while (my $line = <$in>) {
    if ($line =~ $SEPARATOR_RE) {
        $in_block = 1;
        next;
    }
    next unless $in_block;
    next unless $line =~ /CM-T250/;

    my ($P1, $P2, $P_Name, $P_Sex, $P_Birthday, $RemainInfo)
        = parse_patient_line($line);

    # A tagged line that does not fit the expected shape is reported and
    # skipped instead of producing an empty record.
    unless (defined $P1) {
        warn "Skipping unparsable CM-T250 line $. of $InputFile: $line";
        next;
    }

    $Cnt++;    # one more record

    print $P1 . "\n";
    print $P2 . "\n";
    print $P_Name . "\n";
    print $P_Sex . "\n";
    print $P_Birthday . "\n";
    print "\n\n" . $RemainInfo . "\n";

    print {$out} join("#", $Cnt, $P1, $P2, $P_Name, $P_Sex, $P_Birthday),
        "\n";
}

close($in);

# Buffered write errors only surface at close time, so check it.
close($out)
    or die "Error while closing \"$outputfile\": $!\n";