# prom3.pl (GetContigInfo)
#
# Daniel Westreich, July 2002
# Mullins Lab, Microbiology Dept.
# University of Washington
#
# Usage: prom3.pl filename
#
# Reads "gene|contigurl" lines from the named file (or STDIN); lines that
# start with '#' are treated as comments and blank lines are skipped.
# For each gene the contig page is fetched from $host and a line of the
# form "gene|gi|strand" is written to contiginfo.out.  A lookup that fails
# is written as a "#gene: Error - ..." line instead, and the follow-up
# step (prom4.pl) is then not offered.
#

use strict;
use warnings;
use IO::Socket;

my $host     = "www.ncbi.nlm.nih.gov";
my $out_file = "contiginfo.out";
my $error    = 0;    # set to 1 as soon as any gene fails

open(my $out, '>', $out_file)
    or die "cannot open $out_file for writing: $!";

while (my $line = <>) {
    next if $line =~ /^\#/;    # comments
    chomp($line);

    my @words = split(/\|/, $line);
    next if @words == 0;       # blank line

    my ($gene, $contigurl) = @words[0, 1];
    print "For gene ".$gene."\n";

    # A line with a gene but no URL cannot be looked up; record it as an
    # error rather than sending a request with an empty URL.
    my $temp;
    if (defined($contigurl) && length($contigurl)) {
        $temp = GetContigInfo($gene, $contigurl);
    }
    else {
        $temp = "#$gene: Error - no contig url on input line";
    }

    if ($temp =~ /Error/) {
        $error = 1;
        print {$out} $temp."\n";
    }
    else {
        print {$out} $gene."|".$temp."\n";
    }
}

# Close (and thereby flush) the output before anything else reads it.
close($out) or die "cannot close $out_file: $!";

if ($error) {
    print "** ERROR. Check log file contiginfo.out for details.\n\n";
    print "When errors are corrected, invoke next step with 'prom4.pl contiginfo.out'\n";
    exit();
}
else {
    print "\n\nNo errors. Continue with step 4 (command line 'prom4.pl contiginfo.out')? [Y/n]";

    # STDIN may already be at EOF (e.g. when the input list itself came
    # from STDIN); treat that like an empty answer, i.e. the default "Y".
    my $answer = <STDIN>;
    $answer = '' unless defined $answer;

    if ($answer =~ /^[nN]/) {
        print "You can invoke the next step of this process at any time with 'prom4.pl contiginfo.out'";
    }
    else {
        system("prom4.pl", $out_file);
    }
}

# ABANDONED GETTING CONTIGID below; can always get it later from the url itself

# GetContigInfo($gene, $url)
#
# Requests $url from $host over plain HTTP/1.0 and scans the response for
# the first "[gi:...]" entry and, after it, the first line mentioning
# "plus strand" or "minus strand".
#
# Returns "gi|strand" on success, where strand is "Crick" for a plus-strand
# hit and "Watson" for a minus-strand hit.  If either piece is missing from
# the response, returns a string of the form "#gene: Error - ..." so the
# caller's /Error/ check can detect the failure.
#
# Dies if the TCP connection to $host cannot be established.
sub GetContigInfo {
    my ($gene, $url) = @_;

    my $remote = IO::Socket::INET->new(
        Proto    => "tcp",
        PeerAddr => $host,
        PeerPort => "http(80)",
    );
    unless ($remote) {
        die "cannot connect to http daemon on $host: $@";
    }
    $remote->autoflush(1);

    # HTTP line terminators are CRLF.
    print {$remote} "GET $url HTTP/1.0\r\n\r\n";

    # Lexical so that a failed lookup cannot inherit a previous gene's values.
    my ($gi, $strand);

    while (my $line = <$remote>) {
        if ($line =~ /checkbox.+[1-9].+\[gi\:(.+)\]/) {
            $gi = $1;
            last;
        }
    }

    # The strand line is only looked for after the gi line has been seen.
    while (my $line = <$remote>) {
        if ($line =~ /plus strand/) {
            $strand = "Crick";
            last;
        }
        if ($line =~ /minus strand/) {
            $strand = "Watson";
            last;
        }
    }

    close $remote;

    unless (defined($gi) && defined($strand)) {
        return "#$gene: Error - failed to get gi and strand from $url";
    }
    return $gi."|".$strand;
}