# prom2.pl (GetContigURLs)
#
# Daniel Westreich, July 2002
# Mullins Lab, Microbiology Dept.
# University of Washington
#
# Usage: prom2.pl filename
#
# Reads "gene|mapurl" records, one per line, from the named file(s) or from
# standard input; lines starting with '#' are comments. For every record it
# calls GetContigURLFromMap($gene, $mapurl) and writes the outcome to
# contigurls.out: "gene|contigurl" on success, or the error text returned by
# the lookup on failure. If no record failed, it offers to run the next step
# (prom3.pl) on the file it just wrote.
#
# "use strict" is deliberately not enabled file-wide: GetContigURLFromMap
# assigns to undeclared package variables ($gene, $url, $remote, $contigurl)
# and would not compile under it.

use IO::Socket;

# Web server contacted by GetContigURLFromMap. This stays a package variable
# because that sub reads $host directly.
our $host = "www.ncbi.nlm.nih.gov";

my $outfile = "contigurls.out";
my $error   = 0;    # becomes 1 as soon as any record fails

open(my $out, '>', $outfile)
    or die "cannot open $outfile for writing: $!\n";

# A named loop variable is used instead of $_ because GetContigURLFromMap
# runs its own "while (<$remote>)" loop, which overwrites the global $_.
while (my $line = <>) {
    next if $line =~ /^\#/;    # comments

    chomp($line);
    my @words = split(/\|/, $line);
    next if @words == 0;       # blank line

    my ($gene, $mapurl) = @words;
    print "For gene ".$gene."\n";

    # A record without a URL can never be resolved; log it as an error
    # instead of sending an empty GET request to the server.
    unless (defined $mapurl && length $mapurl) {
        $error = 1;
        print $out "#Error: no map URL given for $gene\n";
        next;
    }

    my $contigurl = GetContigURLFromMap($gene, $mapurl);
    if ($contigurl =~ /Error/) {
        # The lookup reports failure in-band as text containing "Error".
        $error = 1;
        print $out $contigurl."\n";
    }
    else {
        #UGLY HACK
        $contigurl =~ s/cgi/fcgi/;
        print $out $gene."|".$contigurl."\n";
    }
}

# Flush and close the results before anything else (the user, or prom3.pl
# started below) reads the file; buffered write errors also surface here.
close($out) or die "cannot finish writing $outfile: $!\n";

if ($error) {
    print "** ERROR. Check log file contigurls.out for details.\n\n";
    print "When errors are corrected, invoke next step with 'prom3.pl contigurls.out'\n";
    exit();
}
else {
    print "\n\nNo errors. Continue with step 3 (command line 'prom3.pl contigurls.out')? [Y/n]";

    # Read the answer from the terminal explicitly. Anything that does not
    # start with n/N, including an empty reply or end-of-input, means "yes".
    my $answer = <STDIN>;
    if (defined $answer && $answer =~ /^[nN]/) {
        print "You can invoke the next step of this process at any time with 'prom3.pl contigurls.out'";
    }
    else {
        if (system("prom3.pl contigurls.out") != 0) {
            warn "prom3.pl could not be run or reported a failure (status $?)\n";
        }
    }
}

# NOTE(review): the line below is the beginning of GetContigURLFromMap exactly
# as it appears in the copy of this file under review. The definition is cut
# off mid-statement there, so it is left untouched on its original line.
###################### # looking for url associated with "sv" after $gene in $url sub GetContigURLFromMap { $gene = $_[0]; $url = $_[1]; $remote = IO::Socket::INET->new( Proto => "tcp", PeerAddr => $host, PeerPort => "http(80)", ); unless($remote) { die "cannot connect to http daemon on $host" } $remote->autoflush(1); print $remote "GET $url HTTP/1.0\n\n"; $contigurl = "#Error: no 'sv' tag found after $gene in $url"; while (<$remote>) { if ($_ =~ />$gene<.+>svsv