# prom5.pl (GetPromoters)
#
# Daniel Westreich, July 2002
# Mullins Lab, Microbiology Dept.
# University of Washington
#
# Usage: prom5.pl filename
#
# Reads pipe-delimited records of the form
#     gene|gi|strand|address
# from the named file (or STDIN).  Lines starting with '#' are comments.
# For every record a region around <address> is requested from the NCBI
# Entrez viewer (view=fasta) and written to <gene>_promoter.out.  The name
# of every output file is appended to promoterlist.out.

use strict;
use warnings;
use IO::Socket;

# Offsets applied around the record's address to build the requested
# range.  Which offset goes on which side depends on the strand; see
# GetPromoterSequences.
my $RANGE_BACK  = 4300;
my $RANGE_FRONT = 199;

my $host = "www.ncbi.nlm.nih.gov";

# Fixed pieces of the viewer URL; the variable parts (from, to, strand,
# gi) are spliced in between them.
my $promfirst = "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?cmd=&txt=on&save=&cfm=&_from=";
my $prommid1  = "&_to=";
my $prommid2  = "&_strand=";
my $prommid3  = "&list_uids=";
my $promend   = "&db=&view=fasta&__from=&__to=&__strand=";

my $list_file = "promoterlist.out";
open(my $list_fh, '>', $list_file)
    or die "cannot open $list_file for writing: $!";

while (my $line = <>) {
    next if $line =~ /^\#/;    # comments
    chomp $line;

    my @words = split(/\|/, $line);
    next if @words == 0;       # blank line

    # A record needs all four fields; without them the request below
    # would be built from undefined values.
    if (@words < 4) {
        warn "skipping malformed line $.: expected gene|gi|strand|address\n";
        next;
    }
    my ($gene, $gi, $strand, $address) = @words[0 .. 3];

    print "For gene ".$gene."\n";
    GetPromoterSequences($gi, $strand, $address, $gene);
    print {$list_fh} $gene."_promoter.out\n";
}

close($list_fh) or die "error closing $list_file: $!";

######################

# GetPromoterSequences($gi, $strand, $address, $gene)
#
# Fetches the region around $address for sequence id $gi and writes it to
# "<gene>_promoter.out".
#
#   $gi      - value sent as the list_uids URL parameter
#   $strand  - "Crick" selects strand code 2 and swaps the two range
#              offsets; any other value selects strand code 0
#   $address - position the range is built around (truncated with int)
#   $gene    - name used as the output file prefix
#
# Everything in the HTTP response before the first line containing ">gi|"
# (HTTP headers and any preamble) is discarded; that line and all lines
# after it are copied to the output file.
#
# Dies if the output file cannot be opened or closed, or if the
# connection to $host cannot be established.
sub GetPromoterSequences {
    my ($gi, $strand, $address, $gene) = @_;

    my $outfile = $gene."_promoter.out";
    # 3-arg open: the gene name comes from input data and must never be
    # interpreted as part of the open mode.
    open(my $out_fh, '>', $outfile)
        or die "cannot open $outfile for writing: $!";

    my ($strand_code, $begaddress, $endaddress);
    if ($strand eq "Crick") {
        $strand_code = 2;
        $begaddress  = int($address) - $RANGE_FRONT;
        $endaddress  = int($address) + $RANGE_BACK;
    }
    else {
        $strand_code = 0;
        $begaddress  = int($address) - $RANGE_BACK;
        $endaddress  = int($address) + $RANGE_FRONT;
    }

    my $docurl = $promfirst.$begaddress
               . $prommid1.$endaddress
               . $prommid2.$strand_code
               . $prommid3.$gi
               . $promend;
    # DEBUG
    # print {$out_fh} $docurl."\n\n";

    my $remote = IO::Socket::INET->new(
        Proto    => "tcp",
        PeerAddr => $host,
        PeerPort => "http(80)",
    );
    unless ($remote) {
        # IO::Socket::INET reports the failure reason in $@.
        die "cannot connect to http daemon on $host: $@";
    }
    $remote->autoflush(1);

    # HTTP requires CRLF line endings; the Host header is sent for servers
    # that use it to select a virtual host.
    my $crlf = "\015\012";
    print {$remote} "GET $docurl HTTP/1.0".$crlf
                  . "Host: $host".$crlf
                  . $crlf;

    # Skip everything up to the first FASTA header line, then copy that
    # line and the remainder of the response verbatim.
    my $in_sequence = 0;
    while (my $response_line = <$remote>) {
        $in_sequence = 1 if !$in_sequence && $response_line =~ /\>gi\|/;
        print {$out_fh} $response_line if $in_sequence;
    }

    close($remote);
    # Buffered write errors only surface at close time.
    close($out_fh) or die "error closing $outfile: $!";
    return;
}