# prom1.pl (GetMapURLs)
#
# Daniel Westreich, October 2002
# Mullins Lab, Microbiology Dept.
# University of Washington
#
# Usage: prom1.pl filename
# Alt usage: prom1.pl filename > out.htm
#
# Reads gene names, one per line (lines starting with '#' are skipped),
# queries the NCBI mapview search CGI for each name, and writes one
# record per gene to mapurls.out:
#     gene|map-url                 on success
#     # gene: Error - <reason>     on failure
# If no gene failed, offers to run the next step (prom2.pl mapurls.out).
#

use strict;
use warnings;
use IO::Socket;

#http://www.ncbi.nlm.nih.gov/mapview/map_search.cgi?chr=hum_chr.inf&query=
#&qchr=&advsrch=off
my $prefix   = "http://";
my $host     = "www.ncbi.nlm.nih.gov";
my $cgiinfo  = "/mapview/";
my $frontdoc = "map_search.cgi?chr=hum_chr.inf&query=";
my $enddoc   = "&qchr=&advsrch=off";

my $logfile = "mapurls.out";
my $error   = 0;

# The final prompt has no trailing newline; without autoflush it may not be
# displayed before we block waiting for the answer.
$| = 1;

open(my $out, '>', $logfile)
    or die "cannot open $logfile for writing: $!";

while (my $line = <>) {
    if ($line =~ /^\#/) {
        print "Skipping line...\n";
        next;
    }

    # Trim the newline, any DOS carriage return, and surrounding blanks.
    $line =~ s/^\s+//;
    $line =~ s/\s+\z//;

    # A blank line (e.g. at the end of the list) is not a gene name.
    next if $line eq '';

    my $gene = $line;
    print "Beginning our search for gene ".$gene."\n";

    # 2.pl
    my $mapurl = GetMapURLFromGeneName($gene);
    print $out $mapurl."\n\n";

    # Failure records are the ones that start with '#' and mention "Error".
    if ($mapurl =~ /^#.*Error/) {
        $error = 1;
    }
}

# Close (and thereby flush) the log before anybody else reads it.
close($out) or die "error while writing $logfile: $!";

if ($error) {
    print "** ERROR. Check log file mapurls.out for details.\n\n";
    print "When errors are corrected, invoke next step with 'prom2.pl mapurls.out'\n";
    exit(1);
}
else {
    print "\n\nNo errors. Continue with step 2 (command line 'prom2.pl mapurls.out')? [Y/n]";

    # STDIN may already be at EOF (gene list piped in); treat that as the
    # default answer, "yes".
    my $answer = <STDIN>;
    if (defined $answer && $answer =~ /^[nN]/) {
        print "Invoke the next step of this process at any time with 'prom2.pl mapurls.out'";
    }
    else {
        # List form: no shell is involved in launching the next step.
        if (system("prom2.pl", $logfile) != 0) {
            my $why = ($? == -1) ? "could not be started: $!"
                                 : "exited with status ".($? >> 8);
            warn "prom2.pl $why\n";
        }
    }
}

######################
# GetMapURLFromGeneName($gene)
#
# Sends an HTTP GET for the mapview search page of $gene and scans the
# response for lines containing "Genes_seq".
#
# Returns:
#   "gene|url"                 exactly one such line was found and the
#                              whitespace-delimited token containing
#                              "Genes_seq" carried an href= value; url is
#                              $prefix.$host.$cgiinfo followed by that value
#   "# gene: Error - ..."      zero lines, more than one line, or a line
#                              from which no href= value could be taken
#
# Side effects: on success also writes a "#Successfully got map URL" note
# to the log file. Dies if the TCP connection cannot be opened.
#
sub GetMapURLFromGeneName {
    my ($gene) = @_;

    # Percent-encode everything outside the URL "unreserved" set so that
    # spaces, '&', '#' etc. in a name cannot corrupt the request line.
    (my $query = $gene) =~ s/([^A-Za-z0-9_.~-])/sprintf("%%%02X", ord($1))/ge;
    my $docurl = $cgiinfo.$frontdoc.$query.$enddoc;

    my $remote = IO::Socket::INET->new(
        Proto    => "tcp",
        PeerAddr => $host,
        PeerPort => "http(80)",    # service name, falling back to port 80
    );
    unless ($remote) { die "cannot connect to http daemon on $host: $@" }
    $remote->autoflush(1);

    # HTTP lines end in CRLF; the Host header lets name-based virtual
    # hosts route the request.
    print $remote "GET $docurl HTTP/1.0\015\012Host: $host\015\012\015\012";

    my $cSeqs = 0;                 # number of response lines with Genes_seq
    my $Genes_seq_url;

    LINE:
    while (my $response_line = <$remote>) {
        next LINE unless $response_line =~ /Genes_seq/;

        $cSeqs++;
        last LINE if $cSeqs > 1;   # ambiguous; no need to read further

        for my $word (split ' ', $response_line) {
            next unless $word =~ /Genes_seq/;

            # now at "href=...Genes_seq...>..."; keep what precedes '>'
            my ($attr) = split />/, $word;          # "href=maps...fill=40"
            my (undef, $path) = split /href=/, $attr;   # "maps...fill=40"

            if (defined $path) {
                $Genes_seq_url = $prefix.$host.$cgiinfo.$path;
                last;
            }
        }
    }
    close $remote;

    if ($cSeqs > 1) {
        return "# $gene: Error - user must disambiguate Genes_seq tags in ".$prefix.$host.$docurl;
    }
    if ($cSeqs == 0) {
        return "# $gene: Error - did not find a Genes_seq tag in ".$prefix.$host.$docurl;
    }
    unless (defined $Genes_seq_url) {
        return "# $gene: Error - found a Genes_seq tag without an href in ".$prefix.$host.$docurl;
    }

    print $out "#Successfully got map URL for $gene\n";
    return $gene."|".$Genes_seq_url;
}