#!/usr/bin/perl
use warnings;
use strict;

#############################################
# downloads the lists of journal names and their
# abbreviations from the Web of Science database
# and generates a master list containing all
# of the journals listed.
#
# by Alex Chubaty (alex.chubaty@gmail.com)
# [12 Feb 2012]
#############################################

#--------------------------------------------
# jParser: HTML::Parser subclass that collects
# every piece of text found between a <dl>
# start tag and the matching </dl> end tag.
#
# State is held in two file-scoped lexicals so
# that it accumulates across all parser objects
# and is readable from package main below.
#--------------------------------------------
package jParser;
use base 'HTML::Parser';

# Text fragments gathered so far, in document order.
my @contents;

# True while the parser is inside a <dl> element.
my $flag = 0;

# Start-tag callback: begin collecting when a <dl> opens.
sub start {
    my ($self, $tag, $attr, $attrseq, $origtext) = @_;
    $flag = 1 if $tag eq "dl";
    return;
}

# Text callback: store the text when inside a <dl>, ignore it otherwise.
sub text {
    my ($self, $text) = @_;
    return unless $flag;

    # NOTE(review): the script as received had s/&/&/g here, which is a
    # no-op; presumably the search side was the "&amp;" entity and it was
    # decoded when the script was copied -- confirm against the original.
    $text =~ s/&amp;/&/g;

    # Put a backslash in front of every ampersand
    # (presumably for a LaTeX/BibTeX consumer -- TODO confirm).
    $text =~ s/&/\\&/g;

    push(@contents, $text);
    return;
}

# End-tag callback: stop collecting when the <dl> closes.
sub end {
    my ($self, $tag, $origtext) = @_;
    $flag = 0 if $tag eq "dl";
    return;
}

#--------------------------------------------
# main: fetch every index page, parse it, then
# post-process the collected text into
# masterlist.txt.
#--------------------------------------------
package main;
use LWP::Simple;

my $base_url    = "http://images.webofknowledge.com/WOK46/help/WOS/";
my $tmp_file    = "output.txt";
my $master_file = "masterlist.txt";

# One page for journals starting with a digit, then one per letter A..Z.
my @urls = map { $base_url . $_ . "_abrvjt.html" } ("0-9", "A" .. "Z");

for my $url (@urls) {
    my $html = get($url);
    die "$0: get failed" unless defined $html;

    my $parser = jParser->new;
    $parser->parse($html);
    $parser->eof;    # signal end of document so buffered text is flushed
}

# Dump the raw fragments to a scratch file and read them back as one string.
# NOTE(review): this round trip could probably be replaced by an in-memory
# join; it is kept so the bytes that reach the master list stay the same.
open(my $raw_out, '>', $tmp_file)
    or die "$0: cannot open $tmp_file for writing: $!";
print {$raw_out} @contents, "\n";
close($raw_out)
    or die "$0: cannot close $tmp_file: $!";

open(my $raw_in, '<', $tmp_file)
    or die "$0: cannot open $tmp_file for reading: $!";
my $textfile = do { local $/; <$raw_in> };    # slurp; $/ restored afterwards
close($raw_in);

# Tidy the line structure of the collected text.
$textfile =~ s/\n\n/\n/g;          # replace each pair of newlines with one
$textfile =~ s/\n\t/\t/g;          # join a line that starts with a tab onto the previous line
$textfile =~ s/\n\'92/\'92/;       # drop the first newline that precedes a literal '92

open(my $master_out, '>', $master_file)
    or die "$0: cannot open $master_file for writing: $!";
print {$master_out} $textfile;

# add custom journal entries
print {$master_out} "THE AMERICAN NATURALIST\tAM NAT\n";
print {$master_out} "THE CANADIAN ENTOMOLOGIST\tCAN ENTOMOL\n";
print {$master_out} "TRENDS IN ECOLOGY AND EVOLUTION\tTRENDS ECOL EVOL\n";

close($master_out)
    or die "$0: cannot close $master_file: $!";

unlink($tmp_file)
    or warn "$0: could not remove $tmp_file: $!";

exit;