2007年11月15日木曜日

住所抽出

CLさんのとこから^^
http://blog.cgfm.jp/cota/archives/54
http://blog.cgfm.jp/cota/archives/51

movimap_scrape.pl
#!/usr/local/bin/perl
use strict;
use warnings;
use Web::Scraper;
use Data::Dumper;
use Template;
use URI;
use Geography::AddressExtract::Japan;

use encoding "utf8", STDOUT => "utf8";

# Listing page to scrape; the URI object is handed straight to scrape().
my $uri = URI->new('http://www.movimap.net/kodawariblog/family/');

# Every entry on the page sits inside <div class="box">.
my $box_xpath = '//div[@class="box"]';

# The scraper gathers three lists that are later walked by index:
#   link[]    - href of each entry's heading anchor
#   title[]   - text of the same anchor
#   address[] - text of the paragraph inside the "box02" div
my $scraper = scraper {
    process( $box_xpath . '/div[@class="box01"]/h3/a', 'link[]'    => '@href' );
    process( $box_xpath . '/div[@class="box01"]/h3/a', 'title[]'   => 'TEXT' );
    process( $box_xpath . '/div[@class="box02"]/p',    'address[]' => 'TEXT' );
};
my $result = $scraper->scrape($uri);

# Build one record (address / link / title) per scraped address paragraph.
# The three scraped lists are treated as parallel arrays indexed by $i.
my @list;

# Default each list to an empty array ref: dereferencing an undefined value
# with @{...} is fatal under "use strict", which would abort the script with
# an obscure message if the "address" key is absent from the result
# (presumably the case when nothing on the page matched -- TODO confirm).
my $addresses = $result->{"address"} || [];
my $links     = $result->{"link"}    || [];
my $titles    = $result->{"title"}   || [];

for my $i ( 0 .. $#{$addresses} ) {
    my $found = Geography::AddressExtract::Japan->extract( $addresses->[$i] );

    # Only the first extracted address is used. If nothing was extracted,
    # fall back to an empty hash so the record still gets an (empty) address
    # instead of triggering autovivification and "uninitialized" warnings.
    my $first =
        ( ref($found) eq 'ARRAY' && ref( $found->[0] ) eq 'HASH' )
        ? $found->[0]
        : {};

    # Concatenate city + aza + number, treating any missing part as ''.
    my $address = join '',
        map { defined $_ ? $_ : '' } @{$first}{qw(city aza number)};

    push @list, {
        address => $address,
        link    => $links->[$i],
        title   => $titles->[$i],
    };
}
# Render the collected records through map.tt (looked up in the current
# directory); with no output argument given, the result goes to STDOUT.
my $tt = Template->new({
    INCLUDE_PATH => ".",
    EVAL_PERL    => 1,
}) or die Template->error(), "\n";

# Pass the list by reference: a bare @list inside the hash constructor is
# flattened, so only the first record would end up under "result" and the
# remaining records would become bogus key/value pairs.
# process() returns false on failure, so report the template error instead
# of exiting silently.
$tt->process( "map.tt", { result => \@list } )
    or die $tt->error(), "\n";

Geography::AddressExtract::Japan最高っす!

0 件のコメント: