#!/usr/bin/env ruby
# webcat.rb(books)
# ISBN-search -> detail-parse
# original; isbn2bib.rb for bk1 by takahashi
# ver.0.1 2003.1.27-2003.1.27
# ver.0.2 2004.01.06-2004.01.06 新キャッシュに対応
if __FILE__ == $0
require 'cookie'
require 'webagent'
require 'cache'
require 'cachebib'
end
class CacheBib_webcatplus < CacheBib
attr_accessor :access_id
attr_accessor :title2, :title3, :author2
attr_accessor :pubtype, :type, :note, :note2, :code, :case
# Initialise the instance via the parent class, then set the values specific
# to this site: the shop identifier and the base URL that the url_* builders
# prepend to every request path.
def initialize
  super
  @shop_id = 'wcp'
  @url_site = 'http://webcatplus.nii.ac.jp/'
end
#===== ===== parts ===== =====
# Always returns the pair ['', false].
# NOTE(review): presumably a CacheBib hook for delivery text that has no
# equivalent on this site - confirm against CacheBib.
def word_delivery
  ['', false]
end
#===== ===== url ===== =====
# Build the URL of the ISBN search request.
# isbn:: ISBN to look up (defaults to @isbn); inserted into the query as-is.
# Returns the base URL (@url_site) followed by the search path and query.
def url_isbnsearch(isbn = @isbn)
  query = "assoc.cgi?mode=equal0&isbn=#{isbn}"
  @url_site + query
end
# Build the URL of the detail page for one record.
# id:: record identifier (defaults to @id).
# Returns @url_site followed by "tosho.cgi?mode=tosho&<access_id>=<id>", where
# <access_id> is the query parameter name held in @access_id (set by
# parse_isbnsearch).
#
# The previous literal contained "tosho{@access_id}=", an incomplete
# interpolation that put the text "{@access_id}" into the URL verbatim.
# NOTE(review): the "&" separator is reconstructed (the "&#" prefix looks like
# it was lost as an HTML entity) - confirm against the site's link format.
def url_bibinfo(id = @id)
  @url_site + "tosho.cgi?mode=tosho&#{@access_id}=#{id}"
end
#===== ===== parse ===== =====
# Scan an ISBN-search result page and remember the first hit.
#
# Each match of the pattern contributes the triple [$1, $2, $3]; matches whose
# first capture is nil are skipped. If the distinct triples number more than
# one, a warning is emitted through message(). The first triple (or nils when
# there is none) is stored in @access_id, @id and @title.
# str:: text of the search result page.
# Returns the array of all collected triples (possibly empty).
#
# NOTE(review): the pattern has a single capture group although three captures
# are read, so it appears to have lost part of its text - restore it from a
# pristine copy.
def parse_isbnsearch(str)
  hits = []
  str.scan(%r|]+?>(.*?)|) do
    next if $1.nil?
    hits << [$1, $2, $3]
  end
  message("WAR; ISBN-search hit multi-column. ISBN; #{@isbn}") if hits.uniq.size > 1
  @access_id, @id, @title = hits.first
  hits
end
# Parse a bibliographic detail page and store its fields in instance variables.
#
# str:: text of the detail page.
#
# Step 1 (collect): str is walked with str.each; every line is chomped and
# converted with kconv(@insystemKcode). mode_tbl maps a field symbol to a
# marker pattern. A line that matches a marker switches the current field (when
# several markers match, the last one in mode_tbl order wins), and every line
# seen while a field is current - the marker line included - is appended to
# data[field] without separators. Lines before the first marker are dropped.
# :isbn is collected but never extracted afterwards (marked "dummy").
#
# Step 2 (extract): the text collected for each field is matched against a
# per-field regexp built from its marker. In the captured text, whitespace runs
# are collapsed to one space, the / /i pattern is replaced by a tab, tags
# (<...>) are removed and the result is stripped; an empty result stays nil.
# The values are assigned to @desc and @index, then to @title, @author, @type,
# @pubtype, @publisher, @pubsize, @note2, @note, @title2, @title3, @author2,
# @code and @case.
#
# Step 3 (post-process):
# * a trailing "(...)" in @title is cut off and stored in @label;
# * the / / pattern is deleted from @author2 and from @note2;
# * @pubsize is split at its first ";"; the half that contains "p" becomes
#   @page and the other half @pubsize (if both contain "p", the second
#   assignment wins; if neither does, @pubsize is left unchanged).
#
# The return value is whatever the final statement yields and is not meaningful.
#
# NOTE(review): the marker literals in mode_tbl and the extraction regexps look
# as if their HTML markup was stripped (literals run across lines, one regexp is
# broken over two lines, another contains " | "). As written this method is
# unlikely to parse - TODO restore the literals from a pristine copy.
# NOTE(review): the / / patterns may originally have matched an entity or a
# non-ASCII space rather than a plain blank - confirm.
# NOTE(review): str.each relies on String#each, which Ruby 1.9 and later do not
# provide (each_line is the replacement) - confirm the targeted Ruby version.
def parse_bibinfo ( str )
#----- set key and data ------
mode_tbl = {
# :data1 => '
',
:desc => '

',
:index => '

',
# :data2 => '

',
:title => '
タイトル',
:author => '
責任表示',
:type => '
資料種別',
:pubtype => '
版表示',
:publisher => '
出版事項',
:pubsize => '
形態事項',
:isbn => '
ISBN', #dummy
:note2 => '
内容著作注記',
:note => '
注記',
:title2 => '
別タイトル',
:title3 => '
統一タイトル',
:author2 => '
著者標目',
:code => '
分類',
:case => '
件名'
}
mode = :none
data = Hash.new()
#----- read and sepalate by key(mode_tbl) ------
str.each{|line|
l = line.chomp.chomp("\r").kconv(@insystemKcode)
mode_tbl.each{ |key,keydata|
mode = key if /#{keydata}/s =~ l
}
if mode != :none
if data[mode]
data[mode] << l
else
data[mode] = l
end
end
}
#----- parse -----
keys = [:desc, :index]
stocks = [nil, nil]
keys.each_index{ |i|
key = keys[i]
if !data[key].nil?
if data[key] =~ /#{mode_tbl[key]}.+?
(.+?)<\/td>/smi
stock = $1.gsub(/\s+/, ' ').gsub(/ /i, "\t").gsub(/<[^>]+>/,'').strip
stocks[i] = stock if stock != ''
end
end
}
@desc, @index = stocks
keys = [:title, :author, :type, :pubtype, :publisher, :pubsize, :note2, :note, :title2, :title3, :author2, :code, :case]
stocks = [nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil]
keys.each_index{ |i|
key = keys[i]
if !data[key].nil?
if data[key] =~ /#{mode_tbl[key]}.+? | (.+?)<\/td>/smi
stock = $1.gsub(/\s+/, ' ').gsub(/ /i, "\t").gsub(/<[^>]+>/,'').strip
stocks[i] = stock if stock != ''
end
end
}
@title, @author, @type, @pubtype, @publisher, @pubsize, @note2, @note, @title2, @title3, @author2, @code, @case = stocks
if @title =~ /\((.+?)\)$/
@title, @label = $`.strip, $1.strip
end
@author2.gsub!(/ /, '') unless @author2.nil?
if @pubsize =~ /;/
str1, str2 = $`.strip , $'.strip
@page, @pubsize = str1, str2 if str1 =~ /p/
@page, @pubsize = str2, str1 if str2 =~ /p/
end
@note2.gsub!(/ /, '') unless @note2.nil?
end
# Fill the book-info hash (@info) from the instance variables.
#
# The parent implementation runs first; afterwards the fields that only
# Webcat Plus provides are copied into @info under string keys of the same
# name. Returns the value of @case, as the final assignment did before.
def set_info
  super
  # Information specific to Webcat Plus, in the order it is stored.
  extras = [
    ["access_id", @access_id],
    ["title2",    @title2],
    ["title3",    @title3],
    ["author2",   @author2],
    ["pubtype",   @pubtype],
    ["type",      @type],
    ["note",      @note],
    ["note2",     @note2],
    ["code",      @code],
    ["case",      @case]
  ]
  extras.each { |key, value| @info[key] = value }
  @case
end
end
# Manual check: when this file is executed directly, fetch the record for a
# sample ISBN through get_data (not defined in this file) and print it.
p CacheBib_webcatplus.new.get_data('4-15-010739-4') if __FILE__ == $0
|