#!/usr/bin/env ruby
# webcat.rb(books) ver.0.1 2003.1.27-2003.1.27
# ISBN-search -> detail-parse
# original; isbn2bib.rb for bk1 by takahashi
require 'net/http'
require 'cgi'
#require 'simpleuri'
#require 'webagent'
class CacheBib_webcat < CacheBib
attr_accessor :isbn
attr_accessor :flag_review #, :affiliate_code
attr_accessor :url_site
attr_accessor :title, :title2, :title3, :author, :author2, :publisher
attr_accessor :size, :note, :note2, :type, :type2, :code, :case
attr_accessor :desc, :index
attr_accessor :ncid, :accessid
attr_accessor :reviews, :delivery
attr_accessor :insystemKcode, :outputKcode, :messageKcode
attr_accessor :detail_id
def initialize ()
super
@reviews = Array.new()
@flag_review = true
@url_site = 'http://webcatplus.nii.ac.jp/'
@insystemKcode = 3 #sjis #1-jis,2-euc,3-sjis
@outputKcode = 2 #euc
@messagetKcode = 3 #sjis
end
#===== ===== parts ===== =====
def word_delivery ()
delivery = ''
flag = false
# title-check -> no-info / get-info check
if !@title.nil?
flag = true
if @delivery.nil?
delivery = "ない"
else
if @delivery =~ /翌日〜3日/
delivery = "1〜3日"
elsif @delivery =~ /3日〜6日/
delivery = "3〜6日"
elsif @delivery =~ /3日〜3週間/
delivery = "3〜21日"
else
delivery = "?"
end
end
else
message = "WAR; maybe data-error, no-title."
@stderr.puts message.kconv(@messageKcode)
end
return delivery, flag
end
#===== ===== url ===== =====
def url_isbnsearch ()
url = @url_site + "assoc.cgi?mode=equal0&isbn=#{@isbn}"
return url
end
def url_bibinfo ()
url = @url_site + "tosho.cgi?mode=tosho{@accessid}=#{@ncid}"
return url
end
#===== ===== parse ===== =====
def parse_isbnsearch (str)
count = 0
str.each{ |line|
if line =~ %r|]+?>(.*?)| then
# if line =~ %r|(.*?)| then
@accessid, @ncid, @title = $1, $2, $3
if @ncid.nil?
@title = nil
else
count = count + 1
end
end
}
if count > 1
message = "WAR; ISBN-search hit multi-column. ISBN; " + @isbn
@stderr.puts message.kconv(@messageKcode)
end
end
def parse_bibinfo (str)
#----- set key and data ------
mode_tbl = {
# :data1 => '
',
:desc => '

',
:index => '

',
# :data2 => '

',
:title => '
タイトル',
:author => '
責任表示',
:type => '
資料種別',
:type2 => '
版表示',
:publisher => '
出版事項',
:size => '
形態事項',
:isbn => '
ISBN', #dummy
:note2 => '
内容著作注記',
:note => '
注記',
:title2 => '
別タイトル',
:title3 => '
統一タイトル',
:author2 => '
著者標目',
:code => '
分類',
:case => '
件名'
}
mode = :none
data = Hash.new()
#----- read and sepalate by key(mode_tbl) ------
str.each{|line|
l = line.chomp.chomp("\r").kconv(@insystemKcode)
mode_tbl.each{ |key,keydata|
mode = key if /#{keydata}/s =~ l
}
if mode != :none
if data[mode]
data[mode] << l
else
data[mode] = l
end
end
}
#----- parse -----
keys = [:desc, :index]
stocks = [nil, nil]
keys.each_index{ |i|
key = keys[i]
if !data[key].nil?
if data[key] =~ /#{mode_tbl[key]}.+?
(.+?)<\/td>/smi
stocks[i] = $1.gsub(/\s+/, ' ').gsub(/ /i, "\t").gsub(/<[^>]+>/,'').strip
end
end
}
@desc, @index = stocks
keys = [:title, :author, :type, :type2, :publisher, :size, :note2, :note, :title2, :title3, :author2, :code, :case]
stocks = [nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil]
keys.each_index{ |i|
key = keys[i]
if !data[key].nil?
if data[key] =~ /#{mode_tbl[key]}.+? | (.+?)<\/td>/smi
stocks[i] = $1.gsub(/\s+/, ' ').gsub(/ /i, "\t").gsub(/<[^>]+>/,'').strip
end
end
}
@title, @author, @type, @type2, @publisher, @size, @note2, @note, @title2, @title3, @author2, @code, @case = stocks
end
#===== ===== main ===== =====
def set_info #===== set local-value from @info (from cache) =====
#----- set book-info (from cache) -----
@flag_overprint = true if @info["overprint"]
@ncid = @info["ncid"]
@flag_isbnsearch = true if !@ncid
@title = @info["title"]
@delivery = @info["delivery"]
end
def get_info #===== get book-info(@info) from url =====
message = 'CHK; access; ' + @url_site + ' -> ' + @isbn
@stderr.puts message.kconv(@messageKcode)
#----- get detail-code -----
if @flag_isbnsearch
agent = WebAgent.new()
agent.uri = url_isbnsearch
agent.get()
body = agent.body.kconv(@insystemKcode)
parse_isbnsearch(body)
end
#----- get book-info -----
agent = WebAgent.new()
agent.uri = url_bibinfo
agent.get()
body = agent.body.kconv(@insystemKcode)
parse_bibinfo(body)
if !@flag_overprint
@info = {}
end
@info["isbn"] = @isbn
@info["title"] = @title
@info["desc"] = @desc
@info["index"] = @index
@info["author"] = @author
@info["type"] = @type
@info["type2"] = @type2
@info["publisher"] = @publisher
@info["size"] = @size
@info["note2"] = @note2
@info["note"] = @note
@info["title2"] = @title2
@info["title3"] = @title3
@info["pubdate"] = @pubdate
@info["author2"] = @author2
@info["code"] = @code
@info["case"] = @case
@info["ncid"] = @ncid
@info["accessid"] = @accessid
@info["overprint"] = true if @flag_overprint
if @flag_review
@info["reviews"] = @reviews
end
end
end
|