#!/usr/bin/env ruby
# webcat.rb(books) ver.0.1 2003.1.27-2003.1.27
# ISBN-search -> detail-parse
# original; isbn2bib.rb for bk1 by takahashi

require 'net/http'
require 'cgi'
#require 'simpleuri'
#require 'webagent'

# Looks a book up on the Webcat Plus site by ISBN and stores the parsed
# bibliographic fields in @info.
#
# Two pages are involved: an ISBN search page (parse_isbnsearch), which yields
# @accessid / @ncid / @title, and a detail page (parse_bibinfo), which yields
# the remaining fields.
#
# This class relies on names it does not define itself: the CacheBib
# superclass, the @info and @stderr instance variables, WebAgent and
# String#kconv.
# NOTE(review): presumably CacheBib provides @info/@stderr and the loading
# script provides WebAgent and kconv -- confirm against the caller.
class CacheBib_webcat < CacheBib
  attr_accessor :isbn
  attr_accessor :flag_review #, :affiliate_code
  attr_accessor :url_site
  attr_accessor :title, :title2, :title3, :author, :author2, :publisher
  attr_accessor :size, :note, :note2, :type, :type2, :code, :case
  attr_accessor :desc, :index
  attr_accessor :ncid, :accessid
  attr_accessor :reviews, :delivery
  attr_accessor :insystemKcode, :outputKcode, :messageKcode
  attr_accessor :detail_id

  # Label text that introduces each field on the detail page.
  # parse_bibinfo assigns every input line to the label it matched most
  # recently; when one line matches several labels, the label iterated last
  # wins.
  # Two further entries were disabled in the original table: :data1 (its
  # label text is not recoverable from this copy) and :data2 => '書誌情報'.
  # NOTE(review): these literals are matched against text converted with
  # @insystemKcode, so this file's own encoding has to agree with that
  # setting -- confirm.
  FIELD_LABELS = {
    :desc      => '内容',
    :index     => '目次',
    :title     => 'タイトル',
    :author    => '責任表示',
    :type      => '資料種別',
    :type2     => '版表示',
    :publisher => '出版事項',
    :size      => '形態事項',
    :isbn      => 'ISBN', # dummy: ends the preceding section, never extracted
    :note2     => '内容著作注記',
    :note      => '注記',
    :title2    => '別タイトル',
    :title3    => '統一タイトル',
    :author2   => '著者標目',
    :code      => '分類',
    :case      => '件名'
  }.freeze

  # Fields parse_bibinfo extracts; each is stored in the instance variable of
  # the same name.
  PARSED_FIELDS = [
    :desc, :index,
    :title, :author, :type, :type2, :publisher, :size, :note2, :note,
    :title2, :title3, :author2, :code, :case
  ].freeze

  # Instance variables copied into @info by get_info, in storage order.
  INFO_FIELDS = %w[
    isbn title desc index author type type2 publisher size note2 note
    title2 title3 pubdate author2 code case ncid accessid
  ].freeze

  def initialize
    super
    @reviews = []
    @flag_review = true
    @url_site = 'http://webcatplus.nii.ac.jp/'
    @insystemKcode = 3 # sjis (1-jis, 2-euc, 3-sjis)
    @outputKcode = 2   # euc
    # Fixed: this was assigned to the misspelled @messagetKcode, which left
    # @messageKcode (the variable every message is converted with) nil.
    @messageKcode = 3  # sjis
  end

  #===== ===== parts ===== =====

  # Translates @delivery into a short display string.
  #
  # Returns two values: the display string and a flag that is true when
  # @title is set. Without a title a warning is written to @stderr and
  # '' / false are returned.
  def word_delivery
    # title-check -> no-info / get-info check
    if @title.nil?
      message = "WAR; maybe data-error, no-title."
      @stderr.puts message.kconv(@messageKcode)
      return '', false
    end

    delivery = case @delivery
               when nil then "ない"
               when /翌日〜3日/ then "1〜3日"
               when /3日〜6日/ then "3〜6日"
               when /3日〜3週間/ then "3〜21日"
               else "?"
               end
    return delivery, true
  end

  #===== ===== url ===== =====

  # URL of the ISBN search page for @isbn.
  def url_isbnsearch
    @url_site + "assoc.cgi?mode=equal0&isbn=#{@isbn}"
  end

  # URL of the detail page; @accessid is used as the query parameter name and
  # @ncid as its value.
  def url_bibinfo
    @url_site + "tosho.cgi?mode=tosho&#{@accessid}=#{@ncid}"
  end

  #===== ===== parse ===== =====

  # Scans the ISBN search result page (a String) line by line and sets
  # @accessid, @ncid and @title from the last matching line. Writes a warning
  # to @stderr when more than one line produced an @ncid.
  def parse_isbnsearch(str)
    count = 0
    # each_line instead of String#each: the latter was removed in Ruby 1.9.
    str.each_line do |line|
      # NOTE(review): this pattern has a single capture group although $1..$3
      # are read below, so @ncid is always nil as written. The pattern looks
      # truncated (markup apparently lost from this copy) -- restore it from
      # the upstream source.
      next unless line =~ %r|]+?>(.*?)|
      # Alternative pattern left disabled in the original (same caveat):
      #   if line =~ %r|(.*?)| then
      @accessid, @ncid, @title = $1, $2, $3
      if @ncid.nil?
        @title = nil
      else
        count += 1
      end
    end
    return unless count > 1

    message = "WAR; ISBN-search hit multi-column. ISBN; #{@isbn}"
    @stderr.puts message.kconv(@messageKcode)
  end

  # Parses the detail page (a String) and stores every field listed in
  # PARSED_FIELDS in its instance variable; a field that is absent or does
  # not match is set to nil. The return value is not meaningful.
  def parse_bibinfo(str)
    sections = collect_sections(str)
    PARSED_FIELDS.each do |key|
      value = extract_field(sections[key], FIELD_LABELS[key])
      instance_variable_set("@#{key}", value)
    end
  end

  #===== ===== main ===== =====

  # Restores local state from @info (from cache) and decides whether the ISBN
  # search has to run before the detail page can be fetched.
  def set_info
    @flag_overprint = true if @info["overprint"]
    @ncid = @info["ncid"]
    # Fixed: get_info stores "accessid" but it was never read back, so a
    # cached @ncid led url_bibinfo to interpolate a nil @accessid. A value
    # already set on the object is kept.
    @accessid ||= @info["accessid"]
    # Both values are needed by url_bibinfo; search again if either is missing.
    @flag_isbnsearch = true unless @ncid && @accessid
    @title = @info["title"]
    @delivery = @info["delivery"]
  end

  # Fetches the book information from the site and writes it to @info.
  # The ISBN search page is fetched first when @flag_isbnsearch is set.
  # @info is replaced by a fresh Hash unless @flag_overprint is set.
  def get_info
    message = "CHK; access; #{@url_site} -> #{@isbn}"
    @stderr.puts message.kconv(@messageKcode)

    #----- get detail-code -----
    parse_isbnsearch(fetch_page(url_isbnsearch)) if @flag_isbnsearch

    #----- get book-info -----
    parse_bibinfo(fetch_page(url_bibinfo))

    @info = {} if !@flag_overprint
    INFO_FIELDS.each do |name|
      @info[name] = instance_variable_get("@#{name}")
    end
    @info["overprint"] = true if @flag_overprint
    @info["reviews"] = @reviews if @flag_review
  end

  private

  # Fetches +url+ with WebAgent and returns the body converted with
  # @insystemKcode.
  def fetch_page(url)
    agent = WebAgent.new
    agent.uri = url
    agent.get
    agent.body.kconv(@insystemKcode)
  end

  # Splits the page into per-field text: each line (line ending removed,
  # converted with @insystemKcode) is appended, without a separator, to the
  # field whose label matched most recently. Lines before the first label
  # match are dropped. Returns a Hash keyed by the FIELD_LABELS keys.
  def collect_sections(str)
    mode = :none
    data = {}
    # each_line instead of String#each: the latter was removed in Ruby 1.9.
    str.each_line do |line|
      l = line.chomp.chomp("\r").kconv(@insystemKcode)
      FIELD_LABELS.each { |key, label| mode = key if /#{label}/s =~ l }
      next if mode == :none

      if data[mode]
        data[mode] << l
      else
        data[mode] = l
      end
    end
    data
  end

  # Pulls one field value out of its section text: whitespace runs are
  # collapsed to one space, markup tags are removed and the result is
  # stripped. Returns nil when the section is missing or does not match.
  def extract_field(section, label)
    return nil if section.nil?
    # NOTE(review): presumably an opening table-cell tag stood between
    # ".+?" and "(.+?)" originally; the pattern is kept as found -- confirm.
    return nil unless section =~ /#{label}.+?(.+?)<\/td>/smi

    # NOTE(review): the /\n/ substitution can never match because all
    # whitespace was just collapsed to spaces. Presumably it targeted a
    # line-break tag before the markup was lost -- confirm and restore.
    $1.gsub(/\s+/, ' ').gsub(/\n/i, "\t").gsub(/<[^>]+>/, '').strip
  end
end