#!/usr/bin/env ruby
# webcat.rb(books)
#   ISBN-search -> detail-parse
#   original; isbn2bib.rb for bk1 by takahashi
#   ver.0.1 2003.1.27-2003.1.27
#   ver.0.2 2004.01.06-2004.01.06 support for the new cache

# The helper libraries are only loaded when this file is run directly.
if __FILE__ == $0
  require 'cookie'
  require 'webagent'
  require 'cache'
  require 'cachebib'
end

# CacheBib backend for Webcat Plus (http://webcatplus.nii.ac.jp/).
#
# Builds the ISBN-search and detail-page URLs and parses the returned HTML
# into instance variables, which set_info then copies into @info.
#
# NOTE(review): @isbn, @id, @info, @insystemKcode, #message and #get_data are
# not defined in this file; presumably they come from CacheBib - confirm.
class CacheBib_webcatplus < CacheBib
  attr_accessor :access_id
  attr_accessor :title2, :title3, :author2
  attr_accessor :pubtype, :type, :note, :note2, :code, :case

  def initialize
    super
    @shop_id  = 'wcp'
    @url_site = 'http://webcatplus.nii.ac.jp/'
  end

  #===== ===== parts ===== =====
  # Always returns the pair ['', false].
  def word_delivery
    return '', false
  end

  #===== ===== url ===== =====
  # URL of the exact-match ISBN search for +isbn+ (defaults to @isbn).
  def url_isbnsearch(isbn = @isbn)
    @url_site + "assoc.cgi?mode=equal0&isbn=#{isbn}"
  end

  # URL of the detail page for +id+ (defaults to @id). The query parameter
  # name is whatever parse_isbnsearch stored in @access_id.
  def url_bibinfo(id = @id)
    @url_site + "tosho.cgi?mode=tosho&#{@access_id}=#{id}"
  end

  #===== ===== parse ===== =====
  # Scans the ISBN-search result page +str+ and stores the first hit in
  # @access_id, @id and @title. Emits a warning through #message when more
  # than one distinct hit is found.
  #
  # Returns the array of all hits, each a [$1, $2, $3] triple.
  def parse_isbnsearch(str)
    count = []
    # FIXME(review): this pattern is reproduced byte-for-byte from the copy
    # under review. It has a single capture group while the block reads $1,
    # $2 and $3, so part of the pattern appears to be missing (most likely the
    # HTML anchor markup). Restore it from the upstream file.
    str.scan(%r|]+?>(.*?)|) {
      count << [$1, $2, $3] unless $1.nil?
    }
    if count.uniq.size > 1
      message("WAR; ISBN-search hit multi-column. ISBN; #{@isbn}")
    end
    # count.first is nil when nothing matched, which leaves all three nil.
    @access_id, @id, @title = count.first
    return count
  end

  # Parses the detail page +str+ and fills the bibliographic instance
  # variables (@desc, @index, @title, @author, ... @case, plus @label and
  # @page when they can be split out of the title / physical description).
  def parse_bibinfo(str)
    #----- set key and data ------
    # Section key => label text searched for in the page.
    mode_tbl = {
      # :data1 => (value not recoverable from the copy under review)
      :desc      => '内容',
      :index     => '目次',
      # :data2 => '書誌情報',
      :title     => 'タイトル',
      :author    => '責任表示',
      :type      => '資料種別',
      :pubtype   => '版表示',
      :publisher => '出版事項',
      :pubsize   => '形態事項',
      :isbn      => 'ISBN', # dummy: collected below but never extracted
      :note2     => '内容著作注記',
      :note      => '注記',
      :title2    => '別タイトル',
      :title3    => '統一タイトル',
      :author2   => '著者標目',
      :code      => '分類',
      :case      => '件名'
    }
    mode = :none
    data = {}

    #----- read and separate by key(mode_tbl) ------
    # String#each was removed in Ruby 1.9; each_line is the equivalent that
    # also exists on older interpreters.
    str.each_line { |line|
      l = line.chomp.chomp("\r").kconv(@insystemKcode)
      # Every matching label reassigns mode, so when several labels match the
      # same line the last one in iteration order wins. The mode then stays in
      # effect for the following lines until another label matches.
      # NOTE(review): the `s` regexp option selects a Shift_JIS (Windows-31J)
      # pattern encoding; kept as in the original - confirm it still suits
      # @insystemKcode on the interpreter in use.
      mode_tbl.each { |key, keydata|
        mode = key if /#{keydata}/s =~ l
      }
      next if mode == :none
      if data[mode]
        data[mode] << l
      else
        data[mode] = l
      end
    }

    #----- parse -----
    @desc, @index = extract_wcp_fields(data, mode_tbl, [:desc, :index])

    @title, @author, @type, @pubtype, @publisher, @pubsize,
      @note2, @note, @title2, @title3, @author2, @code, @case =
      extract_wcp_fields(data, mode_tbl,
                         [:title, :author, :type, :pubtype, :publisher,
                          :pubsize, :note2, :note, :title2, :title3,
                          :author2, :code, :case])

    # A trailing "(...)" on the title is split off into @label.
    if @title =~ /\((.+?)\)$/
      @title, @label = $`.strip, $1.strip
    end

    @author2.gsub!(/ /, '') unless @author2.nil?

    # The physical description is split at the first ';'. Whichever half
    # contains a 'p' becomes @page; if both do, the second half wins.
    if @pubsize =~ /;/
      str1, str2 = $`.strip, $'.strip
      @page, @pubsize = str1, str2 if str1 =~ /p/
      @page, @pubsize = str2, str1 if str2 =~ /p/
    end

    @note2.gsub!(/ /, '') unless @note2.nil?
  end

  #===== set book-info(@info) from local-value =====
  def set_info
    super
    #----- set info add -----
    # information specific to webcat plus
    @info["access_id"] = @access_id
    @info["title2"]    = @title2
    @info["title3"]    = @title3
    @info["author2"]   = @author2
    @info["pubtype"]   = @pubtype
    @info["type"]      = @type
    @info["note"]      = @note
    @info["note2"]     = @note2
    @info["code"]      = @code
    @info["case"]      = @case
  end

  private

  # Returns, for each key in +keys+, the cleaned-up text found after that
  # key's label in data[key], or nil when the section is missing, does not
  # match, or is empty after clean-up.
  def extract_wcp_fields(data, mode_tbl, keys)
    keys.map { |key|
      stock = nil
      # NOTE(review): the pattern between the label and the capture group is
      # kept exactly as found; markup may have been lost from it in the copy
      # under review - compare with the upstream file.
      if !data[key].nil? && data[key] =~ /#{mode_tbl[key]}.+?(.+?)<\/td>/smi
        # Collapse whitespace, turn line-break tags into tabs, then drop the
        # remaining tags. The <br> pattern is a reconstruction: the copy under
        # review had a bare newline here, which could never match after the
        # whitespace collapse - confirm against the upstream file.
        text = $1.gsub(/\s+/, ' ').gsub(/<br>/i, "\t").gsub(/<[^>]+>/, '').strip
        stock = text if text != ''
      end
      stock
    }
  end
end

if __FILE__ == $0
  p CacheBib_webcatplus.new.get_data('4-15-010739-4')
end