#!/usr/bin/env ruby # bk1.rb ver.0.5 2002.3.21-2003.5.22 # ISBN-search -> detail-parse # original; isbn2bib.rb for bk1 by takahashi # コミック等が対照となる場合、通常と微妙にフォーマットが変わる為、幾つかのデータを拾い損ねる場合有り。未対応。 require 'net/http' require 'cgi' #require 'simpleuri' #require 'webagent' class Review_bk1 attr_accessor :url, :info attr_accessor :title, :reviewer_title, :reviewer, :date, :point, :content attr_accessor :insystemKcode, :outputKcode, :messageKcode def initialize () @insystemKcode = 3 #sjis #1-jis,2-euc,3-sjis @outputKcode = 2 #euc @messagetKcode = 3 #sjis end def parse_review (str) #----- set key and data ------ mode_tbl = { '' => :title, '' => :reviewer_title, '' => :reviewer, '' => :date, '' => :point, '' => :content, '' => :none } mode = :none data = Hash.new() #----- read and sepalate by key(mode_tbl) ------ str.each{|line| l = line.chomp.chomp("\r").kconv(@insystemKcode) l.strip! if mode_tbl[l] mode = mode_tbl[l] end if mode != :none if data[mode] data[mode] << line else data[mode] = line end end } #----- parse ----- @title = data[:title].to_s.gsub(/<[^>]+>/,'').strip @reviewer_title = data[:reviewer_title].to_s.gsub(/<[^>]+>/,'').strip @reviewer = data[:reviewer].to_s.gsub(/<[^>]+>/,'').strip @date = data[:date].to_s.gsub(/<[^>]+>/,'').strip @point = data[:point].to_s.gsub(/<[^>]+>/,'').sub(/評価(★マーク)/s,'').sub(/ /s,'').strip @content = data[:content].to_s.gsub(/<[^>]+>/,'').strip @content.gsub!(/\n/,"\t") end def get_review () agent = WebAgent.new() agent.uri = @url agent.get() body = agent.body.kconv(@insystemKcode) parse_review(body) @info = {} @info["title"] = @title @info["review_title"] = @reviewer_title @info["review"] = @reviewer @info["date"] = @date @info["point"] = @point @info["content"] = @content end end class CacheBib_bk1 < CacheBib attr_accessor :isbn attr_accessor :flag_review, :affiliate_code attr_accessor :url_site attr_accessor :title, :author, :publisher, :size, :pubdate, :price, :desc attr_accessor :image, :delivery attr_accessor :reviews, :reviewlist attr_accessor :chiefeditor, :author_intro, :series attr_accessor :author_auid, :translator_auid, :chiefeditor_auid attr_accessor :insystemKcode, :outputKcode, :messageKcode attr_accessor :bibid def initialize () super @reviews = Array.new() @flag_review = true @url_site = 'http://www.bk1.co.jp/' @affiliate_code = 'p-ippo00145' @insystemKcode = 3 #sjis #1-jis,2-euc,3-sjis @outputKcode = 2 #euc @messagetKcode = 3 #sjis end #===== ===== parts ===== ===== def word_delivery () delivery = '' flag = false # title-check -> no-info / get-info check if !@title.nil? flag = true if @delivery.nil? delivery = "ない" else if @delivery =~ /24/ delivery = "24時間" elsif @delivery =~ /3/ delivery = "2〜3日" elsif @delivery =~ /取/ delivery = "取寄せ" else delivery = "?" end end else message = "WAR; maybe data-error, no-title." @stderr.puts message.kconv(@messageKcode) end return delivery, flag end #===== ===== url ===== ===== def url_isbnsearch () url = @url_site + "cgi-bin/srch/srch_result_book.cgi/?" url << "isbn=#{@isbn}" url << "&aid=#{@affiliate_code}" if !@affiliate_code.nil? url << "&idx=3" return url end def url_bibinfo () url = @url_site + "cgi-bin/srch/srch_detail.cgi/?" url << "bibid=#{@bibid}" url << "&aid=#{@affiliate_code}" if !@affiliate_code.nil? url << "&idx=3" return url end def url_review () url = @url_site + "cgi-bin/srch/srch_rev.cgi/?" url << "bibid=#{@bibid}&volno=#{@volno}&revid=#{@revid}" url << "&aid=#{@affiliate_code}" if !@affiliate_code.nil? return url end #===== ===== parse ===== ===== def parse_isbnsearch (str) count = 0 str.each{|line| if line =~ %r|\s*(.*)
| then @bibid, @title = $1, $2 if @bibid.nil? @title = nil else count = count + 1 end end } if count > 1 message = "WAR; ISBN-search hit multi-column. ISBN; " + @isbn @stderr.puts message.kconv(@messageKcode) end end def parse_bibinfo (str) #----- set key and data ------ mode_tbl = { '' => :image, '' => :title, '' => :author, '' => :p_s_i_d, '' => :price_shubetsu, '' => :price, '' => :none, '' => :delivery, '' => :content, ' ' => :comment, '' => :none, '' => :review, '' => :review } mode = :none data = Hash.new() #----- read and sepalate by key(mode_tbl) ------ str.each{|line| l = line.chomp.chomp("\r").kconv(@insystemKcode) if mode_tbl[l] mode = mode_tbl[l] end if mode != :none if data[mode] data[mode] << l else data[mode] = l end end } #----- parse ----- data[:image] =~ Regexp.new("([^<]*)") @title = $1 if data[:title] =~ /
/i titles = data[:title].split(/
/i) if titles[1] =~ Regexp.new("([^<]*)") @series = $1.strip if @series =~ /^\((.+)\)$/ @series = $1.strip end end end if !data[:author].nil? authors = data[:author].split(/
/i) authors.each { |line| str = line.to_s.gsub(/<[^>]+>/,'').delete("\n").strip if line =~ /auid=(\d+)/ then auid = $1 end if str =~ /(著$|〔著〕$|編$)/ then name = $` name += '編'if str =~ /編$/ if @author.nil? then @author = '' else @author << "\t" end @author << name if !auid.nil? if @author_auid.nil? then @author_auid = '' else @author_auid << "\t" end @author_auid << auid end elsif str =~ /訳$/ then name = $` if @translator.nil? then @translator = '' else @translator << "\t" end @translator << name if !auid.nil? if @translator_auid.nil? then @translator_auid = '' else @translator_auid << "\t" end @translator_auid << auid end elsif str =~ /監修$/ then name = $` if @chiefeditor.nil? then @chiefeditor = '' else @chiefeditor << "\t" end @chiefeditor << name if !auid.nil? if @chiefeditor_auid.nil? then @chiefeditor_auid = '' else @chiefeditor_auid << "\t" end @chiefeditor_auid << auid end end } if @author =~ /^著者:(.+)$/ then @author = $1.strip end end data[:p_s_i_d] =~ Regexp.new("出版:([^<]+)
.*サイズ:([^<]+)
.*ISBN:([^<]+)
.*発行年月:([^<]+)
.*") @publisher = $1 @size = $2 @isbn = $3 @pubdate = $4 if !data[:price].nil? if data[:price] =~ /
/ data[:price] = $` end @price = data[:price].to_s.gsub(/<[^>]+>/,'').strip end data[:delivery] =~ Regexp.new("alt=\"(.*?)\"") @delivery = $1 if !data[:content].nil? if data[:content] =~ /内容説明/s desc = data[:content].to_s.sub(/.*内容説明<\/b>/s, '').sub(/ /s,'').sub(/.*/,'').gsub(/<[^>]+>/,'').strip @desc = desc if desc.size > 0 end end if !data[:comment].nil? authors = data[:comment].gsub(/.*/,'').split('') authors.each { |line| intro = line.sub(/.*著者紹介<\/b>/s, '').gsub(/ /e,'').gsub(/<[^>]+>/,'').strip if intro.size > 0 if @author_intro.nil? then @author_intro = '' else @author_intro << "\t" end @author_intro << intro end } end revlist1 = [] data[:review].to_s.scan(%r|この書評は購入の参考になりましたか?(.*?)いいえ|) {|ids| revid = ids[0].scan(/name="revid" value="(\d+)"/)[0] revlist1 << revid } revlist2 = [] data[:review].to_s.scan(%r|)|) {|ids| revstr = ids[0].scan(/>(.*?)<\/a>/)[0] revlist2 << revstr } reviewlist = [] revlist1.each_index { |i| reviewlist << revlist1[i] reviewlist << revlist2[i] } @reviewlist = reviewlist.join("\t") if reviewlist.size > 0 if @flag_review data[:review].to_s.scan(%r|この書評は購入の参考になりましたか?(.*?)いいえ|){ |ids| @volno = ids[0].scan(/name="volno" value="(\d+)"/)[0] @revid = ids[0].scan(/name="revid" value="(\d+)"/)[0] review = Review_bk1.new() review.url = url_review review.get_review @reviews << review.info } end end #===== ===== main ===== ===== def set_info #===== set local-value from @info (from cache) ===== #----- set book-info (from cache) ----- @flag_overprint = true if @info["overprint"] @bibid = @info["bibid"] @flag_isbnsearch = true if !@bibid @title = @info["title"] @delivery = @info["delivery"] end def get_info #===== get book-info(@info) from url ===== message = 'CHK; access; ' + @url_site + ' -> ' + @isbn @stderr.puts message.kconv(@messageKcode) #----- get detail-code ----- if @flag_isbnsearch agent = WebAgent.new() agent.uri = url_isbnsearch agent.get() body = agent.body.kconv(@insystemKcode) parse_isbnsearch(body) end #----- get book-info ----- agent = WebAgent.new() agent.uri = url_bibinfo agent.get() body = agent.body.kconv(@insystemKcode) parse_bibinfo(body) if !@flag_overprint @info = {} end @info["image"] = @image @info["title"] = @title @info["author"] = @author @info["translator"] = @translator @info["publisher"] = @publisher @info["size"] = @size @info["isbn"] = @isbn @info["pubdate"] = @pubdate @info["price"] = @price @info["delivery"] = @delivery @info["desc"] = @desc @info["bibid"] = @bibid @info["reviewlist"] = @reviewlist @info["chiefeditor"] = @chiefeditor @info["author_auid"] = @author_auid @info["translator_auid"] = @translator_auid @info["chiefeditor_auid"] = @chiefeditor_auid @info["author_intro"] = @author_intro @info["series"] = @series @info["overprint"] = true if @flag_overprint if @flag_review info["reviews"] = @reviews end end end