#!/usr/bin/env ruby # esbooks.rb ver.0.4 2002.3.21-2003.1.20 # ISBN-search -> detail-parse # original; isbn2bib.rb for bk1 by takahashi #>集英社コバルト文庫 #:genre => '■ジャンル名', #:delivery => '■発送可能時期', #売上ランク #評価ポイント #review #以上、やり残し require 'net/http' require 'cgi' #require 'simpleuri' #require 'webagent' class CacheBib_esbooks < CacheBib attr_accessor :isbn attr_accessor :flag_review #, :affiliate_code attr_accessor :url_site attr_accessor :title, :author, :publisher, :size, :pubdate, :price, :desc attr_accessor :image, :translator, :genre, :genre1, :genre2, :genre3 attr_accessor :reader, :stock, :delivery attr_accessor :reviews, :reviewlist attr_accessor :insystemKcode, :outputKcode, :messageKcode attr_accessor :product_cd def initialize () super @reviews = Array.new() @flag_review = true @url_site = 'http://www.esbooks.co.jp/' @insystemKcode = 3 #sjis #1-jis,2-euc,3-sjis @outputKcode = 2 #euc @messagetKcode = 3 #sjis end #===== ===== parts ===== ===== def word_delivery () delivery = '' flag = false # title-check -> no-info / get-info check if !@title.nil? flag = true if @delivery.nil? delivery = "ない" else if @delivery =~ /翌日〜3日/ delivery = "1〜3日" elsif @delivery =~ /3日〜6日/ delivery = "3〜6日" elsif @delivery =~ /3日〜3週間/ delivery = "3〜21日" else delivery = "?" end end else message = "WAR; maybe data-error, no-title." @stderr.puts message.kconv(@messageKcode) end return delivery, flag end #===== ===== url ===== ===== def url_isbnsearch () # url = @url_site + "bks.svl?CID=BKS504&access_method=isbn_cd&input_data=#{@isbn}" url = @url_site + "search?isbn=#{@isbn}" return url end def url_bibinfo () # url = @url_site + "bks.svl?CID=BKS503&product_cd=#{@product_cd}" url = @url_site + "product/keyword/keyword?accd=#{@product_cd}" return url end #===== ===== parse ===== ===== def parse_isbnsearch (str) count = 0 str.each{ |line| # if line =~ %r|\s*(.*?)| then if line =~ %r|(.+?)|s then @product_cd, @title = $1, $2 if @product_cd.nil? @title = nil else count = count + 1 end end } if count > 1 message = "WAR; ISBN-search hit multi-column. ISBN; " + @isbn @stderr.puts message.kconv(@messageKcode) end end def parse_bibinfo (str) #----- set key and data ------ mode_tbl = { :title => 'class="colum-title"', :image => '/bks/images/i', :author => '著者/訳者名', :publisher => '出版社名', :genre => '■ジャンル名', :pubdate => '発行年月', :isbn => 'ISBNコード', :size => 'ページ・サイズ', :reader => '読者対象', :stock => '在庫状況', :price => '販売価格', :delivery => '■発送可能時期', :content => '【本の内容】', :content2 => '【出版社からのコメント】', :review => '/i @title = $1.gsub(/<[^>]+>/,'').strip end end data[:image] =~ Regexp.new("]+>/,'').strip if !$1.nil? @author = string.strip if !string.nil? if string =~ /(.*)/(〔ほか〕)?〔?[著|編]〕?/s names = $1.split(/〔?[著|編]〕?[\s| |$]/s) names.collect! { |name| name.strip.chomp('/') } @author = names.join("\t") end if string =~ /.*/(〔ほか〕)?〔?[著|編]〕?(.*)/(〔ほか〕)?〔?訳〕?/s names = $2.split( /〔?訳〕?[\s| |$]/s) names.collect! { |name| name.strip.chomp('/') } names.collect! { |name| name.gsub(/^ */,'').strip } @translator = names.join("\t") end end data[:publisher] =~ Regexp.new(":(.*)") @publisher = $1.gsub(/<[^>]+>/,'').strip if !$1.nil? data[:genre] =~ Regexp.new(":(.*)") @genre = $1.gsub(/<[^>]+>/,'').strip if !$1.nil? data[:genre] =~ Regexp.new("genre1_name=(.+?)&") @genre1 = $1 data[:genre] =~ Regexp.new("genre2_name=(.+?)&") @genre2 = $1 data[:genre] =~ Regexp.new("genre3_name=(.+?)&") @genre3 = $1 data[:pubdate] =~ Regexp.new(":(.*)") @pubdate = $1.gsub(/<[^>]+>/,'').strip if !$1.nil? data[:isbn] =~ Regexp.new(":(.*)[ ]?") @isbn = $1.gsub(/<[^>]+>/,'').delete(' ').strip if !$1.nil? data[:size] =~ Regexp.new(":(.*)") @size = $1.gsub(/<[^>]+>/,'').strip if !$1.nil? data[:reader] =~ Regexp.new(":(.*)") @reader = $1.gsub(/<[^>]+>/,'').strip if !$1.nil? data[:stock] =~ Regexp.new(":(.*)") @stock = $1.strip if !$1.nil? if @stock =~ /(.+?)<\/tr>/i @stock = $1.gsub(/<[^>]+>/,'').strip end data[:price] =~ Regexp.new(":(.*?)") str = $1 @price = str.gsub(/<[^>]+?>/,'').strip.delete("\s") if !str.nil? # if !data[:delivery].nil? # data[:delivery] =~ Regexp.new(":(.*?)>") # @delivery = $1.to_s.gsub(/<[^>]+>/,'').strip # end if @stock =~ /((.+?))/s @delivery = $1 end data[:content] =~ Regexp.new("#{mode_tbl[:content]}(.*?)") @desc = $1.to_s.gsub(/<[^>]+>/,'').strip @desc = '' if @desc == @delivery data[:content2] =~ Regexp.new("#{mode_tbl[:content2]}(.*?)") tmp = $1.to_s.gsub(/<[^>]+>/,'').strip tmp = '' if tmp == @delivery @desc += tmp reviewsplit = data[:review].to_s.split(mode_tbl[:review]) reviewlist = [] reviewsplit.each { |column| if column =~ /cust_id=(.+?)'/ custid = $1 reviewlist << custid end if column =~ /(.+?)店長/ custid = $1.strip reviewlist << custid end } reviewsplit = data[:review2].to_s.split(mode_tbl[:review2]) reviewsplit.each { |column| if column =~ /cust_id=(.+?)'/ custid = $1 reviewlist << custid end if column =~ /(.+?)店長/ custid = $1.strip reviewlist << custid end } @reviewlist = reviewlist.join("\t") if reviewlist.size > 0 if @flag_review reviewsplit = data[:review].to_s.split(mode_tbl[:review]) reviewsplit.each{ |column| review = {} if column =~ /cust_id=(.+?)'/ review["cust_id"] = $1 end if column =~ /(.+?)店長の『(.+?)』コーナー/s review["reviewer"] = $1.strip review["corner"] = $2.strip end if column =~ /#{review["reviewer"]}\s*\[(.+?)\]\s*−\s*(.+?日)(.+?)<\/td>/s review["value"] = $1 review["date"] = $2 review["content"] = $3 review["content"].gsub!(/^<.+?>/,'').strip! review["content"].gsub!(/\n/,"\t") end @reviews << review if review.size > 0 } reviewsplit = data[:review2].to_s.split(mode_tbl[:review2]) reviewsplit.each{ |column| review = {} if column =~ /cust_id=(.+?)'/ review["cust_id"] = $1 end if column =~ /(.+?)店長/s review["reviewer"] = $1.strip end if column =~ /<\/strong>(.+?)
(.+?)<\/td>/s review["content"] = $1 review["date"] = $2 review["content"].gsub!(/<[^>]+>/,'').gsub!(/\s+/,'').strip! review["date"].strip! end @reviews << review if review.size > 0 } end end #===== ===== main ===== ===== def set_info #===== set local-value from @info (from cache) ===== #----- set book-info (from cache) ----- @flag_overprint = true if @info["overprint"] @product_cd = @info["product_cd"] @flag_isbnsearch = true if !@product_cd @title = @info["title"] @delivery = @info["delivery"] end def get_info #===== get book-info(@info) from url ===== message = 'CHK; access; ' + @url_site + ' -> ' + @isbn @stderr.puts message.kconv(@messageKcode) #----- get detail-code ----- if @flag_isbnsearch agent = WebAgent.new() agent.uri = url_isbnsearch agent.get() body = agent.body.kconv(@insystemKcode) parse_isbnsearch(body) end #----- get book-info ----- agent = WebAgent.new() agent.uri = url_bibinfo agent.get() body = agent.body.kconv(@insystemKcode) parse_bibinfo(body) if !@flag_overprint @info = {} end @info["isbn"] = @isbn @info["title"] = @title @info["author"] = @author @info["publisher"] = @publisher @info["size"] = @size @info["pubdate"] = @pubdate @info["price"] = @price @info["desc"] = @desc @info["image"] = @image @info["translator"] = @translator @info["genre"] = @genre @info["genre1"] = @genre1 @info["genre2"] = @genre2 @info["genre3"] = @genre3 @info["reader"] = @reader @info["stock"] = @stock @info["delivery"] = @delivery @info["product_cd"] = @product_cd @info["reviewlist"] = @reviewlist @info["overprint"] = true if @flag_overprint if @flag_review @info["reviews"] = @reviews end end end