#!/usr/bin/env ruby
# yahoo.rb(books) ver.0.1 2003.1.19-2003.1.20
# ISBN-search -> detail-parse
# original; isbn2bib.rb for bk1 by takahashi
#
# Not adapted yet; preparation only.
# Carried over from the esbooks version and still left undone:
#   :genre    => '■ジャンル名'     (genre label)
#   :delivery => '■発送可能時期'   (delivery-time label)
#   sales rank
#   rating points
#   review
#
# NOTE(review): the copy of this file that was reviewed had lost its line
# breaks and every piece of HTML-looking text inside its regex literals.
# Patterns marked "reconstructed" below are best-effort restorations and
# must be confirmed against an intact copy of the original file.

require 'net/http'
require 'cgi'
#require 'simpleuri'
#require 'webagent'

# Book-information fetcher for books.yahoo.co.jp.
#
# Looks a book up by ISBN, follows the hit to the detail page, scrapes the
# bibliographic fields out of the page and stores them in @info.
# @info and @stderr are not set up in this file; presumably CacheBib
# provides them -- TODO confirm.
class CacheBib_yahoo < CacheBib
  attr_accessor :isbn
  attr_accessor :flag_review #, :affiliate_code
  attr_accessor :url_site
  attr_accessor :title, :author, :publisher, :size, :pubdate, :price, :desc, :series
  attr_accessor :image, :translator, :genre, :genre1, :genre2, :genre3
  attr_accessor :reader, :stock, :delivery
  attr_accessor :reviews, :reviewlist
  attr_accessor :insystemKcode, :outputKcode, :messageKcode
  attr_accessor :detail_id

  # Sets the site URL and the kconv codes (1 = jis, 2 = euc, 3 = sjis).
  def initialize
    super
    @reviews = []
    @flag_review = true
    @url_site = 'http://books.yahoo.co.jp/'
    @insystemKcode = 3 # sjis
    @outputKcode = 2   # euc
    # Fixed: this used to assign @messagetKcode (typo), which left
    # @messageKcode nil for every message.kconv(@messageKcode) call.
    @messageKcode = 3  # sjis
  end

  #===== ===== parts ===== =====

  # Turns the scraped delivery text into a short day-range label.
  #
  # Returns [label, flag]. flag is true when a title is present; without a
  # title a warning is written to @stderr and ['', false] is returned.
  def word_delivery
    if @title.nil?
      message = "WAR; maybe data-error, no-title."
      @stderr.puts message.kconv(@messageKcode)
      return '', false
    end

    delivery =
      case @delivery
      when nil then "ない"
      when /翌日〜3日/ then "1〜3日"
      when /3日〜6日/ then "3〜6日"
      when /3日〜3週間/ then "3〜21日"
      else "?"
      end
    return delivery, true
  end

  #===== ===== url ===== =====

  # URL of the keyword search, queried with the ISBN.
  def url_isbnsearch
    @url_site + "bin/search_key?p=#{@isbn}"
  end

  # URL of the detail page for @detail_id.
  def url_bibinfo
    @url_site + "bin/detail?id=#{@detail_id}"
  end

  #===== ===== parse ===== =====

  # Scans the search-result page for detail links, keeping the id and title
  # of the last hit in @detail_id / @title. Warns on @stderr when more than
  # one line of the page produced a hit.
  def parse_isbnsearch(str)
    # NOTE(review): reconstructed. Only `%r|(.*?)|` survived in the reviewed
    # copy, together with the use of $1 as detail id and $2 as title; the
    # link markup is a guess modelled on url_bibinfo -- confirm.
    hit = %r|<a href="[^"]*detail\?id=([^"&]+)"[^>]*>(.*?)</a>|

    count = 0
    # each_line instead of String#each, which no longer exists in Ruby 1.9+.
    str.each_line do |line|
      next unless line =~ hit
      @detail_id, @title = $1, $2
      if @detail_id.nil?
        @title = nil
      else
        count += 1
      end
    end

    if count > 1
      message = "WAR; ISBN-search hit multi-column. \nISBN; " + @isbn
      @stderr.puts message.kconv(@messageKcode)
    end
  end

  # Scrapes the detail page.
  #
  # The page is cut into sections: each line is appended to the section of
  # the most recent label (mode_tbl value) seen so far. Every field is then
  # pulled out of its own section. A field whose section or pattern is
  # missing keeps whatever value it had before the call.
  def parse_bibinfo(str)
    #----- set key and data ------
    mode_tbl = {
      :stock => 'Yahoo!ショッピング取扱状況:',
      # :delivery => '',
      :image => '書籍画像',
      :publisher => '出版社名',
      :title => '書籍名',
      :series => 'シリーズ名',
      :author => '著者名',
      :pubdate => '出版年月 ',
      :size => 'ページ数・版型 ',
      :isbn => 'ISBNコード',
      :price => '価格',
      :content => '内容',
      :reader => '読者対象',
      # NOTE(review): empty in the reviewed copy; the marker may have been
      # markup that was stripped -- confirm. The review parser that would
      # consume this section is disabled below.
      :review => ''
      # :genre => '■ジャンル名'
    }
    mode = :none
    data = {}

    #----- read and separate by key(mode_tbl) ------
    str.each_line do |line|
      l = line.chomp.chomp("\r").kconv(@insystemKcode)
      mode_tbl.each do |key, keydata|
        # An empty marker would match every line and swallow all sections.
        next if keydata.empty?
        mode = key if /#{keydata}/s =~ l
      end
      next if mode == :none
      if data[mode]
        data[mode] << l
      else
        data[mode] = l
      end
    end

    #----- parse -----
    # Stock cell looks like "status(delivery time)"; split it when it does.
    if data[:stock] && data[:stock] =~ /#{mode_tbl[:stock]}(.+?)<\/td>/smi
      @stock = $1.gsub(/<[^>]+>/,'').strip
      if @stock =~ /^(.+?)\((.+)\)$/
        @stock, @delivery = $1.strip, $2.strip
      end
    end

    # NOTE(review): reconstructed. The inner image pattern was lost entirely
    # (`tmp =~ //smi` in the reviewed copy); an <img src="..."> match
    # assigning @image is assumed -- confirm.
    cover = bibinfo_cell_markup(data, mode_tbl[:image], :image)
    if cover && cover =~ /<img.+?src="(.+?)"/smi
      @image = $1
    end

    # All remaining fields share one shape: the table cell following the
    # label, with tags removed. @content has no accessor; get_info stores it
    # under "desc".
    [:publisher, :title, :series, :author, :pubdate, :size,
     :isbn, :price, :content, :reader].each do |key|
      text = bibinfo_cell_text(data, mode_tbl[key], key)
      instance_variable_set("@#{key}", text) unless text.nil?
    end

# Disabled review parsing, kept as found. It refers to mode_tbl[:review2],
# which is not defined above, and its patterns show the same markup loss as
# the rest of the reviewed copy.
=begin
    reviewsplit = data[:review].to_s.split(mode_tbl[:review])
    reviewlist = []
    reviewsplit.each { |column|
      if column =~ /cust_id=(.+?)'/
        custid = $1
        reviewlist << custid
      end
      if column =~ /(.+?)店長/
        custid = $1.strip
        reviewlist << custid
      end
    }
    reviewsplit = data[:review2].to_s.split(mode_tbl[:review2])
    reviewsplit.each { |column|
      if column =~ /cust_id=(.+?)'/
        custid = $1
        reviewlist << custid
      end
      if column =~ /(.+?)店長/
        custid = $1.strip
        reviewlist << custid
      end
    }
    @reviewlist = reviewlist.join("\t") if reviewlist.size > 0

    if @flag_review
      reviewsplit = data[:review].to_s.split(mode_tbl[:review])
      reviewsplit.each{ |column|
        review = {}
        if column =~ /cust_id=(.+?)'/
          review["cust_id"] = $1
        end
        if column =~ /(.+?)店長の『(.+?)』コーナー/s
          review["reviewer"] = $1.strip
          review["corner"] = $2.strip
        end
        if column =~ /#{review["reviewer"]}\s*\[(.+?)\]\s*−\s*(.+?日)(.+?)<\/td>/s
          review["value"] = $1
          review["date"] = $2
          review["content"] = $3
          review["content"].gsub!(/^<.+?>/,'').strip!
          review["content"].gsub!(/\n/,"\t")
        end
        @reviews << review if review.size > 0
      }
      reviewsplit = data[:review2].to_s.split(mode_tbl[:review2])
      reviewsplit.each{ |column|
        review = {}
        if column =~ /cust_id=(.+?)'/
          review["cust_id"] = $1
        end
        if column =~ /(.+?)店長/s
          review["reviewer"] = $1.strip
        end
        if column =~ /<\/strong>(.+?)
(.+?)<\/td>/s
          review["content"] = $1
          review["date"] = $2
          review["content"].gsub!(/<[^>]+>/,'').gsub!(/\s+/,'').strip!
          review["date"].strip!
        end
        @reviews << review if review.size > 0
      }
    end
=end
  end

  #===== ===== main ===== =====

  # Copies the cached values out of @info into instance variables and
  # decides whether an ISBN search is needed (no cached detail id).
  def set_info
    #----- set book-info (from cache) -----
    @flag_overprint = true if @info["overprint"]
    @detail_id = @info["detail_id"]
    @flag_isbnsearch = true unless @detail_id
    @title = @info["title"]
    @delivery = @info["delivery"]
  end

  # Fetches the book from the site and rebuilds @info from the result.
  #
  # Runs the ISBN search first when @flag_isbnsearch is set, then fetches
  # and parses the detail page. @info is reset to an empty hash unless
  # @flag_overprint is set, in which case existing keys are overwritten.
  def get_info
    message = 'CHK; access; ' + @url_site + ' -> ' + @isbn
    @stderr.puts message.kconv(@messageKcode)

    #----- get detail-code -----
    parse_isbnsearch(fetch_page_body(url_isbnsearch)) if @flag_isbnsearch

    #----- get book-info -----
    # url_bibinfo is built only now, after the search may have set @detail_id.
    parse_bibinfo(fetch_page_body(url_bibinfo))

    @info = {} unless @flag_overprint
    @info["isbn"] = @isbn
    @info["title"] = @title
    @info["author"] = @author
    @info["publisher"] = @publisher
    @info["series"] = @series
    @info["size"] = @size
    @info["pubdate"] = @pubdate
    @info["price"] = @price
    @info["desc"] = @content
    @info["image"] = @image
    # @info["translator"] = @translator
    # @info["genre"] = @genre
    # @info["genre1"] = @genre1
    # @info["genre2"] = @genre2
    # @info["genre3"] = @genre3
    @info["reader"] = @reader
    @info["stock"] = @stock
    @info["delivery"] = @delivery
    @info["detail_id"] = @detail_id
    # @info["reviewlist"] = @reviewlist
    @info["overprint"] = true if @flag_overprint
    @info["reviews"] = @reviews if @flag_review
  end

  private

  # Downloads url with WebAgent and returns the body converted with
  # kconv(@insystemKcode).
  def fetch_page_body(url)
    agent = WebAgent.new
    agent.uri = url
    agent.get
    agent.body.kconv(@insystemKcode)
  end

  # Returns the raw markup of the table cell that follows label inside
  # data[key], or nil when the section or the cell is missing.
  #
  # NOTE(review): reconstructed. The reviewed copy kept only
  # `/#{label}.+?(` and `/smi`; `(<td.+?)<\/td>` is the form that leaves
  # exactly those fragments once tag-like text is removed -- confirm.
  def bibinfo_cell_markup(data, label, key)
    section = data[key]
    return nil if section.nil?
    return nil unless section =~ /#{label}.+?(<td.+?)<\/td>/smi
    $1
  end

  # Same cell as bibinfo_cell_markup, with tags removed and whitespace
  # trimmed; nil when the cell is missing.
  def bibinfo_cell_text(data, label, key)
    markup = bibinfo_cell_markup(data, label, key)
    markup && markup.gsub(/<[^>]+>/,'').strip
  end
end