#!/usr/bin/env ruby
# yahoo.rb(books) ver.0.1 2003.1.19-2003.1.20
# ISBN-search -> detail-parse
# original; isbn2bib.rb for bk1 by takahashi
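# Not adapted yet; groundwork only.
# Carried over from the esbooks version:
#:genre => '■ジャンル名',
#:delivery => '■発送可能時期',
# sales rank
# rating points
# review
# The items above are still left to do.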
require 'net/http'
require 'cgi'
#require 'simpleuri'
#require 'webagent'
# Book record for the Yahoo! Books site (base URL set in initialize),
# layered on CacheBib.
class CacheBib_yahoo < CacheBib
# ISBN used to build the search URL; parse_bibinfo overwrites it when the
# detail page carries an ISBN field.
attr_accessor :isbn
# When true, get_info stores @reviews into @info["reviews"].
attr_accessor :flag_review #, :affiliate_code
# Base URL that the search and detail paths are appended to.
attr_accessor :url_site
# Bibliographic fields; the ones parse_bibinfo extracts are copied into @info
# by get_info.
attr_accessor :title, :author, :publisher, :size, :pubdate, :price, :desc, :series
attr_accessor :image, :translator, :genre, :genre1, :genre2, :genre3
# Reader target, stock status and the delivery text split off the stock
# status by parse_bibinfo.
attr_accessor :reader, :stock, :delivery
# @reviews starts as an empty array; the code that would fill it and
# @reviewlist is currently commented out in parse_bibinfo.
attr_accessor :reviews, :reviewlist
# Encoding codes handed to String#kconv: insystemKcode for fetched pages,
# messageKcode for diagnostics written to @stderr. outputKcode is not read
# anywhere in this file.
attr_accessor :insystemKcode, :outputKcode, :messageKcode
# Identifier of the detail page, used by url_bibinfo.
attr_accessor :detail_id
# Set up defaults: an empty review list, review collection switched on, the
# Yahoo! Books base URL and the kconv encoding codes.
#
# Encoding codes follow the original note: 1 = JIS, 2 = EUC, 3 = SJIS.
def initialize
  super
  @reviews = []
  @flag_review = true
  @url_site = 'http://books.yahoo.co.jp/'
  @insystemKcode = 3 # sjis
  @outputKcode = 2 # euc
  # This used to be spelled "@messagetKcode", which left @messageKcode (the
  # code passed to kconv for every diagnostic message) unset.
  @messageKcode = 3 # sjis
end
#===== ===== parts ===== =====
# Turn the raw @delivery text into a short day-range label.
#
# Returns a two-element array [label, flag]:
# * flag is false (and label is '') when @title is nil; a warning is then
#   written to @stderr.
# * otherwise flag is true and label is "ない" when @delivery is nil, the
#   matching day range when @delivery contains a known phrase, or "?".
def word_delivery
  if @title.nil?
    warning = "WAR; maybe data-error, no-title."
    @stderr.puts warning.kconv(@messageKcode)
    return '', false
  end

  return "ない", true if @delivery.nil?

  # Site phrase => short label; the first phrase found in @delivery wins.
  known_ranges = [
    [/翌日〜3日/, "1〜3日"],
    [/3日〜6日/, "3〜6日"],
    [/3日〜3週間/, "3〜21日"]
  ]
  hit = known_ranges.find { |phrase, _label| @delivery =~ phrase }
  label = hit ? hit[1] : "?"
  return label, true
end
#===== ===== url ===== =====
# URL of the ISBN search page for the current @isbn.
def url_isbnsearch
  search_path = "bin/search_key?p=#{@isbn}"
  @url_site + search_path
end
# URL of the detail page for the current @detail_id.
def url_bibinfo
  detail_path = "bin/detail?id=#{@detail_id}"
  @url_site + detail_path
end
#===== ===== parse ===== =====
# Scan an ISBN-search result page for detail-page hits.
#
# str:: page body; a String (walked line by line) or any other object that
#       yields lines from #each.
#
# Every matching line overwrites @detail_id / @title, so the last hit wins.
# A warning is written to @stderr when more than one line matched.
# Returns nil.
#
# NOTE(review): the pattern below has a single capture group, so $2 (and
# therefore @title) is always nil here. It looks as if the HTML part of the
# pattern was lost from this copy -- confirm against the upstream script.
def parse_isbnsearch(str)
  hits = 0
  # String#each was removed in Ruby 1.9; each_line exists on String in every
  # version. Objects without each_line (e.g. an Array of lines) keep using each.
  walker = str.respond_to?(:each_line) ? :each_line : :each
  str.__send__(walker) do |line|
    next unless line =~ %r|(.*?)|
    @detail_id, @title = $1, $2
    if @detail_id.nil?
      @title = nil
    else
      hits += 1
    end
  end
  return unless hits > 1

  # Interpolation keeps the warning usable even when @isbn is nil.
  message = "WAR; ISBN-search hit multi-column. ISBN; #{@isbn}"
  @stderr.puts message.kconv(@messageKcode)
end
# Parse a book-detail page and fill the bibliographic instance variables.
#
# str:: detail-page text, iterated with #each (get_info passes the page body
#       after kconv).
#
# Works in two passes: first every line is filed under the most recently seen
# field label from mode_tbl, then each field's accumulated text is matched
# against its label and stripped of HTML tags.
#
# NOTE(review): several patterns in the second pass look truncated (an opening
# "(" with nothing after it, patterns split across two lines, and @publisher
# being set without a :publisher guard). Presumably HTML fragments were lost
# from this copy -- restore them from the upstream script before relying on
# this method.
def parse_bibinfo (str)
#----- set key and data ------
# Field symbol => label text that marks the start of that field on the page.
mode_tbl = {
:stock => 'Yahoo!ショッピング取扱状況:',
# :delivery => '',
:image => '書籍画像',
:publisher => '出版社名',
:title => '書籍名',
:series => 'シリーズ名',
:author => '著者名',
:pubdate => '出版年月 ',
:size => 'ページ数・版型 ',
:isbn => 'ISBNコード',
:price => '価格',
:content => '内容',
:reader => '読者対象',
# NOTE(review): empty label -- an empty pattern matches every line, so this
# entry can claim each line in the loop below. Presumably the real label was
# lost; confirm.
:review => ''
# :genre => '■ジャンル名'
}
# Stays :none until the first label is seen; lines before that are dropped.
mode = :none
data = Hash.new()
#----- read and separate by key(mode_tbl) ------
str.each{|line|
# Strip the line ending and convert to the in-system encoding.
l = line.chomp.chomp("\r").kconv(@insystemKcode)
# The last label (in mode_tbl iteration order) found on this line becomes the
# current mode; a line without any label keeps the previous mode.
mode_tbl.each{ |key,keydata|
mode = key if /#{keydata}/s =~ l
}
# Append the line to the current field's text (line endings already removed).
if mode != :none
if data[mode]
data[mode] << l
else
data[mode] = l
end
end
}
#----- parse -----
# Stock text is everything from the label up to the closing </td>, with tags
# removed; a trailing "(...)" part is split off into @delivery.
if !data[:stock].nil?
if data[:stock] =~ /#{mode_tbl[:stock]}(.+?)<\/td>/smi
@stock = $1.gsub(/<[^>]+>/,'').strip
if @stock =~ /^(.+?)\((.+)\)$/
@stock, @delivery = $1.strip, $2.strip
end
end
end
# NOTE(review): the image/publisher section below appears damaged (see the
# note at the top of this method).
if !data[:image].nil?
if data[:image] =~ /#{mode_tbl[:image]}.+?(
/smi
tmp = $1
if tmp =~ /
/smi
@publisher = $1.gsub(/<[^>]+>/,'').strip
end
end
# The remaining fields all follow one shape: match the label, then drop tags
# and surrounding whitespace from the captured text.
if !data[:title].nil?
if data[:title] =~ /#{mode_tbl[:title]}.+?(/smi
@title = $1.gsub(/<[^>]+>/,'').strip
end
end
if !data[:series].nil?
if data[:series] =~ /#{mode_tbl[:series]}.+?(/smi
@series = $1.gsub(/<[^>]+>/,'').strip
end
end
if !data[:author].nil?
if data[:author] =~ /#{mode_tbl[:author]}.+?(/smi
@author = $1.gsub(/<[^>]+>/,'').strip
end
end
if !data[:pubdate].nil?
if data[:pubdate] =~ /#{mode_tbl[:pubdate]}.+?(/smi
@pubdate = $1.gsub(/<[^>]+>/,'').strip
end
end
if !data[:size].nil?
if data[:size] =~ /#{mode_tbl[:size]}.+?(/smi
@size = $1.gsub(/<[^>]+>/,'').strip
end
end
if !data[:isbn].nil?
if data[:isbn] =~ /#{mode_tbl[:isbn]}.+?(/smi
@isbn = $1.gsub(/<[^>]+>/,'').strip
end
end
if !data[:price].nil?
if data[:price] =~ /#{mode_tbl[:price]}.+?(/smi
@price = $1.gsub(/<[^>]+>/,'').strip
end
end
# Stored as @content; get_info exports it under the "desc" key.
if !data[:content].nil?
if data[:content] =~ /#{mode_tbl[:content]}.+?(/smi
@content = $1.gsub(/<[^>]+>/,'').strip
end
end
if !data[:reader].nil?
if data[:reader] =~ /#{mode_tbl[:reader]}.+?(/smi
@reader = $1.gsub(/<[^>]+>/,'').strip
end
end
# Disabled review extraction, kept for reference. It refers to :review2
# entries that mode_tbl does not define.
=begin
reviewsplit = data[:review].to_s.split(mode_tbl[:review])
reviewlist = []
reviewsplit.each { |column|
if column =~ /cust_id=(.+?)'/
custid = $1
reviewlist << custid
end
if column =~ /(.+?)店長/
custid = $1.strip
reviewlist << custid
end
}
reviewsplit = data[:review2].to_s.split(mode_tbl[:review2])
reviewsplit.each { |column|
if column =~ /cust_id=(.+?)'/
custid = $1
reviewlist << custid
end
if column =~ /(.+?)店長/
custid = $1.strip
reviewlist << custid
end
}
@reviewlist = reviewlist.join("\t") if reviewlist.size > 0
if @flag_review
reviewsplit = data[:review].to_s.split(mode_tbl[:review])
reviewsplit.each{ |column|
review = {}
if column =~ /cust_id=(.+?)'/
review["cust_id"] = $1
end
if column =~ /(.+?)店長の『(.+?)』コーナー/s
review["reviewer"] = $1.strip
review["corner"] = $2.strip
end
if column =~ /#{review["reviewer"]}\s*\[(.+?)\]\s*−\s*(.+?日)(.+?)<\/td>/s
review["value"] = $1
review["date"] = $2
review["content"] = $3
review["content"].gsub!(/^<.+?>/,'').strip!
review["content"].gsub!(/\n/,"\t")
end
@reviews << review if review.size > 0
}
reviewsplit = data[:review2].to_s.split(mode_tbl[:review2])
reviewsplit.each{ |column|
review = {}
if column =~ /cust_id=(.+?)'/
review["cust_id"] = $1
end
if column =~ /(.+?)店長/s
review["reviewer"] = $1.strip
end
if column =~ /<\/strong>(.+?)
(.+?)<\/td>/s
review["content"] = $1
review["date"] = $2
review["content"].gsub!(/<[^>]+>/,'').gsub!(/\s+/,'').strip!
review["date"].strip!
end
@reviews << review if review.size > 0
}
end
=end
end
#===== ===== main ===== =====
# Load local state from the cached @info record.
#
# The two flags are only ever switched on here, never cleared:
# @flag_overprint when the cache entry carries "overprint", and
# @flag_isbnsearch when the entry has no detail id (so an ISBN search is
# still needed). Returns the cached delivery value.
def set_info
  cached = @info
  @flag_overprint = true if cached["overprint"]

  @detail_id = cached["detail_id"]
  @flag_isbnsearch = true unless @detail_id

  @title = cached["title"]
  @delivery = cached["delivery"]
end
# Fetch the book pages from the site, parse them and store the result in
# @info.
#
# When @flag_isbnsearch is set, the ISBN search page is fetched and parsed
# first; the detail page is always fetched and parsed. @info is replaced by a
# fresh hash unless @flag_overprint is set, in which case the existing record
# is updated in place and marked with "overprint".
# Returns @reviews when @flag_review is set, otherwise nil.
def get_info
  notice = 'CHK; access; ' + @url_site + ' -> ' + @isbn
  @stderr.puts notice.kconv(@messageKcode)

  # [URL builder, parser] pairs, visited in order. The detail URL is built
  # only after the search step has been parsed.
  steps = []
  steps << [:url_isbnsearch, :parse_isbnsearch] if @flag_isbnsearch
  steps << [:url_bibinfo, :parse_bibinfo]
  steps.each do |url_builder, parser|
    agent = WebAgent.new
    agent.uri = __send__(url_builder)
    agent.get
    __send__(parser, agent.body.kconv(@insystemKcode))
  end

  @info = {} unless @flag_overprint
  # translator, genre, genre1-3 and reviewlist are deliberately not exported.
  {
    "isbn" => @isbn,
    "title" => @title,
    "author" => @author,
    "publisher" => @publisher,
    "series" => @series,
    "size" => @size,
    "pubdate" => @pubdate,
    "price" => @price,
    "desc" => @content,
    "image" => @image,
    "reader" => @reader,
    "stock" => @stock,
    "delivery" => @delivery,
    "detail_id" => @detail_id
  }.each { |key, value| @info[key] = value }

  @info["overprint"] = true if @flag_overprint
  @info["reviews"] = @reviews if @flag_review
end
end