#!/usr/local/bin/ruby # amazon.rb ver.0.4 2002.3.21-2003.1.20 # amazon.rb ver.0.5 2003.5.20-2003.5.22 # ISBN-search -> detail-parse # original; isbn2bib.rb for bk1 by takahashi # amazon.rb for amazon by not #エディション: 文庫 #カスタマーレビュー #の部分のデコードがまだ。 require 'net/http' require 'cgi' #require 'simpleuri' #require 'webagent' class CacheBib_amazon < CacheBib attr_accessor :isbn attr_accessor :flag_review, :affiliate_code attr_accessor :url_site attr_accessor :title, :author, :publisher, :size, :pubdate, :price, :desc attr_accessor :image, :image_width, :image_height, :translator attr_accessor :reviews, :reviewlist attr_accessor :insystemKcode, :outputKcode, :messageKcode attr_accessor :asin def initialize () super @reviews = Array.new() @flag_review = true @url_site = 'http://www.amazon.co.jp/' @affiliate_code = 'ipposjunkbox-22' @insystemKcode = 3 #sjis #1-jis,2-euc,3-sjis @outputKcode = 2 #euc @messagetKcode = 3 #sjis end #===== ===== parts ===== ===== def word_delivery () delivery = '' flag = false # title-check -> no-info / get-info check if !@title.nil? flag = true if @delivery.nil? delivery = "ない" else if @delivery =~ /24/ delivery = "24時間" elsif @delivery =~ /3/ delivery = "2〜3日" elsif @delivery =~ /取/ delivery = "取寄せ" elsif @delivery =~ /1〜2週間/ delivery = "1〜2週間" else delivery = "?" end end else message = "WAR; maybe data-error, no-title." @stderr.puts message.kconv(@messageKcode) end return delivery, flag end #===== ===== url ===== ===== def url_bibinfo () url = @url_site + "exec/obidos/ASIN/#{@asin}" url << "/ref_#{affiliate_code}" unless @affiliate_code.nil? return url end #===== ===== parse ===== ===== def parse_bibinfo (str) #?そのうちkeyできりわけてからparseする方法に? #----- parse ----- # if %r|本のデータ: (.+?)|s =~ str then if %r|Amazon.co.jp:(.+?)|sm =~ str then @title = $1.strip @title = @title.gsub(/^本:/s,'').strip end if %r|シリーズ番号: (.+?)\n|s =~ str then @series_num = $1 end if %r|(.+?)| =~ str then authors = $1.split('.+? (.+?)| =~ str @series = $1.gsub(/ /,'').strip end if %r|(¥[\d,]+)| =~ str then @price = $1 end if %r|発送可能時期:([^<]*?)|mis =~ str then @delivery = $1.strip end if %r|\n\n\n \n([^<]+)\n(?: - (\d+) p)?|m =~ str then @size = $1 @size << " #{$2}p" unless $2.nil? end if %r|^\(([\d\/]+)\)| =~ str then @pubdate = $1 end if %r|^([^>]+)\n ; \nISBN|m =~ str then @publisher = $1 end if %r|ISBN:(.*?)\s*?;\s*?サイズ|m =~ str then @isbn = $1.strip end if %r|サイズ(.*?)\n|m =~ str then @pubsize = $1 end if %r|内容(「.+?」データベースより) +([^<]+)|s =~ str then @desc = $1 end if str =~ /カスタマーレビュー<\/font>(.*)/ms data = $1 end # reviewsplit = data.to_s.split("投稿者") reviewsplit = data.to_s.split("stars-.+?\.gif") reviewlist = [] reviewsplit.each{ |column| if column =~ /=review-id value=(\d+)/ custid = $1 reviewlist << custid if column =~ /投稿者.*?([^>]*?)<\/b>/m custid = $1.strip custid = "(noname)" if custid.size == 0 reviewlist << custid end end } @reviewlist = reviewlist.join("\t") if reviewlist.size > 0 if @flag_review reviewsplit.each{ |column| review = {} if column =~ /.+?alt=([^>]+?)>/ review["value"] = $1 end if column =~ /=review-id value=(\d+)/ review["review-id"] = $1 end if column =~ /^.*?([^>]*?)<\/b>, ([\d\/]*)/ review["title"] = $1.strip review["date"] = $2 end if column =~ /投稿者.*?([^>]*?)<\/b>(.*?)<\/font>(.*?)/,"\n") content.gsub!(/\n\s+/,"\n") content.gsub!(/\s+/," ") content.gsub!(/\n+/,"\n") content.gsub!(//,"\n") content.strip! content.gsub!(/\n/,"\t") review["content"] = content # #?もうちょっとしゅうせいがいるとおもう? end @reviews << review if review.size > 0 } end end #===== ===== main ===== ===== def set_info #===== set local-value from @info (from cache) ===== #----- set book-info (from cache) ----- @flag_overprint = true if @info["overprint"] @asin = @isbn.delete('-').upcase @title = @info["title"] @delivery = @info["delivery"] end def get_info #===== get book-info(@info) from url ===== message = 'CHK; access; ' + @url_site + ' -> ' + @isbn @stderr.puts message.kconv(@messageKcode) #----- get book-info ----- agent = WebAgent.new() agent.uri = url_bibinfo agent.get() body = agent.body.kconv(@insystemKcode) parse_bibinfo(body) if !@flag_overprint @info = {} end @info["title"] = @title @info["series_num"] = @series_num @info["author"] = @author @info["series"] = @series @info["translator"] = @translator @info["e_author"] = @e_author @info["image"] = @image @info["image_width"] = @image_width @info["image_height"] = @image_height @info["limage"] = @limage @info["price"] = @price @info["delivery"] = @delivery @info["size"] = @size @info["pubdate"] = @pubdate @info["publisher"] = @publisher @info["isbn"] = @isbn @info["pubsize"] = @pubsize @info["desc"] = @desc @info["reviewlist"] = @reviewlist @info["overprint"] = true if @flag_overprint if @flag_review @info["reviews"] = @reviews end end end
/,"\n") content.strip! content.gsub!(/\n/,"\t") review["content"] = content # #?もうちょっとしゅうせいがいるとおもう? end @reviews << review if review.size > 0 } end end #===== ===== main ===== ===== def set_info #===== set local-value from @info (from cache) ===== #----- set book-info (from cache) ----- @flag_overprint = true if @info["overprint"] @asin = @isbn.delete('-').upcase @title = @info["title"] @delivery = @info["delivery"] end def get_info #===== get book-info(@info) from url ===== message = 'CHK; access; ' + @url_site + ' -> ' + @isbn @stderr.puts message.kconv(@messageKcode) #----- get book-info ----- agent = WebAgent.new() agent.uri = url_bibinfo agent.get() body = agent.body.kconv(@insystemKcode) parse_bibinfo(body) if !@flag_overprint @info = {} end @info["title"] = @title @info["series_num"] = @series_num @info["author"] = @author @info["series"] = @series @info["translator"] = @translator @info["e_author"] = @e_author @info["image"] = @image @info["image_width"] = @image_width @info["image_height"] = @image_height @info["limage"] = @limage @info["price"] = @price @info["delivery"] = @delivery @info["size"] = @size @info["pubdate"] = @pubdate @info["publisher"] = @publisher @info["isbn"] = @isbn @info["pubsize"] = @pubsize @info["desc"] = @desc @info["reviewlist"] = @reviewlist @info["overprint"] = true if @flag_overprint if @flag_review @info["reviews"] = @reviews end end end