#!/usr/local/bin/ruby # amazon.rb # ISBN-search -> detail-parse # original; isbn2bib.rb for bk1 by takahashi # amazon.rb for amazon by not # ver.0.4 2002.3.21-2003.1.20 # ver.0.5 2003.5.20-2003.5.22 # ver.0.6 2003.11.10 adult対応 # ver.0.6a 2003.11.28 出版社情報の取り方修正 # ver.0.7 2003.12.22-2004.01.07 新cacheに変更 # ver.0.7a 2004.03.31 初動時imageを拾えてないのを修正 # ver.0.7b 2004.04.06 「4〜6週間」を拾えてないのを修正 #一部のお薦め(この商品に興味がある人は……でないのもの)等のデコードがまだ。 #descに文中リンクがある場合に未対応。今は除去してしまっている。 if __FILE__ == $0 require 'cookie' require 'webagent' require 'cache' require 'cachebib' end class CacheBib_amazon < CacheBib begin require 'iconv' rescue LoadError begin require 'uconv' rescue LoadError end end attr_accessor :series_num, :limage, :edition, :recommend, :ranking def initialize () super @shop_id = 'amz' @url_site = 'http://www.amazon.co.jp/' @affiliate_code = 'ipposjunkbox-22' @reviews = Array.new() @flag_review = true @adult_check = true end #===== ===== parts ===== ===== def word_delivery () delivery = '' flag = false if !@title.nil? # title-check -> no-info / get-info check flag = true if @delivery.nil? delivery = "ない" else if @delivery =~ /24/ delivery = "24時間" elsif @delivery =~ /2/ delivery = "2日" elsif @delivery =~ /3/ delivery = "2〜3日" elsif @delivery =~ /取/ delivery = "取寄せ" elsif @delivery =~ /1〜2週間/ delivery = "1〜2週間" elsif @delivery =~ /4〜6週間/ delivery = "4〜6週間" else delivery = "?" end end else message("WAR; maybe data-error, no-title.") end return delivery, flag end #===== ===== url ===== ===== def url_isbnsearch ( isbn = @isbn ) url_bibinfo(isbn) end def url_bibinfo ( id = @id ) id = @isbn if id.nil? id = id.delete('-').upcase url = @url_site + "exec/obidos/ASIN/#{id}" url << "/ref_#{affiliate_code}" unless @affiliate_code.nil? url << '?val=authorized' unless @adult_check.nil? return url end #キーワード検索主体。そのうち別途詳細検索用を用意した方がいいか? def url_search ( srch = 1, kywd = nil, codes = {}, aid = @affiliate_code ) if !kywd.nil? 
# -----------------------------------------------------------------------------
# NOTE(review): documentation block added during review; no code was changed.
#
# State of this copy
#   This copy is not loadable Ruby as it stands. Its line breaks have been
#   collapsed: the first physical line starts with '#', so the requires, the
#   class header, initialize, word_delivery and the URL builders on it are
#   lexically one comment. It also appears to have been passed through an
#   HTML-to-text conversion: tag-like text is missing from the regex literals
#   in parse_bibinfo (list items show up as a bullet character), and in a few
#   places the Ruby code that sat between two such sequences is missing too
#   (the loop header after `$1.split(/`, the assignment after `%r|目次(.+?)`,
#   the reviewer/place/content extraction). Restore the file from a pristine
#   copy before changing behaviour; the lost patterns cannot be reconstructed
#   from what is visible here.
#
# Header comments, translated
#   ver.0.6  2003.11.10  adult-content support
#   ver.0.6a 2003.11.28  fixed how publisher information is extracted
#   ver.0.7  2003.12.22-2004.01.07  switched to the new cache
#   ver.0.7a 2004.03.31  fixed the image not being picked up on the first run
#   ver.0.7b 2004.04.06  fixed "4〜6週間" (4-6 weeks) not being picked up
#   Known gaps: some recommendation lists (those not of the "people interested
#   in this item ..." form) are not decoded yet; inline links inside desc are
#   not supported and are currently stripped.
#   url_search: mainly a keyword search; a separate detailed search may be
#   worth adding later.
#   parse_bibinfo: idea noted to split the page by key before parsing; the
#   description sometimes contains inline links, which are dropped for now;
#   the review content clean-up "needs a little more work".
#
# What the visible code does (class CacheBib_amazon < CacheBib)
#   class body     Tries `require 'iconv'`, then `require 'uconv'`; a LoadError
#                  from either is ignored. Declares accessors series_num,
#                  limage, edition, recommend, ranking.
#   initialize     Calls super, then sets @shop_id, @url_site, @affiliate_code,
#                  an empty @reviews array, @flag_review and @adult_check.
#   word_delivery  Returns [label, flag]. With @title nil it calls message(...)
#                  and returns ['', false]. Otherwise flag is true and label is
#                  "ない" when @delivery is nil, else the first matching branch
#                  of /24/, /2/, /3/, /取/, /1〜2週間/, /4〜6週間/, else "?".
#   url_isbnsearch Delegates to url_bibinfo(isbn).
#   url_bibinfo    Uses id (falling back to @isbn), removes '-' and upcases,
#                  and builds @url_site + "exec/obidos/ASIN/<id>"; appends
#                  "/ref_<affiliate_code>" unless @affiliate_code is nil and
#                  '?val=authorized' unless @adult_check is nil.
#   url_search     Converts kywd with toeuc + Iconv (EUC-JP to UTF-8); on
#                  NameError falls back to CGI::escape(kywd.toutf8). Builds
#                  @url_site + "exec/obidos/external-search/?" with an optional
#                  tag= and the keyword=.
#   parse_bibinfo  Sets @id from @isbn when nil, then fills instance variables
#                  from regex matches on str: @title, @series_num, @author /
#                  @e_author / @translator (tab-separated), @series, @price,
#                  @delivery, @edition, @recommend, @page, @pubsize,
#                  @publisher, @isbn, @pubdate, @ranking, @desc, @reviewlist,
#                  @review_num; when @flag_review is set it appends one hash
#                  per review to @reviews.
#   preset_info    Calls super, then sets @id from @isbn ('-' removed, upcased).
#   set_info       Calls super, then copies series_num, limage, edition,
#                  recommend and ranking into @info.
#   script entry   When run directly: requires cookie, webagent, cache and
#                  cachebib, and prints CacheBib_amazon.new.get_data(...).
#
# Defects to address once the file is restored
#   * word_delivery tests /2/ before /1〜2週間/, so the "1〜2週間" branch can
#     never be reached, and any text containing both "2" and "3" yields "2日"
#     rather than "2〜3日". Test the specific ranges before the bare digits.
#   * url_bibinfo and url_search guard on @affiliate_code but interpolate the
#     bare call `affiliate_code`; this only works if CacheBib provides such a
#     reader (not visible here -- TODO confirm).
#   * url_search never references its srch, codes or aid parameters; the tag
#     presumably should come from aid.
#   * url_search CGI-escapes the keyword only in the NameError fallback path.
#   * @adult_check is tested with .nil?, so setting it to false does not
#     remove the '?val=authorized' suffix.
# -----------------------------------------------------------------------------
begin kywd = kywd.toeuc kywd = Iconv::iconv("UTF-8", "EUC-JP", kywd).to_s rescue NameError begin kywd = CGI::escape(kywd.toutf8) rescue end end end url = @url_site + "exec/obidos/external-search/?" url << "tag=#{affiliate_code}&" unless @affiliate_code.nil? url << "keyword=#{kywd}" return url end #===== ===== parse ===== ===== def parse_bibinfo ( str ) @id = @isbn.delete('-').upcase if @id.nil? #----- parse ----- #???keyできりわけてからparseする方法にそのうち変更??? # if %r|本のデータ: (.+?)|s =~ str if %r|Amazon.co.jp:(.+?)|sm =~ str @title = $1.strip.gsub(/^本:/s,'').strip end if %r|シリーズ番号: (.+?)\n|s =~ str @series_num = $1 end if %r|(.+?
)| =~ str authors = $1.split(/
(.+?)| =~ line name = $1 if line =~ /\(著\)/ if @author.nil? then @author = '' else @author << "\t" end @author << name elsif line =~ /\(原著\)/ if @e_author.nil? then @e_author = '' else @e_author << "\t" end @e_author << name elsif line =~ /\(翻訳\)/ if @translator.nil? then @translator = '' else @translator << "\t" end @translator << name end end } end if %r|.+? (.+?)| =~ str @series = $1.gsub(/ /,'').strip end #???label??? if %r|(¥[\d,]+)|mis =~ str @price = $1.strip end if %r|発送可能時期:([^<]*?)
|mis =~ str @delivery = $1.strip end if %r|エディション:([^<]*?)
|mis =~ str @edition = $1.strip end if %r|この商品に興味がある人は、こんな商品にも興味をもっています.+?|mis =~ str list = $1 recommend = [] list.scan(%r|
  • .+?
  • |mi){ |line| if %r|/exec/obidos/ASIN/([\dX]+)/.+?(.+?)(.+?)|mi =~ line recommend << [$1.strip, $2.strip, $3.strip] end } @recommend = recommend.flatten.join("\t") if recommend.size > 0 end # if %r|書籍データ.+?([^<]+)\n(?: - (\d+) p)?|m =~ str if %r|書籍データ.+?([^<]+)\n(?:\s*?(\d+)\s*?p)?|mis =~ str edition = $1.strip.chomp(':') @page = " #{$2}p" unless $2.nil? if @edition.nil? @edition = edition elsif @edition != edition message("WAR; edition-missmatch: #{@edition} - #{edition}") end end # if %r|サイズ(.*?)\n|m =~ str #ブックカバー(文庫サイズ)に誤ヒット if %r|書籍データ.+?サイズ(.*?)\n|mis =~ str pubsize = $1.gsub(/\s+/, ' ').gsub(/<[^>]+>/, '').strip @pubsize = pubsize if pubsize != '(cm):' end # if %r|^([^>]+)\n ; \nISBN|m =~ str then if %r|出版社: (.*?)\n|m =~ str @publisher = $1.strip end if %r|ISBN:\s*([\dX]+)| =~ str @isbn = $1.strip end if %r|^\(([\d\/]+)\)| =~ str @pubdate = $1 end if %r|Amazon.co.jp 売上ランキング:([^<]+)|mis =~ str @ranking = $1.strip end # if %r|内容(「.+?」データベースより)
    +([^<]+)
    |s =~ str if %r|(.+?)|mi =~ str @desc = $1.gsub(/ /i, '').gsub(/
    /i, "\t").gsub(/

    /i, "\t").gsub(/<[^>]+>/i, '').strip end #時々文中リンクがあるが、今は除く if %r|目次(.+?)/i, "\t").gsub(/<[^>]+>/, '').strip end if str =~ %r|カスタマーレビュー
    (.*)|mis data = $1 end # reviewsplit = data.to_s.split("投稿者") reviewsplit = data.to_s.split(/stars-.+?\.gif/) reviewlist = [] reviewsplit.each{ |column| if column =~ /=review-id value=(\d+)/ custid = $1 reviewlist << custid if column =~ %r|レビュアー:.*?(.*?)|mis custid = $1.gsub(/<[^>]+>/, '').strip.gsub(/\(プロフィールを見る\)$/s, '').strip custid = "(noname)" if custid.size == 0 reviewlist << custid end end } if reviewlist.size > 0 @reviewlist = reviewlist.join("\t") @review_num = reviewlist.size / 2 #ID, name, ID, name... end if @flag_review reviewsplit.each{ |column| review = {} if column =~ /.+?alt=([^>]+?)>/ key = $1.strip review["value"] = key if key !~ /^"/ end if column =~ /=review-id value=(\d+)/ review["review_id"] = $1.strip end if column =~ /^.*?([^>]*?)<\/b>, ([\d\/]*)/ review["title"] = $1.strip review["date"] = $2 end if column =~ %r|レビュアー:.*?(.*?)(.+?)(.+?)]+>/, '').strip.gsub(/\(プロフィールを見る\)$/s, '').strip reviewer = "(noname)" if reviewer.size == 0 review["reviewer"] = reviewer review["place"] = place.gsub(/<[^>]+>/, '').gsub(/ /, ' ').strip content.gsub!(/
    /mi,"\n") content.gsub!(/\n\s+/,"\n") content.gsub!(/\s+/," ") content.gsub!(/\n+/,"\n") content.gsub!(/

    /mi,"\n") content.strip! content.gsub!(/\n/,"\t") review["content"] = content #???もうちょっと修正が要る??? end @reviews << review if review.size > 0 } end end #===== ===== get/set info ===== ===== def preset_info #===== set local-value from @info (from cache) ===== super @id = @isbn.delete('-').upcase #@info["id"] end def set_info #===== set book-info(@info) from local-value ===== super #----- set info add ----- #amazon独自の情報類 @info["series_num"] = @series_num @info["limage"] = @limage @info["edition"] = @edition @info["recommend"] = @recommend @info["ranking"] = @ranking end end if __FILE__ == $0 p CacheBib_amazon.new.get_data('4-15-010739-4') end