#!/usr/bin/env ruby # bk1.rb # ISBN-search -> detail-parse # original; isbn2bib.rb for bk1 by takahashi # コミック等が対照となる場合、通常と微妙にフォーマットが変わる為、幾つかのデータを拾い損ねる場合有り。未対応。 # ver.0.5 2003.12.20 書誌情報HTMLの形式が変更? それに追応。 # ver.0.6 2002.3.21-2004.01.07 # 内容情報は書評の一環としてrevidが与えられてる。別途読み直し要 #デイリー/ウィークリーランキング+rss,オススメ? 以上はまだ #simageはコメントアウト中(一度アクセスして存在確認してから組むべきか?) # reviewの形で「内容紹介」があった場合、 # その内容をdesc(文)にし、more.gifがあれば文に(more)を付し、review_numを減ず。 # →本来のdescとの衝突確認まだ。 # 以上ToDo if __FILE__ == $0 require 'cookie' require 'webagent' require 'cache' require 'cachebib' end class Review_bk1 attr_accessor :url, :info, :review_id attr_accessor :title, :reviewer_title, :reviewer, :date, :point, :content attr_accessor :insystemKcode, :outputKcode, :messageKcode def initialize () @insystemKcode = 3 #sjis #1-jis,2-euc,3-sjis @outputKcode = 2 #euc @messagetKcode = 3 #sjis end def parse_review (str) #----- set key and data ------ mode_tbl = { '' => :title, '' => :reviewer_title, '' => :reviewer, '' => :date, '' => :point, '' => :content, '' => :none } mode = :none data = Hash.new() #----- read and sepalate by key(mode_tbl) ------ str.each{|line| l = line.chomp.chomp("\r").kconv(@insystemKcode) l.strip! if mode_tbl[l] mode = mode_tbl[l] end if mode != :none if data[mode] data[mode] << line else data[mode] = line end end } #----- parse ----- title = data[:title].to_s.gsub(/<[^>]+>/,'').strip reviewer_title = data[:reviewer_title].to_s.gsub(/<[^>]+>/,'').strip reviewer = data[:reviewer].to_s.gsub(/<[^>]+>/,'').strip date = data[:date].to_s.gsub(/<[^>]+>/,'').strip point = data[:point].to_s.gsub(/<[^>]+>/,'').sub(/評価(★マーク)/s,'').sub(/ /s,'').strip content = data[:content].to_s.gsub(/<[^>]+>/,'').strip content.gsub!(/\n/,"\t") @title = title if title != '' @reviewer_title = reviewer_title if reviewer_title != '' @reviewer = reviewer if reviewer != '' @date = date if date != '' @point = point if point != '' @content = content if content != '' end def get_review () agent = WebAgent.new() agent.uri = @url begin agent.get() rescue return false #timeout end body = agent.body.kconv(@insystemKcode) parse_review(body) @info = {} @info["title"] = @title unless @title.nil? @info["reviewer_title"] = @reviewer_title unless @reviewer_title.nil? @info["reviewer"] = @reviewer unless @reviewer.nil? @info["date"] = @date unless @date.nil? @info["point"] = @point unless @point.nil? @info["content"] = @content unless @content.nil? @info["review_id"] = @review_id unless @review_id.nil? #@review_idはCacheBib_bk1内で与えられる return true end end module CacheBib_bk1_xml # alias orig_url_bibinfo url_bibinfo if defined?(url_bibinfo) # alias orig_parse_bibinfo parse_bibinfo if defined?(parse_bibinfo) #bk1.jp def url_bibinfo ( id = @id ) id = '' if id.nil? url = "http://bk1.jp/#{id[0..3]}/#{id}.html" url = "http://breeder.bk1.jp/rd/#{id}/#{@affiliate_code}/noentry" if !@affiliate_code.nil? return url end def url_bibinfo_xml ( id = @id ) url = "http://cgi.bk1.jp/xml.cgi?" url << "bibid=#{id}" url << "&aid=#{@affiliate_code}" if !@affiliate_code.nil? message("CHK; access-url; #{url}") return url end def parse_bibinfo_xml ( str ) #正規XMLパーサの組込の方がいいか? data = {} #それとも組込まなくてもいい利便性を採るか? if str =~ %r|]+?)>([^<]+?)]+?>|){ data[$1] = $2 } # @id = data['BIBID'] #既存のはずなのでオミット @title = data['ProductName'] @label = data['SeriesName'] @author = data['Authors'] @publisher = data['Manufacturer'] @isbn = data['ISBN'] @delivery = data['Availability'] @simage = data['ImageUrlSmall'] @image = data['ImageUrlLarge'] @price = data['OurPrice'] @pubdate = data['DateOfIssue'] end def get_info unless @isbn.nil? message("CHK; access; #{@url_site} -> ISBN:#{@isbn}") else message("CHK; access; #{@url_site} -> ID:#{@id}") end #----- get detail-code ----- if !@id @id = @isbn.dup end #----- get book-info ----- agent = WebAgent.new() agent.uri = url_bibinfo_xml begin agent.get() rescue message("WAR; access-timeout.") return end #body = agent.body.kconv(@insystemKcode) body = agent.body #UTF対策。ここだけ標準から変更 begin body = Iconv::iconv("UTF-8", "EUC-JP", body) rescue NameError begin body = Uconv::u8toeuc(body) rescue end end body = body.kconv(@insystemKcode) parse_bibinfo_xml(body) #----- reget book-info ----- if @id.to_s == '' || @id == '02397767' #携帯ストラップはエラーとみなす agent = WebAgent.new() agent.uri = url_isbnsearch begin agent.get() rescue message("WAR; access-timeout.") return end body = agent.body.kconv(@insystemKcode) parse_isbnsearch(body) agent.uri = url_bibinfo begin agent.get() rescue message("WAR; access-timeout.") return end body = agent.body.kconv(@insystemKcode) parse_bibinfo(body) end #----- get book-image ----- if !@image.nil? && @flag_image # filename = @dir_image + File.basename(@image) isbn = @isbn # ext = File.extname(@image) # for ruby1.7- ext = '.jpg' if @image =~ /(\.+?)$/ ext = $1.strip end filename = @dir_image + 'isbn' + isbn.to_s.delete('-').downcase + ext urlname = @url_image + 'isbn' + isbn.to_s.delete('-').downcase + ext if @isbn.to_s == '' && !@id.nil? filename = @dir_image + 'id' + @id + ext urlname = @url_image + 'id' + @id + ext end unless File.exist?(filename) message("CHK; download-image; #{@image} -> #{filename}") agent = WebAgent.new() agent.uri = @image agent.get() open(filename, 'wb'){ |fp| fp.write(agent.body) fp.flush } @image_local = urlname else @image_local = urlname end end if !@image_local.nil? isbn = @isbn ext = '.jpg' if @image_local =~ /(\.+?)$/ ext = $1.strip end filename = @dir_image + 'isbn' + isbn.to_s.delete('-').downcase + ext if @isbn.to_s == '' && !@id.nil? filename = @dir_image + 'id' + @id + ext end if File.exist?(filename) begin require 'imagesize' rescue LoadError require 'lib/imagesize' end w, h = ImageSize::size(filename) @image_width, @image_height = w, h unless w.zero? end end set_info end end class CacheBib_bk1 < CacheBib begin require 'iconv' rescue LoadError begin require 'uconv' rescue LoadError end end attr_accessor :chiefeditor, :author_intro, :user, :okidoki, :simage attr_accessor :author_id, :translator_id, :chiefeditor_id attr_accessor :url_today_recommend def initialize () super @shop_id = 'bk1' @url_site = 'http://www.bk1.co.jp/' @affiliate_code = 'p-ippo57167' @reviews = Array.new() @flag_review = true end #===== ===== parts ===== ===== def word_delivery () delivery = '' flag = false if !@title.nil? # title-check -> no-info / get-info check flag = true if @delivery.nil? delivery = "ない" else if @delivery =~ /24/ delivery = "24時間" elsif @delivery =~ /3/ delivery = "2〜3日" elsif @delivery =~ /取/ delivery = "取寄せ" else delivery = "?" end end else message("WAR; maybe data-error, no-title.") end return delivery, flag end #===== ===== url ===== ===== def url_isbnsearch ( isbn = @isbn ) url = @url_site + "cgi-bin/srch/srch_result_book.cgi/?" url << "isbn=#{isbn}" url << "&aid=#{@affiliate_code}" if !@affiliate_code.nil? url << "&idx=3" return url end def url_bibinfo ( id = @id ) url = @url_site + "cgi-bin/srch/srch_detail.cgi/?" url << "bibid=#{id}" url << "&aid=#{@affiliate_code}" if !@affiliate_code.nil? url << "&idx=3" return url end def url_review ( id = @id, revid = @revid ) url = @url_site + "cgi-bin/srch/srch_rev.cgi/?" url << "bibid=#{id}&volno=#{@volno}&revid=#{revid}" url << "&aid=#{@affiliate_code}" if !@affiliate_code.nil? return url end def url_authorinfo ( author_id = @author_id ) url = @url_site + "cgi-bin/srch/srch_result_book.cgi/?" url << "auid=#{author_id}" url << "&aid=#{@affiliate_code}" if !@affiliate_code.nil? url << "&srch=4" return url end def url_top url = @url_site + "cgi-bin/srch/srch_top.cgi/?" url << "aid=#{@affiliate_code}" if !@affiliate_code.nil? return url end #キーワード検索主体。そのうち別途詳細検索用を用意した方がいいか? def url_search ( srch = 1, kywd = nil, s1 = nil, dp = nil, pg = nil, aid = @affiliate_code, codes = {} ) #検索モードにおけるFORM値 #srch (1=キーワード検索,2=詳細検索,5=書評検索) #キーワード検索(srch=1) #kywd キーワード #詳細検索(srch=2) #(http://www.bk1.co.jp/cgi-bin/srch/srch_search.cgi) #ol キーワード #ti タイトル #au 著者 #pb 出版社 #isbn ISBN #idx 発送可能時間(0,1,2,3:24時間以内,2〜3日以内,お取り寄せ,全書籍) #gu ジャンル・フィーリング('',...) #"01010000"文芸・ミステリ・SF・ホラー...) #"01040000"人文・社会・ノンフィクション #"01080000"科学・技術・医学・建築 #"01060000"ビジネス・経済・法律・資格 #"01070000"コンピュータ・インターネット #"01050000"教育・福祉・語学・辞書 #"01030000"暮らし・実用・旅行 #"01020000"芸能・娯楽・スポーツ #"01020900"美術・デザイン・写真 #"01090000"児童書・絵本 #"01100000"マンガ #"01110000"文庫・新書 #"01120000"ビデオ・DVD・限定本ほか --> #"02010000"ホッとひと息つける本 #"02020000"感動する本 #"02030000"元気が出る本 #"02040000"ドキドキする本 #"02050000"幸せな気分になれる本 #"02060000"頑張りたい気分になれる本 #s1 並べ替え条件(''=一致度順,...) #"za"発送時間順 #"dd"発行日順 #"da"発行日逆順 #"ta"タイトル50音順 #"td"タイトル50音逆順 #"aa"著者名50音順 #"ad"著者名50音逆順 #"pa"出版社50音順 #"pd"出版社50音逆順 #"rd"価格(高〜)順 #"ra"価格(低〜)順 #dp 表示件数(10,25,50) #pby 出版日(yyyy or yyyymm or yyyymmdd) #pbrg 出版日条件(0,1,2:以前,以後,頃) #age 対象('',...) #"AA"幼児 #"BB"小学生 #"CC"小学生1-2 #"DD"小学生3-4 #"EE"小学生5-6 #"FF"中学生 #"GG"高校生 if !kywd.nil? kywd = CGI::escape(kywd.toeuc) end url = @url_site + "cgi-bin/srch/srch_result_book.cgi/?" url << "aid=#{aid}" #affiliate-code url << "&srch=#{srch}" if !srch.nil? #search-mode(num=1=:keyword) url << "&kywd=#{kywd}" if !kywd.nil? #キーワード(string)(srch=1時) url << "&s1=#{s1}" if !s1.nil? #並べ替え条件(code)(''=一致度順) url << "&dp=#{dp}" if !dp.nil? #表示件数(num=10)(10,25,50) url << "&pg=#{pg}" if !pg.nil? #page(num) sort, ol, ti, au, pb, isbn, idx, gu, pby, pbrg, age, st = nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil codes.each{ |key, val| eval("#{key} = #{val}") } url << "&sort=#{sort}" if !sort.nil? #sort(string)#検索結果表示時のみ url << "&ol=#{ol}" if !ol.nil? #キーワード(詳細検索=srch=2時) url << "&ti=#{ti}" if !ti.nil? #タイトル url << "&au=#{au}" if !au.nil? #著者 url << "&pb=#{pb}" if !pb.nil? #出版社 url << "&isbn=#{isbn}" if !isbn.nil? #ISBN url << "&idx=#{idx}" if !idx.nil? #発送可能時間(0,1,2,3=全書籍) url << "&gu=#{gu}" if !gu.nil? #ジャンル・フィーリング url << "&pby=#{pby}" if !pby.nil? #出版日(yyyy or yyyymm or yyyymmdd) url << "&pgrg=#{pbrg}" if !pbrg.nil? #出版日条件(0,1,2:以前,以後,頃) url << "&age=#{age}" if !age.nil? #対象('',...) url << "&st=#{st}" if !st.nil? #??? おそらくsortと、つまりs1と被る return url end #===== ===== parse ===== ===== def parse_isbnsearch ( str ) count = [] str.scan(%r|\s*(.*)|){ count << [$1, $2] unless $1.nil? } #複数みつけた場合最期に見つけたものが有効 if count.uniq.size > 1 message("WAR; ISBN-search hit multi-column. ISBN; #{@isbn}") end @id, @title = count.last return count end def parse_bibinfo ( str ) #----- bibinfo-area ------ str_pre, str_post = '', '' if str =~ %r|.+|mi str_pre, str, str_post = $`, $&, $' end tables = str.scan(%r|.*?|mi) #----- parse ----- if tables[0] =~ %r|(.*?)|) @title = titles[0].first unless titles[0].nil? if !titles[-1].nil? && titles[-1].to_s.strip =~ /^\((.*?)\)$/ @label = $1.strip if @label =~ /^(.*?)\s(.*)$/ @label, @series = $1, $2 end end # data[:title] =~ Regexp.new("([^<]*)") authors = [] tables[1].scan(%r|(.*?)|){ |code, name| authors << [code, name] } if authors == [] tables[1].scan(%r|著者:(.*?)
|mi){ |name| authors << [nil, name.first.strip] } end @author_id = nil if authors.size > 0 #preset_infoしてる分を場合に応じて消去 authors.each { |auid, name_str| # str = line.to_s.gsub(/<[^>]+>/,'').delete("\n").strip if name_str =~ /(著$|編$|〔著〕$)/ name = $` name += '編'if name_str =~ /編$/ if @author.nil? then @author = '' else @author << "\t" end @author << name if @author_id.nil? then @author_id = '' else @author_id << "\t" end @author_id << auid unless auid.nil? elsif name_str =~ /訳$/ name = $` if @translator.nil? then @translator = '' else @translator << "\t" end @translator << name if @translator_id.nil? then @translator_id = '' else @translator_id << "\t" end @translator_id << auid elsif name_str =~ /監修$/ name = $` if @chiefeditor.nil? then @chiefeditor = '' else @chiefeditor << "\t" end @chiefeditor << name if @chiefeditor_id.nil? then @chiefeditor_id = '' else @chiefeditor_id << "\t" end @chiefeditor_id << auid end } if tables[1] =~ %r|本体価格:(.*?)
|mi @price = $1.strip end if tables[1] =~ %r|出版:(.*?)
|mi @publisher = $1.strip end if tables[1] =~ %r|サイズ:(.*?)
|mi @pubsize = $1.strip end if tables[1] =~ %r|ISBN:(.*?)
|mi isbn = $1.strip @isbn = isbn if isbn != '' end if tables[1] =~ %r|発行年月:(.*?)
|mi @pubdate = $1.strip end if tables[1] =~ %r|利用対象:(.*?)
|mi @user = $1.delete(" ").gsub(/\s+/,'').gsub(//,'').strip end if tables[1] =~ %r|発送可能時間.*?]+>/,'').gsub(/\s+/, ' ').strip # @desc = desc if desc.size > 0 end str_post.scan(%r|著者紹介(.+?)]+>/,'').gsub(/\s+/, ' ').strip if @author_intro.nil? then @author_intro = '' else @author_intro << "\t" end @author_intro << intro } #-----review----- reviewlist = [] #タイトル、評者名、(と評者時間は入れるか?) str_post.scan(%r|出版社コメント.+?(.+?)(.+?)]+>/, '').gsub(/\s+/, ' ').strip @desc += ' (more)' if desc =~ %r|/images/bk1/more.gif| } # str_post.scan(%r|書評.+?(.+?)|mi){ |nick| reviewlist << nick[0] reviewlist << nick[1] } if reviewlist.size > 0 @reviewlist = reviewlist.join("\t") @review_num = reviewlist.size / 2 #ID, name, ID, name... # (time) or /3 @review_num -= 1 if reviewlist.include?('内容紹介') end if @flag_review str_post.scan(%r|(.+?)(.+?)|){ |code, title, author| title.strip! author.gsub!(/<[^>]+?>/, '').strip! if author =~ /^\// author = $'.strip end if author =~ /著$/s author = $`.strip end list << [code, title, author] } @today_recommend = list return @today_recommend end #===== ===== get/set info ===== ===== def preset_info #===== preset local-value from @info (from cache) ===== #----- set book-info (from cache) ----- super @author_id = @info['author_id'].split("\t").first unless @info['author_id'].nil? end def set_info #===== set book-info(@info) from local-value ===== super #----- set info add ----- #bk1独自の情報類 @info["chiefeditor"] = @chiefeditor @info["author_id"] = @author_id @info["translator_id"] = @translator_id @info["chiefeditor_id"] = @chiefeditor_id @info["author_intro"] = @author_intro @info["user"] = @user #利用対象 @info["okidoki"] = @okidoki #OkiDoki対象 @info["simage"] = @simage #縮小画像(縦90px) ??? end end #class CacheBib_bk1 # include CacheBib_bk1_xml #end if __FILE__ == $0 # p CacheBib_bk1.new.get_data('4-15-010739-4') book = CacheBib_bk1.new book.flag_image = true # book.flag_review = false p book.get_data('4-15-010739-4') end