views:

79

answers:

2

I would like to collect and store all this info into an array.

I have the following; how should I refactor this?

require 'rubygems'
require 'nokogiri'
require 'open-uri'

# Pages to scrape.
@urls = %w{http://url_01.com http://url_02.com http://url_03.com}

# One result array per URL; in this excerpt only @link_01_arr is filled in.
@link_01_arr = []
@link_02_arr = []
@link_03_arr = []

# Fetch and parse the first URL once, then store the inner HTML of its
# "title", ".content" and ".date" matches at indexes 0, 1 and 2.
# NOTE(review): bare open(url) relies on open-uri hooking Kernel#open; newer
# Ruby versions require URI.open instead -- confirm the target Ruby version.
link_01 = Nokogiri::HTML(open("#{@urls[0]}"))
@link_01_arr[0] = link_01.at("title").inner_html
@link_01_arr[1] = link_01.at(".content").inner_html
@link_01_arr[2] = link_01.at(".date").inner_html

I tried doing this instead, but it turned out significantly slower, I guess because there are more requests this way.

# Pages to scrape.
@urls = %w{http://url_01.com http://url_02.com http://url_03.com}

# Each of the three maps below calls open(url) and Nokogiri::HTML for every
# URL, so every page is requested and parsed three times in total.

# Inner HTML of the "title" match of each page, in @urls order.
@titles_arr = @urls.map do |url|
  Nokogiri::HTML(open(url)).at("title").inner_html
end

# Inner HTML of the ".content" match of each page, in @urls order.
@content_arr = @urls.map do |url|
  Nokogiri::HTML(open(url)).at(".content").inner_html
end

# Inner HTML of the ".date" match of each page, in @urls order.
@date_arr = @urls.map do |url|
  Nokogiri::HTML(open(url)).at(".date").inner_html
end
A: 
# Three separate result arrays, index-aligned with @urls.
@titles_arr, @content_arr, @date_arr = [], [], []

# Download and parse each page a single time, then read all three fields
# from that one parsed document.
@urls.each do |url|
  page = Nokogiri::HTML(open(url))
  @titles_arr.push(page.at("title").inner_html)
  @content_arr.push(page.at(".content").inner_html)
  @date_arr.push(page.at(".date").inner_html)
end
adamse
This code is broken; you're only creating one array.
molf
Whoops. Edited to fix (/remove) the issue.
adamse
+1  A: 

It's difficult to know how to refactor your code without knowing how you're going to use the data. However, I would use hashes with meaningful keys (as symbols) instead of arrays. This greatly improves readability when you're using the data. Here's a suggestion:

urls = %w[http://url_01.com http://url_02.com http://url_03.com]

# Build one hash per URL. Each page is fetched and parsed once, and the
# three fields are read from that single document.
data = urls.map do |url|
  page = Nokogiri::HTML(open(url))

  title   = page.at("title").inner_html
  content = page.at(".content").inner_html
  date    = page.at(".date").inner_html

  { :title => title, :content => content, :date => date }
end

# Reading the results back:
data[0]         #=> { :title => "...", :content => "...", :date => "..." }
data[0][:title] #=> title of the first URL
data[1][:date]  #=> date of the second URL
molf