chmod 755 extraction.rb
./extraction.rb <filename>.html
dasun@bulletbill:~/Applications/jobs/Airseed$ ./extraction.rb seamless.html
[{"quantity"=>"1", "item description"=>"Hong Kong-Style Noodle", "price"=>"$8.95"}, {"quantity"=>"1", "item description"=>"Beef with Broccoli", "price"=>"$10.50"}, {"quantity"=>"1", "item description"=>"Cilantro Shrimp Dumpling", "price"=>"$5.95"}]
# One result row; the string keys are the field names that appear in the
# script's printed output.
hash_output = {
  "quantity" => quantity,
  "item description" => item_description,
  "price" => price
}
# Removes the separator characters ~ = : ; + - _ . from +string+.
#
# @param string [String] raw text to clean
# @return [String] a new string with those characters removed
def format_key(string)
  # The hyphen must be escaped: an unescaped `+-_` inside the class is a
  # range (0x2B..0x5F) that would also strip digits and uppercase letters.
  string.gsub(/[~=:;+\-_.]/, '')
end
To change how fields are processed (an HTML node is skipped when is_invalid_field? returns true), edit the method to include more cases:
# Reports whether a field's text is invalid.
#
# @param string [String, nil] text of the field
# @return [Boolean] true when the text is nil, empty, or exactly "="
def is_invalid_field?(string)
  # A regex check can be added to this expression to reject more cases.
  string.nil? || string.empty? || string == '='
end