tags:

views:

216

answers:

1

Hi,

I am using the Google AJAX Search API to search for a string on Google. It returns complete HTML pages, with all of the markup tags mixed in with the text.

What should I use if I want to get only the text?

My program is in Java.

Regards

Manjot

+2  A: 

I did some googling and found this:

http://www.ajaxlines.com/ajax/stuff/article/using%5Fgoogle%5Fis%5Fajax%5Fsearch%5Fapi%5Fwith%5Fjava.php

Here's the example code snippet from there:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import org.json.JSONArray;      // JSON library from http://www.json.org/java/
import org.json.JSONObject;

/**
 * Small command-line demo that sends a few fixed queries to the Google AJAX
 * web search endpoint and prints the title and URL of every result returned.
 *
 * Needs the org.json classes (JSONObject, JSONArray) on the classpath.
 */
public class GoogleQuery {

 // Put your website here; it is sent as the Referer header of every request.
 private static final String HTTP_REFERER = "http://www.example.com/";

 /**
  * Runs the three sample queries immediately, printing their results to
  * standard output.
  */
 public GoogleQuery() {
  makeQuery("questio verum");
  makeQuery("info:http://frankmccown.blogspot.com/");
  makeQuery("site:frankmccown.blogspot.com");
 }

 /**
  * Executes one search and prints the estimated result count followed by the
  * title and URL of each result.
  *
  * Any failure (encoding, network, JSON parsing, missing JSON keys) is caught
  * here, reported on standard error with a stack trace, and not rethrown.
  *
  * @param query the raw, un-encoded search string
  */
 private void makeQuery(String query) {

  System.out.println(" Querying for " + query);

  try
  {
   // Convert spaces to +, etc. to make a valid URL
   query = URLEncoder.encode(query, "UTF-8");

   URL url = new URL("http://ajax.googleapis.com/ajax/services/search/web?start=0&rsz=large&v=1.0&q=" + query);
   URLConnection connection = url.openConnection();
   connection.addRequestProperty("Referer", HTTP_REFERER);

   // Get the JSON response. The reader is closed by try-with-resources even
   // when reading fails, so the underlying connection stream is not leaked.
   // The body is decoded as UTF-8 explicitly instead of relying on the
   // platform default charset (assumes the service answers in UTF-8, the
   // default encoding for JSON).
   StringBuilder builder = new StringBuilder();
   try (BufferedReader reader = new BufferedReader(
     new InputStreamReader(connection.getInputStream(), "UTF-8"))) {
    String line;
    while ((line = reader.readLine()) != null) {
     builder.append(line);
    }
   }

   JSONObject json = new JSONObject(builder.toString());
   JSONObject responseData = json.getJSONObject("responseData");

   System.out.println("Total results = " +
     responseData.getJSONObject("cursor")
     .getString("estimatedResultCount"));

   JSONArray ja = responseData.getJSONArray("results");

   System.out.println(" Results:");
   for (int i = 0; i < ja.length(); i++) {
    System.out.print((i+1) + ". ");
    JSONObject j = ja.getJSONObject(i);
    System.out.println(j.getString("titleNoFormatting"));
    System.out.println(j.getString("url"));
   }
  }
  catch (Exception e) {
   // Demo-level handling: report the problem and carry on with the next query.
   System.err.println("Something went wrong...");
   e.printStackTrace();
  }
 }

 /**
  * Program entry point; constructing the object runs the sample queries.
  *
  * @param args command-line arguments (unused)
  */
 public static void main(String[] args) {
  new GoogleQuery();
 }
}

As a side note, you should be careful not to violate the Google TOS: "You specifically agree not to access (or attempt to access) any of the Services through any automated means (including use of scripts or web crawlers) and shall ensure that you comply with the instructions set out in any robots.txt" - http://www.google.com/accounts/TOS

Cobalt
Thank you very much
Manjot