views:

33

answers:

3

I am trying to get the html of a website using this code:

 /**
  * Callback for a finished request: syncs the speed state and shows or
  * hides the error banner based on the XML reply.
  *
  * Reads the outer-scope `speed` and `currentSpeed` variables, calls
  * `moveToNewSpeed(speed)` when they differ, and then stores `speed` in
  * `currentSpeed`. If the reply contains a non-empty <message> element its
  * text is written into the #errorMessage element, which is made visible;
  * otherwise #errorMessage is hidden.
  *
  * @param {Object} req - The request object; only `req.responseXML` is read.
  */
 function catchData(req) {
   console.debug("i got a reply!");
   var returnXML = req.responseXML;
   console.debug(returnXML);
   // Without a parsed XML document there is nothing to inspect, so bail out
   // before touching any state.
   if (!returnXML)
   {
    console.debug("html is bad");
    return;
   }
   if (speed != currentSpeed)
    moveToNewSpeed(speed);
   currentSpeed = speed;
   var errorBox = document.getElementById('errorMessage');
   // A reply without any <message> element is treated as "no error" rather
   // than dereferencing an undefined list entry.
   var messageNode = returnXML.getElementsByTagName('message')[0];
   var error = messageNode ? messageNode.firstChild : null;
   if (error) {
    // NOTE(review): the message text is inserted as HTML; if the reply can
    // carry untrusted content, consider textContent instead - confirm.
    errorBox.innerHTML = error.nodeValue;
    errorBox.style.visibility = 'visible';
   }
   else
    errorBox.style.visibility = 'hidden';
 }

 /**
  * Sends an asynchronous HTTP request and hands the finished request
  * object to `callback`.
  *
  * The method is POST when `postData` is truthy and GET otherwise. For POST
  * the body is declared as `application/x-www-form-urlencoded`.
  *
  * @param {string} url - Address to request.
  * @param {function(Object)} callback - Invoked with the request object once
  *   its readyState reaches 4.
  * @param {string} [postData] - Request body; its presence selects POST.
  */
 function sendRequest(url,callback,postData) {
   console.debug(url);
   console.debug(postData);
   var req = createXMLHTTPObject();
   // No usable request implementation was found: silently give up.
   if (!req) return;
   var method = (postData) ? "POST" : "GET";
   console.debug(method);
   req.open(method,url,true);
   console.debug("request Opened");
   // Set once only; the header used to be set twice on consecutive lines.
   // NOTE(review): browsers may ignore or refuse a script-set User-Agent
   // header - confirm whether this call is still needed.
   req.setRequestHeader('User-Agent','XMLHTTP/1.0');
   if (postData)
   {
    req.setRequestHeader('Content-type','application/x-www-form-urlencoded');
    console.debug("set post data");
   }
   req.onreadystatechange = function () {
    // Intermediate states are logged and skipped; only readyState 4 reaches
    // the callback.
    if (req.readyState != 4)
    {
     console.debug("bad ready state");
     return;
    }
    console.debug(req);
    console.debug("responseText:");
    console.debug(req.responseText);
    callback(req);
    console.debug("callback finished");
   }
   // Do not send if the request already reports completion.
   if (req.readyState == 4) return;
   req.send(postData);
  }

// Candidate constructors for an HTTP request object, in order of
// preference. Each entry either returns a new request object or throws
// when the underlying constructor is unavailable.
var XMLHttpFactories = [
 // Native implementation.
 function () {
  return new XMLHttpRequest();
 },
 // ActiveX-based implementations, tried in this order.
 function () {
  return new ActiveXObject("Msxml2.XMLHTTP");
 },
 function () {
  return new ActiveXObject("Msxml3.XMLHTTP");
 },
 function () {
  return new ActiveXObject("Microsoft.XMLHTTP");
 }
];

/**
 * Creates a request object using the first entry of XMLHttpFactories that
 * does not throw.
 *
 * @returns {*} Whatever the first non-throwing factory returns, or `false`
 *   when every factory throws (or the list is empty).
 */
function createXMLHTTPObject() {
 var index = 0;
 while (index < XMLHttpFactories.length) {
  try {
   // The first factory that succeeds wins.
   return XMLHttpFactories[index]();
  }
  catch (unavailable) {
   // This implementation is not available here; try the next one.
   index += 1;
  }
 }
 return false;
}

When I do a Wireshark capture I see the server returning the HTML, but req.responseText is just an empty string. Does anyone know what's up?

A: 

Where is the JavaScript being executed from? Do you have a same-origin policy violation?

I ask because I have seen weirdness in these situations, where I was violating the policy but the request was still going out; just the response was empty. It doesn't make any sense that the browser would send the request, but browsers all appear to handle it differently.

hvgotcodes
A: 

Is there a reason why you're writing this code yourself instead of using a library such as jQuery? You'll find it much easier and they've already figured out all the associated quirks with browser interoperability, etc.

Marc Novakowski
A: 

I guess you're trying to get the HTML of a page that's on a different domain than your JavaScript. This is a cross-domain request, which isn't allowed in Javascript. This is usually seen as empty responses in your script.

The JSONP standard describes a mechanism to retrieve JSON from a different domain, but this needs to be implemented on the other site and doesn't work with HTML.

The Yahoo! Query Language (YQL) can act as a proxy. The Yahoo! server will fetch the HTML and create a JSONP response, which your script will receive. This may help you to accomplish your goal. The YQL has a lot of cool features for retrieving content from other sites, I recommend you read through the documentation to see if there's anything else you can use.

Niels van der Rest