Ok still new to the screen scraping thing.
I've managed to log into the site I need but now how do I redirect to another page? After I login I'm trying to do another GET request on the page that I need but it has a redirect on it that takes me back to the login page.
So I'm thinking the SESSION variables are not being passed, how can I over come this?
Problem:
Even if I post the 2nd page URL it still redirects me back to the login page, unless I'm logged in already, but the screen scrape code is not allowing the SESSION data to be passed?
I found this code from another screen scraper question here @stack
class Curl {
public $cookieJar = "";
public function __construct($cookieJarFile = 'cookies.txt') {
$this->cookieJar = $cookieJarFile;
}
function setup() {
$header = array();
$header[0] = "Accept: text/xml,application/xml,application/xhtml+xml,";
$header[0] .= "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
$header[] = "Cache-Control: max-age=0";
$header[] = "Connection: keep-alive";
$header[] = "Keep-Alive: 300";
$header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
$header[] = "Accept-Language: en-us,en;q=0.5";
$header[] = "Pragma: "; // browsers keep this blank.
curl_setopt($this->curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US; rv:1.8.1.7) Gecko/20070914 Firefox/2.0.0.7');
curl_setopt($this->curl, CURLOPT_HTTPHEADER, $header);
curl_setopt($this->curl, CURLOPT_COOKIEJAR, $cookieJar);
curl_setopt($this->curl, CURLOPT_COOKIEFILE, $cookieJar);
curl_setopt($this->curl, CURLOPT_AUTOREFERER, true);
curl_setopt($this->curl, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, true);
}
function get($url) {
$this->curl = curl_init($url);
$this->setup();
return $this->request();
}
function getAll($reg, $str) {
preg_match_all($reg, $str, $matches);
return $matches[1];
}
function postForm($url, $fields, $referer = '') {
$this->curl = curl_init($url);
$this->setup();
curl_setopt($this->curl, CURLOPT_URL, $url);
curl_setopt($this->curl, CURLOPT_POST, 1);
curl_setopt($this->curl, CURLOPT_REFERER, $referer);
curl_setopt($this->curl, CURLOPT_POSTFIELDS, $fields);
return $this->request();
}
function getInfo($info) {
$info = ($info == 'lasturl') ? curl_getinfo($this->curl, CURLINFO_EFFECTIVE_URL) : curl_getinfo($this->curl, $info);
return $info;
}
function request() {
return curl_exec($this->curl);
}
}
Calling the class
include('/var/www/html/curl.php');
$curl = new Curl();
$url = "here.com";
$newURL = "here.com/newpage.php";
$fields = "usr=user1&pass=PassWord";
// Calling URL
$referer = "http://here.com/index.php";
$html = $curl->postForm($url, $fields, $referer);
$html = $curl->get($newURL);
echo $html; // takes me back to $url instead of $newURL