ansaurus

Question

cURL + JS_Extractor + php + MYSQL mess = Please help

Answer 1

+1 A:

WOW, I just solved my own problem...
Who would have thought...

Instead of trying to get the table data directly and failing miserably, I write the response out into a temporary file and then pull the table data from there...

Remember that I told you that the script worked on a local file?
So I made the file my local file :-)

Here is what I did:

<?php
/**
 * Logs in to doomlord.net, downloads the detailed alliance statistics page,
 * saves it to temp.html and dumps the rows of the "rstatisztika_tabla" table.
 *
 * Side effects: starts a session, opens the DB connection via db_on(),
 * (re)writes temp.html in the current working directory and prints the
 * extracted data. The script stops with a message as soon as a step fails,
 * instead of carrying on with empty data.
 */
session_start();

require_once('inc/constant.php');
require_once('inc/function.php');  //basic functions
db_on();

// Cookie jar shared by both requests so the login session carries over.
$ckfile = tempnam("tmp", "cookie.tmp");
if ($ckfile === false) {
    exit('Could not create the temporary cookie file.');
}

$userAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1";

// http_build_query() URL-encodes every value and joins the pairs with '&',
// so no manual urlencode()/trailing-separator handling is needed.
$fields_string = http_build_query(
    array(
        'username' => SITE_USER,
        'jelszo'   => SITE_PASS,
        'vilag'    => SITE_WORLD,
        'tev'      => SITE_TEV,
    ),
    '',
    '&'
);

// Step 1: log in. Only the cookies matter here; the response body is discarded.
$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_USERAGENT      => $userAgent,
    CURLOPT_URL            => 'http://www.doomlord.net',
    CURLOPT_COOKIEJAR      => $ckfile,
    CURLOPT_POST           => true,   // boolean switch, not a field count
    CURLOPT_POSTFIELDS     => $fields_string,
    CURLOPT_CONNECTTIMEOUT => 20,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_FOLLOWLOCATION => true,   // boolean switch; the limit is CURLOPT_MAXREDIRS
    CURLOPT_MAXREDIRS      => 10,
));
$loginResponse = curl_exec($ch);
$loginError    = curl_error($ch);
curl_close($ch); // closing the handle writes the received cookies to $ckfile

if ($loginResponse === false) {
    unlink($ckfile);
    exit('Login request failed: ' . $loginError);
}

// Step 2: fetch the statistics page with the session cookies from step 1.
// The query string uses a plain '&': cURL does not decode HTML entities, so
// '&amp;' would send a parameter named "amp;sub" instead of "sub".
$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_USERAGENT      => $userAgent,
    CURLOPT_URL            => 'http://www.doomlord.net/index.php?m=szovetseg&sub=reszletes_statisztikak',
    CURLOPT_CONNECTTIMEOUT => 20,
    CURLOPT_COOKIEFILE     => $ckfile,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_MAXREDIRS      => 10,
));
$connect    = curl_exec($ch);
$fetchError = curl_error($ch);
curl_close($ch);

// The cookie file holds the logged-in session; remove it once it is no longer needed.
unlink($ckfile);

if ($connect === false) {
    exit('Fetching the statistics page failed: ' . $fetchError);
}

// Store the page in temp.html (overwriting any previous copy); the extractor
// below is fed from this file.
if (file_put_contents('temp.html', $connect) === false) {
    exit('Could not write temp.html.');
}

set_include_path(get_include_path() . PATH_SEPARATOR . './library/');
require_once 'JS/Extractor.php';

$html = file_get_contents('temp.html');
if ($html === false) {
    exit('Could not read temp.html.');
}

$extractor = new JS_Extractor($html);
$body = $extractor->query("body")->item(0);
if (!$body) {
    exit('The downloaded page has no body element.');
}

$table = $body->query("//table[@class=\"rstatisztika_tabla\"]")->item(0);
if (!$table) {
    // NOTE(review): most likely the login did not succeed and another page was served.
    exit('Statistics table "rstatisztika_tabla" was not found in the downloaded page.');
}

$data = $table->extract(array("tr", "td"));
echo "</br />";
echo "This is the data:</br />";
var_dump($data);
echo "</br />";
echo "</br />";
?>

Thadson 2009-12-08 06:26:49

And this does not work if you simply pass $connect into the JS_Extractor constructor? Above, in the original question, you showed JS_Extractor(file_get_contents($connect)); where $connect was also the result of the GET -- not a filename -- and hence that call did not make any sense.

Don 2009-12-08 06:46:24

ansaurus

tags:

views:

answers:

cURL + JS_Extractor + php + MYSQL mess = Please help

related questions