I'm trying to write a tool to check if a proxy server is up and available for use. So far, I've come up with two methods in the class below (I've removed setters and getters that are superfluous to this question).
The first method uses cURL and tries to request a page via the proxy; the second method uses fsockopen and just tries to open a connection to the proxy.
class ProxyList {
    /**
     * The URL that the proxy validation method will request through each proxy.
     * You could set this to localhost, depending on your environment.
     *
     * @var string
     * @see ProxyList::validate()
     */
    const VALIDATION_URL = "http://m.www.yahoo.com/robots.txt";

    /**
     * @var int Timeout in seconds, used as the total transfer timeout in
     *          validate() and as the connect timeout in validate2()
     */
    const TIMEOUT = 3;

    /**
     * @var array Map of proxy string => bool result of the most recent check.
     *            Shared by every instance because it is static.
     */
    private static $valid = array();

    /** @var array Proxy strings ("host:port") still to be checked */
    private $proxies = array();

    /**
     * Checks every proxy in parallel by sending a HEAD request for
     * VALIDATION_URL through it, keeping only the proxies that answered
     * with a 2xx status.
     *
     * Only positive results are reused from the cache; a proxy that
     * previously failed is always re-tested.
     *
     * @param bool $useCache Skip the request for proxies already known to be valid
     * @return array The proxies that passed; this also replaces the internal list
     */
    public function validate($useCache=true) {
        $proxies = array();
        $handles = array();
        $mh = curl_multi_init();

        foreach ( $this->proxies as $p ) {
            // Using the cache and the proxy is already known to be good? Skip the request
            if ( $useCache && !empty(self::$valid[$p]) ) {
                $proxies[] = $p;
                continue;
            }

            // A proxy listed twice needs only one request. Creating a second
            // handle would overwrite $handles[$p] and orphan the first one.
            if ( isset($handles[$p]) ) {
                continue;
            }

            $ch = curl_init();
            curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
            curl_setopt($ch, CURLOPT_URL, self::VALIDATION_URL);
            // NOTE(review): tunnelling makes cURL issue a CONNECT even for this
            // plain http:// URL; proxies that only allow CONNECT to port 443
            // would then be reported as invalid - confirm this is intended.
            curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, true);
            curl_setopt($ch, CURLOPT_PROXY, $p);
            curl_setopt($ch, CURLOPT_NOBODY, true); // Also sets request method to HEAD
            curl_setopt($ch, CURLOPT_HEADER, false);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // Never echo a response to the output
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
            curl_setopt($ch, CURLOPT_TIMEOUT, self::TIMEOUT);

            curl_multi_add_handle($mh, $ch);
            $handles[$p] = $ch;
        }

        // Drive all transfers to completion. curl_multi_select() blocks until
        // there is socket activity, so we neither spin the CPU nor sleep for
        // an arbitrary interval between polls.
        $running = 0;
        do {
            $mrc = curl_multi_exec($mh, $running);

            if ( $running && $mrc === CURLM_OK && curl_multi_select($mh, 1.0) === -1 ) {
                // select() could not wait on the sockets; back off briefly
                // instead of busy-looping
                usleep(1000);
            }
        } while ( $running && ($mrc === CURLM_OK || $mrc === CURLM_CALL_MULTI_PERFORM) );

        // Get the results of the requests
        foreach ( $handles as $proxy => $ch ) {
            // A transfer that never received a response reports status 0
            $status = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
            $ok = $status >= 200 && $status < 300;

            self::$valid[$proxy] = $ok;
            if ( $ok ) {
                $proxies[] = $proxy;
            }

            // Cleanup individual handle. Before PHP 8 a cURL handle is a
            // resource that has to be closed explicitly; from PHP 8 on it is
            // an object that is released automatically.
            curl_multi_remove_handle($mh, $ch);
            if ( is_resource($ch) ) {
                curl_close($ch);
            }
        }

        // Cleanup multiple handle
        curl_multi_close($mh);

        return $this->proxies = $proxies;
    }

    /**
     * Checks every proxy, one at a time, by opening a TCP connection to it.
     * This only proves that something accepts connections on that address,
     * not that it actually relays requests.
     *
     * Only positive results are reused from the cache; a proxy that
     * previously failed is always re-tested. Entries that cannot be split
     * into a host and a port are recorded as invalid without connecting.
     *
     * @param bool $useCache Skip the connection for proxies already known to be valid
     * @return array The proxies that passed; this also replaces the internal list
     */
    public function validate2($useCache=true) {
        $proxies = array();

        foreach ( $this->proxies as $proxy ) {
            // Using the cache and the proxy is already known to be good? Skip the connection
            if ( $useCache && !empty(self::$valid[$proxy]) ) {
                $proxies[] = $proxy;
                continue;
            }

            $target = self::parseHostPort($proxy);
            $conn = false;
            if ( $target !== null ) {
                // fsockopen() raises a warning for every unreachable host.
                // Dead proxies are an expected outcome here, so the warning
                // is deliberately silenced and the return value checked instead.
                $conn = @fsockopen($target[0], $target[1], $errno, $error, self::TIMEOUT);
            }

            if ( $conn ) {
                self::$valid[$proxy] = true;
                $proxies[] = $proxy;
                fclose($conn);
            } else {
                self::$valid[$proxy] = false;
            }
        }

        return $this->proxies = $proxies;
    }

    /**
     * Splits a proxy string into host and port. An optional leading scheme
     * ("http://") and "user:pass@" credentials are ignored; the port is taken
     * from after the last colon, so a bracketed IPv6 host ("[::1]:8080") works.
     *
     * @param string $proxy Proxy in "host:port" form
     * @return array|null array(host, port) or null when no usable host/port is present
     */
    private static function parseHostPort($proxy) {
        $address = preg_replace('#^[a-z][a-z0-9+.\-]*://#i', '', trim((string)$proxy));

        $at = strrpos($address, '@');
        if ( $at !== false ) {
            $address = substr($address, $at + 1);
        }

        $colon = strrpos($address, ':');
        if ( $colon === false || $colon === 0 ) {
            return null; // no port, or no host in front of it
        }

        $host = substr($address, 0, $colon);
        $port = substr($address, $colon + 1);
        if ( !ctype_digit($port) || (int)$port < 1 || (int)$port > 65535 ) {
            return null;
        }

        return array($host, (int)$port);
    }
}
So far, I prefer the cURL method, since it allows me to check large batches of proxies in parallel, which is wicked fast, instead of one at a time like fsockopen.
I haven't done much work with proxies, so it's hard for me to tell whether either of these methods is sufficient for validating that a proxy is available, or whether there is a better method that I am missing.