PDA

Просмотр полной версии : Парсер adwords keyword tool


kiwi
05.04.2007, 22:06
<?php
// Collects keyword suggestions from the Google AdWords keyword tool through
// randomly chosen proxies and appends the unique results to ./adwords-es.txt.
// The full "<?php" tag is used because the short "<?" form is only parsed
// when short_open_tag is enabled in php.ini.
/**
 * Requests the AdWords VariationsTool page for one keyword and extracts the
 * suggested keywords from the returned page.
 *
 * The request goes through a randomly chosen entry of the global $proxy list.
 * A proxy whose request fails is removed from that list, and the list is
 * re-indexed so that later random picks always hit an existing entry.
 *
 * @param string $keyword Seed keyword; it is URL-encoded into the request.
 * @param int    $err     Set by reference: 0 when the transfer succeeded, 1 when it failed.
 * @return array Keywords captured from the "criteria.push(new kpCriterion('...'" calls
 *               in the response; empty when the transfer failed or nothing matched.
 */
function getadwords ( $keyword, &$err = 1 )
{
    global $proxy;

    $ch = curl_init();
    // targetLanguages=es is hard-coded here; use targetLanguages=en for English suggestions.
    curl_setopt($ch, CURLOPT_URL,'https://adwords.google.com/select/VariationsTool?adgroupid=0&campaignid=0&keywords=' . urlencode ($keyword) . '&adgroupIntegrated=false&skipLogin=true&currencyCode=USD&maxCpcOverride=&targetLanguages=es&targetCountries=*&synonyms=true&nocache=1146820018203');
    curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);
    curl_setopt($ch, CURLOPT_FAILONERROR, 1);
    curl_setopt($ch, CURLOPT_COOKIEJAR, './cc-adwords.txt');
    curl_setopt($ch, CURLOPT_COOKIEFILE, './cc-adwords.txt');
    curl_setopt($ch, CURLOPT_REFERER, "http://adwords.google.com");
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET');
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION,1);
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);

    // Only pick a proxy when the list still has entries; otherwise the
    // request is sent without one instead of indexing an empty array.
    $randproxy = null;
    if (is_array ($proxy) && count ($proxy) > 0)
    {
        $randproxy = rand (0, count ($proxy) - 1);
        curl_setopt($ch, CURLOPT_PROXY, $proxy [$randproxy]);
    }

    $html = curl_exec ($ch);
    $errno = curl_errno ($ch);
    $message = $errno . ':' . curl_error ($ch);

    // Release the handle before any return path (it used to sit after the
    // return statement and was never reached).
    curl_close ($ch);

    if ($errno != 0)
    {
        print '- [' . $keyword . '] (' . $message . ")\n";
        if ($randproxy !== null)
        {
            // Drop the failing proxy and close the gap in the numeric keys,
            // so rand (0, count - 1) keeps addressing valid entries.
            unset ($proxy [$randproxy]);
            $proxy = array_values ($proxy);
        }
        $err = 1;
        return array ();
    }

    print '+ [' . $keyword . ']' . "\n";
    $err = 0;

    preg_match_all ('/criteria\.push\(new kpCriterion\(\'([\w\s]+)\', /is', $html, $match_all);
    return ($match_all [1]);
}

// Load the proxy list: one proxy per line, at most the first 100 are kept.
// NOTE(review): 'tut-url-otkuda-brat-proxy' looks like a placeholder for the
// real path or URL of the proxy list - confirm and replace before running.
$proxy = array ();
$fi = fopen ('tut-url-otkuda-brat-proxy', 'r');
if (! $fi)
{
    print '- cannot open proxy ' . "\n";
    die;
}
// Loop on the fgets result rather than feof: fgets returns false at end of
// file, which would otherwise be stored as an empty proxy entry.
while (($line = fgets ($fi, 4096)) !== false)
{
    $line = trim ($line);
    if ($line !== '')
    {
        // Blank lines are skipped so they are never picked as a proxy.
        $proxy [] = $line;
    }
}
fclose ($fi);

$proxy = array_slice ($proxy, 0, 100);
// Guard the first-entry preview so an empty list does not read a missing index.
print '* proxy: ' . count ($proxy) . ' - ' . (isset ($proxy [0]) ? $proxy [0] : '') . "\n";

// Initial request to the AdWords front page, made without a proxy. It uses
// the same cookie jar file (./cc-adwords.txt) that the keyword requests read.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL,"https://adwords.google.com/");
curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);
curl_setopt($ch, CURLOPT_FAILONERROR, 1);
curl_setopt($ch, CURLOPT_COOKIEJAR, './cc-adwords.txt');
curl_setopt($ch, CURLOPT_COOKIEFILE, './cc-adwords.txt');
curl_setopt($ch, CURLOPT_REFERER, "http://www.google.com");
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET');
curl_setopt($ch, CURLOPT_FOLLOWLOCATION,1);
curl_setopt($ch, CURLOPT_TIMEOUT, 60);
$html = curl_exec ($ch);
$succeeded = (curl_errno ($ch) == 0);
$err = curl_errno ($ch) . ':' . curl_error ($ch);
if (! $succeeded)
{
    // Terminate the message with a newline so it does not run into the
    // next line of output. The script carries on after a failure here.
    print $err . "\n";
}
curl_close ($ch);

// Seed keywords to expand, and the result set. The result is keyed by the
// keyword itself, so duplicates collapse into one entry.
$skeywords = array ('telefono', 'negocio');
$keywords = array ();

$err = 0;

foreach ($skeywords as $skeyword)
{
    // First attempt, then up to four retries while the request keeps failing.
    // $i ends up holding the number of retries that were needed.
    $bkeywords = getadwords ( $skeyword, $err );
    for ($i = 0; $err == 1 && $i < 4; $i ++)
    {
        $bkeywords = getadwords ( $skeyword, $err );
    }

    foreach ($bkeywords as $keyword)
    {
        $keywords [$keyword] = $keyword;

        // Expand each suggestion one level further (single attempt, no retry).
        $akeywords = getadwords ( $keyword );
        foreach ($akeywords as $related)
        {
            $keywords [$related] = $related;
        }

        // Progress line: retry count for the current seed = unique keywords so far.
        print '* ' . $i . '=' . count ($keywords) . "\n";
    }
}

// Append every collected keyword to the output file, one per line.
$fo = fopen ('./adwords-es.txt', 'a+');
if (! $fo)
{
    // Stop here instead of calling fputs/fclose on a failed handle.
    print '- cannot open ./adwords-es.txt' . "\n";
    die;
}
foreach ($keywords as $keyword)
{
    fputs ($fo, $keyword . "\n");
}
fclose ($fo);

// Final summary: number of unique keywords collected in this run.
print '+ ' . count ($keywords) . "\n";
?>

arachno
06.04.2007, 16:57
какой знакомый код :) :)

rostikus
11.04.2007, 21:35
нерабочий кодик :(

arachno
12.04.2007, 12:20
нерабочий кодик :(
что именно не работает?

rostikus
13.04.2007, 22:16
* proxy: 100 - <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
60:SSL certificate problem, verify that the CA cert is OK. Details: error:14090086:SSL routines:SSL3_GET_SERVER_CERTIFICATE:certificate verify failed- [telefono] (56:The requested URL returned error: 501)
- [telefono] (6:Could not resolve host: adwords.google.com; Host not found)
- [telefono] (28:Resolving host timed out: adwords.google.com)
- [telefono] (6:Could not resolve host: adwords.google.com; Host not found)
- [telefono] (28:Resolving host timed out: adwords.google.com)
- [negocio] (6:Could not resolve host: adwords.google.com; Host not found)
- [negocio] (6:Could not resolve host: adwords.google.com; Host not found)
- [negocio] (28:Resolving host timed out: adwords.google.com)
- [negocio] (28:Resolving host timed out: adwords.google.com)
- [negocio] (6:Could not resolve host: adwords.google.com; Host not found)
+ 0

senior_pomidor
13.04.2007, 22:50
курл сосёт :D

rostikus
14.04.2007, 01:33
мега информативно

Chekist
14.04.2007, 03:15
мега информативно
Курл обнови.

arachno
15.04.2007, 05:02
от скрипта это не зависит
решение уже подсказали