- A+
目前只实现了百度的抓取规则;Google 等其他搜索引擎需要先抓取其搜索地址,再自行补充对应的规则。我的思路是在搜索请求中禁用 JavaScript(nojs=1),从而直接拿到返回的页面内容。
配合生成弱口令的小脚本进行爆破效果应该不错。
准备学java了,有没有学习路上的小伙伴,求肥皂求带。
效果图如下。
<?php
/**
 * Harvests e-mail addresses from search-engine result pages.
 *
 * For every registered engine, rule() builds the result-page URLs, curl()
 * downloads them in parallel, and callback() extracts the addresses and
 * appends them to OUTPUT_FILE. Only the Baidu rules are implemented; the
 * other engines are placeholders.
 */
class search
{
    /** Path (relative to the working directory) of the file collecting the addresses. */
    const OUTPUT_FILE = 'email.txt';

    /** @var array Map of engine name => base URL. */
    private $engine = array();

    /** @var resource|null Append handle on OUTPUT_FILE; only set while run() is executing. */
    private $fp = null;

    /**
     * Registers the engines to query. Only Baidu is enabled.
     */
    public function __construct()
    {
        $this->set('baidu', 'http://www.baidu.com/');
        // Engines without extraction rules yet:
        //$this->set('google','https://www.google.com.hk/');
        //$this->set('360','http://www.so.com/');
        //$this->set('sogou','http://www.sogou.com/');
    }

    /**
     * Searches every registered engine and stores the addresses found.
     *
     * Matches are appended to OUTPUT_FILE while crawling; afterwards the file
     * is rewritten with duplicate lines removed.
     *
     * @param string       $key  Keyword to search for.
     * @param string|false $bind Only keep addresses containing this string
     *                           (e.g. a domain); false keeps every address.
     * @throws RuntimeException When OUTPUT_FILE cannot be opened for appending.
     */
    public function run($key, $bind = false)
    {
        $fp = fopen(self::OUTPUT_FILE, 'a+');
        if ($fp === false) {
            // Without a handle every later fwrite() would fail, so stop here.
            throw new RuntimeException('Unable to open ' . self::OUTPUT_FILE . ' for appending');
        }
        $this->fp = $fp;
        foreach ($this->engine as $engine => $url) {
            $this->rule($engine, $url, $key, $bind);
        }
        fclose($this->fp);
        $this->fp = null;

        $lines = file(self::OUTPUT_FILE);
        if ($lines === false) {
            return;
        }
        // Lines keep their line endings, so joining the unique ones reproduces
        // the file without duplicates in a single write.
        file_put_contents(self::OUTPUT_FILE, implode('', array_unique($lines)));
    }

    /**
     * Downloads all URLs in parallel and feeds each response body to callback().
     *
     * @param array        $urls   URLs to fetch.
     * @param string       $engine Engine name, selects the extraction rule.
     * @param string|false $bind   Address filter, passed through to callback().
     * @return array Map of URL => list of addresses written for that page.
     */
    private function curl($urls, $engine, $bind = false)
    {
        $queue = curl_multi_init();
        $map = array();
        foreach ($urls as $url) {
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_TIMEOUT, 1);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_HEADER, 0);
            curl_setopt($ch, CURLOPT_NOSIGNAL, true);
            curl_multi_add_handle($queue, $ch);
            $map[$this->handleId($ch)] = $url;
        }

        $responses = array();
        $active = 0;
        do {
            while (($code = curl_multi_exec($queue, $active)) == CURLM_CALL_MULTI_PERFORM);
            if ($code != CURLM_OK) {
                break;
            }
            // Process every transfer that has finished since the last pass.
            while ($done = curl_multi_info_read($queue)) {
                $handle = $done['handle'];
                // A failed transfer has no content; treat it as an empty page.
                $body = (string) curl_multi_getcontent($handle);
                $responses[$map[$this->handleId($handle)]] = $this->callback($body, $engine, $bind);
                curl_multi_remove_handle($queue, $handle);
                if (is_resource($handle)) {
                    // Only pre-PHP 8 resource handles need an explicit close.
                    curl_close($handle);
                }
            }
            if ($active > 0 && curl_multi_select($queue, 0.5) === -1) {
                // select() can fail immediately; pause briefly instead of spinning.
                usleep(100000);
            }
        } while ($active);
        curl_multi_close($queue);

        return $responses;
    }

    /**
     * Returns an integer usable as an array key for a cURL handle.
     *
     * Handles are resources before PHP 8 and CurlHandle objects afterwards;
     * objects cannot be cast to string, so each kind gets its own id source.
     *
     * @param resource|object $ch cURL easy handle.
     * @return int
     */
    private function handleId($ch)
    {
        return is_resource($ch) ? (int) $ch : spl_object_id($ch);
    }

    /**
     * Registers a search engine.
     *
     * @param string $name Engine name used by rule() and callback().
     * @param string $url  Base URL of the engine, with trailing slash.
     */
    private function set($name, $url)
    {
        $this->engine[$name] = $url;
    }

    /**
     * Builds the result-page URLs for an engine and crawls them.
     *
     * For Baidu the `pn` offset runs from 0 to 180 in steps of 10 (19 requests).
     * The original author noted that Baidu appears to cap how many results can
     * be fetched in one go.
     *
     * @param string       $engine Engine name.
     * @param string       $url    Base URL of the engine.
     * @param string       $key    Keyword to search for.
     * @param string|false $bind   Address filter, passed through to callback().
     */
    private function rule($engine, $url, $key, $bind = false)
    {
        switch ($engine) {
            case 'baidu':
                $urls = array();
                for ($pn = 0; $pn <= 180; $pn += 10) {
                    $urls[] = $url . 's?nojs=1&pn=' . $pn . '&wd=' . urlencode($key);
                }
                $this->curl($urls, $engine, $bind);
                break;
            case 'google':
            case '360':
            case 'sogou':
            default:
                // No crawl rules implemented for these engines.
                break;
        }
    }

    /**
     * Extracts e-mail addresses from a result page and appends them to the output file.
     *
     * Requires $this->fp to be an open, writable handle. Each kept address is
     * written on its own CRLF-terminated line and echoed as progress output.
     *
     * @param string       $data   Response body of a result page.
     * @param string       $engine Engine name, selects the extraction rule.
     * @param string|false $bind   Only keep addresses containing this string
     *                             (case-insensitive); false keeps all.
     * @return array List of addresses written for this page.
     */
    private function callback($data, $engine, $bind = false)
    {
        $found = array();
        switch ($engine) {
            case 'baidu':
                // Strip the <em> tags Baidu uses to highlight the keyword.
                $text = (string) preg_replace('~</?em>~i', '', (string) $data);
                // '#' is accepted as an obfuscated '@'. The lookbehind rejects a
                // local part ending in the word "3D".
                $pattern = '~[a-z0-9_]+?(?<!\b3D\b)[@#][a-z0-9_]+?(\.[a-z]{2,3}){1,2}~i';
                if (preg_match_all($pattern, $text, $email)) {
                    foreach ($email[0] as $v) {
                        $v = str_replace('#', '@', $v);
                        // The pattern is case-insensitive, so the filter must be too.
                        if ($bind && stripos($v, $bind) === false) {
                            continue;
                        }
                        fwrite($this->fp, $v . "\r\n");
                        echo 'Writed ' . $v . "<br>";
                        $found[] = $v;
                    }
                }
                break;
            case 'google':
            case '360':
            case 'sogou':
            default:
                // No extraction rules implemented for these engines.
                break;
        }

        return $found;
    }
}
// Script entry point. A crawl can outlast both the default execution time
// limit and the client connection, so lift the former and ignore the latter.
set_time_limit(0);
ignore_user_abort(true);

$search = new search();
// First argument: search keyword. Second: only keep addresses containing it.
$search->run('@ucloud.cn', 'ucloud.cn');
// Unfiltered variant, keeping every address found for the keyword:
//$search->run('@ucloud.cn');
?>
- 我的微信
- 这是我的微信扫一扫
- 我的微信公众号
- 我的微信公众号扫一扫