从文本批量读取URL，筛选可正常访问的地址并保留对应IP

  • A+
所属分类:Python
#coding=utf-8 
import sys 
import requests 
# Probe http://192.168.88.139:8888/20150602<N>.html for N in 3000..4998 and
# print the URL and raw body of every page that answers with HTTP 200.
for i in range(3000, 4999):
    url = 'http://192.168.88.139:8888/20150602' + str(i) + '.html'
    try:
        # requests has no default timeout; bound each probe so one
        # unresponsive page cannot stall the whole scan.
        r = requests.get(url, timeout=5)
    except requests.RequestException:
        # A single failed probe must not abort the remaining ones; report it
        # on stderr so stdout keeps only the successful hits.
        sys.stderr.write(url + ' error\n')
        continue
    if r.status_code == 200:
        print(url)
        print(r.content)

 

 

原文

http://zone.wooyun.org/content/20885

 

 

 

多线程 + 文本逐行读取 + 域名解析为IP + 结果写入文件

# -*-coding:utf-8-*-
import os
import sys
import Queue
import getopt
import logging
import requests
import threading
import time
import socket
# Print the wall-clock start time (HH:MM:SS) before any work is done.
print("start:" + time.strftime("%H:%M:%S"))

# Root logger: emit WARNING and above, each record prefixed with a timestamp.
logging.basicConfig(format="[%(asctime)s] %(message)s", level=logging.WARNING)

class BatchThreads(threading.Thread):
    """Worker thread that probes hosts taken from a shared queue.

    Each queue item is a host string without a scheme. The worker requests
    ``http://<item>`` and, on HTTP 200, appends the URL and the resolved IP
    to ``yes.txt``. Any failure (request error, timeout, DNS lookup error)
    appends the URL to ``noaccess.txt``. Responses with any other status
    code are not recorded.
    """

    def __init__(self, queue):
        """Remember the shared queue this worker drains.

        Args:
            queue: Queue of host strings to probe.
        """
        super(BatchThreads, self).__init__()
        self.queue = queue

    def run(self):
        """Probe one host per iteration until the queue is empty."""
        while True:
            try:
                # Non-blocking get: checking empty() and then calling a
                # blocking get() lets another worker take the last item in
                # between, which would leave this thread blocked forever.
                tempurl = self.queue.get_nowait()
            except Queue.Empty:
                break

            url = 'http://' + tempurl
            try:
                r = requests.get(url, timeout=5)
                if r.status_code == 200:
                    print(url + ' ' + 'access-comman:200')
                    ip = socket.gethostbyname(tempurl)
                    # Emit the whole record with a single write() call and
                    # let the context manager close the file even on error.
                    with open('yes.txt', 'a') as yes:
                        yes.write(url + '    ' + '    ' + ip + '\n')
            except Exception:
                # Best effort: a failing host is logged and skipped so the
                # worker keeps draining the queue.
                print(url + " error")
                with open('noaccess.txt', 'a') as noaccess:
                    noaccess.write(url + '\n')


def batch_queue(_queue, _thread_number):
    """Load hosts from ``url-hz.txt`` into ``_queue`` and probe them.

    Blank lines and lines starting with ``#`` are skipped and duplicates are
    removed. Worker threads are then started and this call blocks until all
    of them have finished. Does nothing when the file yields no hosts.

    Args:
        _queue: Queue the hosts are put on; the workers drain it.
        _thread_number: Upper bound on the number of worker threads.
    """
    with open('url-hz.txt') as f:
        stripped = (line.strip() for line in f)
        urls = set(url for url in stripped if url and not url.startswith("#"))
    if not urls:
        return

    for url in urls:
        # Fill the queue that was passed in rather than the module-level
        # ``queue`` global, so the parameter is actually honoured.
        _queue.put(url)

    # Never start more workers than requested, nor more than there are items.
    worker_count = min(_thread_number, _queue.qsize())
    workers = [BatchThreads(_queue) for _ in xrange(worker_count)]
    # Still record the workers in the module-level ``threads`` list.
    threads.extend(workers)

    # Start and join only the workers created by this call; threads left in
    # ``threads`` by an earlier call cannot be started a second time.
    for t in workers:
        t.start()
    for t in workers:
        t.join()

# Module-level state read by batch_queue(): the work queue of hosts and the
# list that collects the worker threads.
queue = Queue.Queue()
threads = []
thread_number = 20  # number of worker threads requested
batch_queue(queue, thread_number)

# Print the wall-clock finish time (HH:MM:SS) once all workers are done.
print("end:" + time.strftime("%H:%M:%S"))

 

  • 我的微信
  • 这是我的微信扫一扫
  • weinxin
  • 我的微信公众号
  • 我的微信公众号扫一扫
  • weinxin

发表评论

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen: