解决方案:

from twisted.internet import reactor, threads

from urlparse import urlparse

import httplib

import itertools

# Number of worker threads suggested to the reactor's thread pool.
concurrent = 200

# Running tally of handled URLs; the first value handed out is 1.
finished = itertools.count(1)

reactor.suggestThreadPoolSize(concurrent)

def getStatus(ourl):
    """Send a HEAD request for ``ourl`` and return the HTTP status code.

    This is a blocking call.  The connection is always opened with
    ``httplib.HTTPConnection`` against the URL's network location.

    :param ourl: absolute URL string, e.g. ``http://host/path?x=1``.
    :return: the integer status code of the response.
    :raises: whatever ``httplib`` / the socket layer raises on failure.
    """
    url = urlparse(ourl)

    # Rebuild the full request target.  Sending only ``url.path`` would drop
    # the ``;params`` and ``?query`` parts and probe a different resource.
    target = url.path or "/"
    if url.params:
        target += ";" + url.params
    if url.query:
        target += "?" + url.query

    conn = httplib.HTTPConnection(url.netloc)
    try:
        conn.request("HEAD", target)
        res = conn.getresponse()
        return res.status
    finally:
        # Release the socket even when the request fails.
        conn.close()

def processResponse(response,url):

print response, url

processedOne()

def processError(error,url):

print "error", url#, error

processedOne()

def processedOne():
    """Record one handled URL; stop the reactor once all are accounted for."""
    completed = finished.next()
    if completed == added:
        reactor.stop()

def addTask(url):
    """Schedule a status check for ``url`` on the reactor's thread pool.

    ``getStatus`` runs in a worker thread; its result goes to
    ``processResponse`` and any failure goes to ``processError``, each
    receiving ``url`` as an extra argument.
    """
    deferred = threads.deferToThread(getStatus, url)
    deferred.addCallback(processResponse, url)
    deferred.addErrback(processError, url)

# Schedule one status check per line of urllist.txt, then run the reactor
# until processedOne() has counted every result.
added = 0

# The context manager closes the file once every line has been scheduled.
with open('urllist.txt') as urlfile:
    for url in urlfile:
        added += 1
        addTask(url.strip())

# With nothing scheduled, no callback would ever call reactor.stop(), and
# the script would hang.  Only start the reactor when there is work.
if added:
    try:
        reactor.run()
    except KeyboardInterrupt:
        reactor.stop()

Test time:

[kalmi@ubi1:~] wc -l urllist.txt

10000 urllist.txt

[kalmi@ubi1:~] time python f.py > /dev/null

real 1m10.682s

user 0m16.020s

sys 0m10.330s

[kalmi@ubi1:~] head -n 6 urllist.txt

http://www.google.com

http://www.bix.hu

http://www.godaddy.com

http://www.google.com

http://www.bix.hu

http://www.godaddy.com

[kalmi@ubi1:~] python f.py | head -n 6

200 http://www.bix.hu

200 http://www.bix.hu

200 http://www.bix.hu

200 http://www.bix.hu

200 http://www.bix.hu

200 http://www.bix.hu

Ping time:

bix.hu is ~10 ms away from me

godaddy.com: ~170 ms

google.com: ~30 ms

Logo

一站式 AI 云服务平台

更多推荐