import subprocess
import sys, threading, time
import pycurl
def refresh_ip():
    """Restart the local Tor service (this script's way of refreshing its IP).

    Runs ``sudo /etc/init.d/tor restart`` and blocks until the command has
    exited, so the restart has at least been issued and completed before the
    caller continues.

    Returns None. Failures (the command cannot be started, or it exits with a
    non-zero status) are reported on stdout and otherwise ignored, so the
    caller's loop keeps running.
    """
    import os  # local import: the file header does not import os

    print("Refreshing IP .. .")
    try:
        # An argument list is used instead of a shell string so nothing is
        # interpreted by a shell. The init script's stdout is discarded; it
        # was previously sent to a pipe that nobody read.
        with open(os.devnull, "wb") as devnull:
            subprocess.check_call(
                ["sudo", "/etc/init.d/tor", "restart"], stdout=devnull
            )
    except (EnvironmentError, subprocess.CalledProcessError) as ex:
        # EnvironmentError covers both IOError and OSError on Python 2 and is
        # an alias of OSError on Python 3.
        print("Failed to Refresh IP.  %s" % (ex,))
# SIGPIPE should be ignored whenever pycurl.NOSIGNAL is in use; the libcurl
# tutorial has the details.
try:
    import signal
    from signal import SIGPIPE, SIG_IGN
except ImportError:
    # The signal module (or one of these names) is unavailable here, so the
    # step is skipped.
    pass
else:
    signal.signal(SIGPIPE, SIG_IGN)
class Test(threading.Thread):
    """Thread that fetches one URL with pycurl through a local SOCKS5 proxy.

    The transfer is fully configured in the constructor and executed by
    ``run()`` (i.e. when the thread is started).
    """

    def __init__(self, url, ofile):
        """Create and configure the curl handle for one transfer.

        url   -- the address to fetch.
        ofile -- object passed to pycurl's WRITEDATA option to receive the
                 response body; this class never closes it.
        """
        threading.Thread.__init__(self)
        self.curl = pycurl.Curl()
        self.curl.setopt(pycurl.URL, url)
        self.curl.setopt(pycurl.WRITEDATA, ofile)
        # Follow redirects, but no more than five in a row.
        self.curl.setopt(pycurl.FOLLOWLOCATION, 1)
        self.curl.setopt(pycurl.MAXREDIRS, 5)
        # Required because the transfer runs in a thread (see libcurl docs on
        # CURLOPT_NOSIGNAL).
        self.curl.setopt(pycurl.NOSIGNAL, 1)
        self.curl.setopt(pycurl.USERAGENT, 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/11.04 Chromium/14.0.835.202 Chrome/14.0.835.202 Safari/535.1')
        # Route the request through the SOCKS5 proxy on localhost:9050.
        self.curl.setopt(pycurl.PROXY, '127.0.0.1:9050')
        self.curl.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5)
        self.curl.setopt(pycurl.REFERER, 'http://www.google.co.in/')

    def run(self):
        """Perform the transfer, then print a progress dot on success.

        The curl handle is always closed, even when ``perform()`` raises; the
        exception itself still propagates and no dot is printed in that case.
        """
        try:
            self.curl.perform()
        finally:
            self.curl.close()
        sys.stdout.write(".")
        sys.stdout.flush()
# Read the list of URIs from the file named on the command line.
try:
    url_file = sys.argv[1]
except IndexError:
    # No file was specified, show usage string
    print("Usage: %s <file with uris to fetch>" % sys.argv[0])
    raise SystemExit

# The list file is closed as soon as it has been read. Blank lines are
# dropped so they are not handed to curl as empty URLs.
with open(url_file) as listing:
    urls = [line.rstrip() for line in listing if line.strip()]

# Fetch the URIs strictly one after another: each thread is joined before the
# next one is created. The response for the N-th URI (counting from 0) is
# written to a file named "N" in the current directory.
t1 = time.time()
for fileno, url in enumerate(urls):
    # The with-block closes the output file even if the fetch setup fails.
    with open(str(fileno), "wb") as f:
        t = Test(url, f)
        t.start()
        t.join()
    refresh_ip()
    # Pause before the next request -- presumably to give Tor time to come
    # back up after the restart; TODO confirm the required delay.
    time.sleep(3)
t2 = time.time()
print("\n** Singlethreading, %d seconds elapsed for %d uris" % (int(t2 - t1), len(urls)))
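# NOTE(review): stray values found at the end of the file, presumably
# elapsed-time measurements pasted from earlier runs -- confirm before removing.
# 26.849430
# 80.919724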