Display the url_queue update command for every missing URL extracted from the merged mail

import time
import MySQLdb
# Connect to the "spider" database and obtain a cursor from the connection.
conn = MySQLdb.connect(host="10.241.31.96", user="root", db="spider")
cursor = conn.cursor()

# Collect a "www.<host>" string for every line of the "ms" file that
# contains a "url=http://www." marker. Results accumulate in `a`.
a = []
marker = "url=http://www."

# The context manager guarantees the file is closed, even if parsing raises.
with open("ms", "r") as k:
    for content in k:
        start = content.find(marker)
        # str.find() reports "absent" as -1; position 0 is a valid match,
        # so compare against -1 rather than testing for > 0.
        if start == -1:
            continue

        # Everything after the marker, i.e. after "http://www.".
        url = content[start + len(marker):]

        # Keep the text before the first comma. When there is no comma,
        # find() returns -1 and the slice drops only the final character.
        x = url[:url.find(",")]
        if len(x) > 1:
            # Keep everything through three characters past the first dot
            # (e.g. "example.com" out of "example.com/path").
            x = x[:x.find(".") + 4]
            if len(x) > 1:
                x = "www." + x
                a.append(x)

# Sort the collected host strings in place.
a.sort()
# NOTE(review): the rest of this loop appears to be missing from the file.
# As written there is no exit condition, so a.pop() will raise IndexError
# once `a` becomes empty.
while(True):
        # Remove the last element of the sorted list.
        x=a.pop()
        # True when another copy of the same value is still in the list.
        if(x in a):
                # No-op expression statement; nothing happens for duplicates here.
                None
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. ( Log Out / Change )

Twitter picture

You are commenting using your Twitter account. ( Log Out / Change )

Facebook photo

You are commenting using your Facebook account. ( Log Out / Change )

Google+ photo

You are commenting using your Google+ account. ( Log Out / Change )

Connecting to %s

%d bloggers like this: