Fetch all nodes from Nextag

This code fetches all US category names and node IDs from http://b6.pv.sv.nextag.com/serv/main/buyer/BulkCategoryCodes.jsp


import urllib2

def fetchdata(nodeid):
    """Download the bulk category listing for one node and store it.

    The listing page is requested with ``node=<nodeid>&all=y``, the text
    between its first ``<pre>`` tag and the following ``</pre>`` tag is
    extracted, and that text is handed to ``removespaces`` (which trims it
    and appends it to the output file).

    Args:
        nodeid: Node id as a string; it is concatenated into the URL as-is.

    Returns:
        None. If the page has no ``<pre>...</pre>`` section, a notice is
        printed and nothing is stored.
    """
    url = "http://b6.pv.sv.nextag.com/serv/main/buyer/BulkCategoryListing.jsp?node=" + nodeid + "&all=y"
    print("fetching data for node id( %s )" % nodeid)
    f = urllib2.urlopen(url)
    try:
        content = f.read()
    finally:
        # Release the connection even when the read fails.
        f.close()

    start = content.find('<pre>')
    # Look for the closing tag only after the opening one, so an earlier
    # stray '</pre>' cannot produce an inverted range.
    end = content.find('</pre>', start) if start != -1 else -1
    if start == -1 or end == -1:
        # find() returns -1 when a marker is missing; slicing with it would
        # silently store an unrelated fragment of the page.
        print("no <pre> listing found for node id( %s ), skipping" % nodeid)
        return

    # The offset of 7 skips the 5-character '<pre>' tag plus two further
    # characters (presumably a line break after the tag -- TODO confirm).
    removespaces(content[start + 7:end])

def removespaces(content):
    """Trim surrounding blanks from *content* and append it to ``temp.utsav``.

    Leading and trailing space and line-feed characters are removed; every
    other character, including interior whitespace, tabs and carriage
    returns, is kept unchanged. The trimmed text is appended to the file
    ``temp.utsav`` in the current working directory (created on first use).

    Args:
        content: Text to trim and store.

    Returns:
        None.
    """
    # Only ' ' and '\n' are stripped. Input made up solely of those
    # characters becomes an empty string, so nothing but real data is stored.
    content = content.strip(" \n")
    print("storing to database")
    # The context manager closes the file even if the write raises.
    with open("temp.utsav", "a") as f:
        f.write(content)
 
# Download the category-code index page, locate the section that starts at the
# "MAIN channel" cell and ends at the "UK channel" cell, and fetch the listing
# for every <option value="..."> node id found in that section.
url = "http://b6.pv.sv.nextag.com/serv/main/buyer/BulkCategoryCodes.jsp"
f = urllib2.urlopen(url)
try:
    content = f.read()
finally:
    # Release the connection even when the read fails.
    f.close()

start = content.find("<td>MAIN channel</td>")
end = content.find("td>UK channel</td>")

if start == -1 or end == -1:
    # find() returns -1 for a missing marker; slicing with it would either
    # scan nothing or scan the wrong part of the page.
    print("category section markers not found; nothing fetched")
else:
    option_prefix = '<option value="'
    # Search inside [start, end) by offset instead of re-slicing the page on
    # every iteration, which would copy the remaining text each time.
    pos = content.find(option_prefix, start, end)
    while pos != -1:
        id_start = pos + len(option_prefix)
        id_end = content.find('"', id_start, end)
        if id_end == -1:
            # Unterminated value attribute: no usable node id remains.
            break
        nodeid = content[id_start:id_end]
        fetchdata(nodeid)
        # Resume right after the prefix just handled.
        pos = content.find(option_prefix, id_start, end)
    



Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: