LXML HTML Parsing Usage Example


from lxml.html import fromstring

# Parse the HTML files named "0" .. "999" in the current directory and append,
# for each one, the product name(s) and the result URLs to "result_urls".
#
# Both the report file and every input file are opened with `with`, so they
# are closed even if reading or parsing fails part-way through the run.
with open("result_urls", "a+") as f:
    for x in range(0, 1000):
        # Close each input file as soon as it has been read instead of
        # leaking up to 1000 open handles.
        with open(str(x)) as page:
            mySearchTree = fromstring(page.read())

        f.write("Product Name: ")
        for a in mySearchTree.cssselect('tr input'):
            value = a.get('value')  # product_name
            if value is None:
                # <input> without a value attribute: nothing to record, and
                # f.write(None) would raise TypeError.
                continue
            f.write(value)
            f.write("\n")

        f.write("URLS:")
        f.write("\n")
        for a in mySearchTree.cssselect('h3 a'):
            href = a.get('href')  # url
            if href is None:
                # Anchor without an href (e.g. a named anchor): skip it.
                continue
            f.write(href)
            f.write("\n")

        # Flush after every input file so completed results reach the OS
        # even if a later file fails.
        f.flush()

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. ( Log Out / Change )

Twitter picture

You are commenting using your Twitter account. ( Log Out / Change )

Facebook photo

You are commenting using your Facebook account. ( Log Out / Change )

Google+ photo

You are commenting using your Google+ account. ( Log Out / Change )

Connecting to %s

%d bloggers like this: