LXML Parser

Takes an XML file as input, reads the products one by one, and stores all the data fetched for each row in a multi-dimensional array:

from lxml import etree
class Test:
    """Stream ``product`` elements from an XML file and flatten each one.

    Every product is reduced to a sorted list of ``[tag, value]`` pairs, one
    pair per leaf element found underneath it.
    """

    def parser(self, x):
        """Return the sorted ``[tag, value]`` pairs for every leaf at or below ``x``.

        ``value`` is the leaf's text encoded as UTF-8, or ``-1`` when the
        leaf has no text (or the text cannot be encoded).

        Args:
            x: An element exposing ``tag``, ``text``, ``len()`` and iteration
                over its children.

        Returns:
            A list of two-item lists, ordered by tag; for equal tags the
            ``-1`` placeholders come before real values.
        """
        pool = []
        self._collect(x, pool)
        pool.sort(key=self._sort_key)
        return pool

    def _collect(self, node, pool):
        """Append one ``[tag, value]`` pair to ``pool`` per leaf under ``node``."""
        if len(node) == 0:
            try:
                value = node.text.encode('utf-8')
            except (AttributeError, UnicodeError):
                # ``text`` is None for an empty element; keep the row complete
                # by recording a placeholder instead of dropping the tag.
                value = -1
            pool.append([node.tag, value])
            return
        # Share one accumulator across the recursion so results found in
        # nested elements are not lost when the recursive call returns.
        for child in node:
            self._collect(child, pool)

    def _sort_key(self, pair):
        """Order pairs by tag, placing ``-1`` placeholders before real values."""
        tag, value = pair
        if isinstance(value, int):
            return (tag, 0)
        # The middle element decides placeholder-vs-text ties, so an int is
        # never compared directly against encoded text.
        return (tag, 1, value)

    def Starter(self, infile="1.xml"):
        """Print the flattened row of every ``product`` element in ``infile``.

        Args:
            infile: Path of the XML file to read. Defaults to ``"1.xml"``.
        """
        context = etree.iterparse(infile, events=('end',), tag='product')
        for event, elem in context:
            print("--- --- ---")
            print(event)

            # A product without children contributes an empty row rather
            # than a pair describing the product element itself.
            row = self.parser(elem) if len(elem) else []
            print(row)

            # The row has been built, so the element's content can be freed.
            elem.clear()

            # Also drop the already-processed siblings that precede this
            # element so the partially built tree does not keep growing.
            parent = elem.getparent()
            while parent is not None and elem.getprevious() is not None:
                del parent[0]


Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: