SAX Parser for large XML files

<pre>from lxml import etree
def abc(x):
    """Recursively print the tag and text of every leaf element under *x*.

    An element with children is not printed itself (its own text is
    skipped); each of its children is visited instead.  For a leaf element
    this prints a separator line, the tag, and then either the element's
    text or "No Data" when the text is missing or only whitespace.

    Args:
        x: An element-like object exposing ``tag``, ``text``, ``len()`` and
            iteration over its children (e.g. an lxml ``etree`` element).
    """
    if len(x):
        # Not a leaf: descend and report only the leaves below it.
        for child in x:
            abc(child)
        return

    print("--- --- --- --- --- --- ")
    print(x.tag)

    text = x.text
    # An empty element (<a/>) has text None; treat it like blank text
    # instead of letting an AttributeError surface as a bogus "ERROR" line.
    if text is None or not text.strip():
        print("No Data")
        return

    # On Python 2 non-ASCII text arrives as a ``unicode`` object (not a
    # ``str``) and is encoded to UTF-8 bytes before printing, as before.
    # On Python 3 the text is already ``str`` and is printed unchanged.
    if not isinstance(text, str):
        text = text.encode('utf-8')
    try:
        print(text)
    except UnicodeError:
        # Best effort: stdout cannot represent this text; report and go on.
        print("ERROR %s" % x.tag)

	
infile = "1.xml"

# Parse incrementally: with events=('end',) and tag='product', iterparse
# yields each <product> element once its closing tag has been read.
context = etree.iterparse(infile, events=('end',), tag='product')
for event, elem in context:
    print("--- --- ---")
    for x in elem:
        abc(x)

    # It's safe to call clear() here because no descendants will be accessed
    elem.clear()

    # Also drop the already-processed preceding siblings from the parent,
    # so emptied <product> elements do not pile up as the file is read.
    while elem.getprevious() is not None:
        del elem.getparent()[0]





</pre>
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: