一個用 Python 操作 XML 文件的例子

XML文件在 http://www.oschina.net/code/snippet_866703_15810

<!-- lang: python -->
def _first_child_text(parent, tag_name):
    """Return the value of the first child node of the first matching element.

    Looks up the first ``tag_name`` descendant of ``parent`` and returns the
    ``nodeValue`` of that element's first child node.

    Raises:
        IndexError: if no such element exists or the element has no children.
    """
    return parent.getElementsByTagName(tag_name)[0].childNodes[0].nodeValue


def _print_seed(seed):
    """Print one seed record, indented by system / section / seed level."""
    # Single-argument print(...) calls produce identical output on
    # Python 2 (statement + parenthesised expression) and Python 3.
    print('-------------------------------------------')
    print('system_id:%d' % seed['system_id'])
    print('system_name:%s' % seed['system_name'])
    print(' section_id:%d' % seed['section_id'])
    print(' section_name:%s' % seed['section_name'])
    print('  seed_id:%d' % seed['seed_id'])
    print('  userblog_url:%s' % seed['userblog_url'])
    print('=========================')
    print(seed)


def get_seed_data(filename, verbose=True, pause=False):
    """Parse a seed XML file into a flat list of seed dictionaries.

    The document is walked as ``system`` -> ``section`` -> ``seed``. Every
    ``seed`` element yields one dict that combines its own data with the
    attributes of the enclosing ``section`` and ``system`` elements.

    Args:
        filename: Path (or file object) handed to ``minidom.parse``.
        verbose: When true (the default), print every seed record as it is
            collected.
        pause: When true, run the shell command ``pause`` after each seed so
            the output can be read step by step. Off by default because it
            blocks on user input for every seed and spawns a shell each time;
            NOTE(review): ``pause`` looks like the Windows console built-in --
            confirm before enabling on other platforms.

    Returns:
        A list of dicts with the keys ``crawl_cycle`` (text of the section's
        first ``crawl_cycle`` element), ``system_id``, ``system_name``,
        ``section_id``, ``section_name``, ``seed_id`` and ``userblog_url``.
        The three ``*_id`` values are converted with ``int``.

    Raises:
        IndexError: if a section has no ``crawl_cycle`` element, a seed has no
            ``userblog_url`` element, or either element is empty.
        ValueError: if an ``id`` attribute of a system, section or seed that
            is being collected is not an integer literal.
    """
    dom = minidom.parse(filename)
    root = dom.documentElement

    seed_list = []
    for system_node in root.getElementsByTagName("system"):
        system_id = system_node.getAttribute("id")
        system_name = system_node.getAttribute("name")

        for section_node in system_node.getElementsByTagName("section"):
            section_id = section_node.getAttribute('id')
            section_name = section_node.getAttribute('name')
            # The crawl cycle is declared once per section and shared by
            # every seed inside that section.
            crawl_cycle = _first_child_text(section_node, "crawl_cycle")

            for seed_node in section_node.getElementsByTagName('seed'):
                seed = {
                    'crawl_cycle': crawl_cycle,
                    'system_id': int(system_id),
                    'system_name': system_name,
                    'section_id': int(section_id),
                    'section_name': section_name,
                    'seed_id': int(seed_node.getAttribute('id')),
                    'userblog_url': _first_child_text(seed_node,
                                                      'userblog_url'),
                }
                seed_list.append(seed)

                if verbose:
                    _print_seed(seed)
                if pause:
                    os.system('pause')

    return seed_list
相關文章
相關標籤/搜索