For example, a hadoop-site.yml file:
---
dfs.name.dir : /var/local/hadoop/hdfs/name
dfs.data.dir : /var/local/hadoop/hdfs/data
dfs.heartbeat.interval : 3
dfs.datanode.address : 0.0.0.0:1004
dfs.datanode.http.address : 0.0.0.0:1006 | Determines where on the local filesystem an DFS data node should store its blocks. If this is a comma-delimited list of directories, then data will be stored in all named directories, typically on different devices. Directories that do not exist are ignored.
It will later be converted to hadoop-site.xml.
Here is the small Python script that I wrote to do the conversion:
import yaml
from xml.etree.ElementTree import Element, SubElement, Comment
from xml.etree import ElementTree
from xml.dom import minidom
def prettify(element):
rough_string = ElementTree.tostring(element, 'utf-8')
reparsed = minidom.parseString(rough_string)
return reparsed.toprettyxml(indent=" ")
def read_yaml_file(yaml_file):
    """Parse *yaml_file* and return its contents (a dict for a mapping file).

    Uses ``yaml.safe_load``: calling ``yaml.load`` without an explicit
    Loader is deprecated since PyYAML 5.1 and can execute arbitrary
    code when fed untrusted input.
    """
    with open(yaml_file, "r") as fh:
        return yaml.safe_load(fh)
def generate_xml(yaml_file):
    """Convert a flat YAML config file into a Hadoop-style XML site file.

    Each ``key: value`` pair becomes a ``<property>`` element with
    ``<name>`` and ``<value>`` children. A value of the form
    ``value | description`` additionally emits a ``<description>``
    element. The output is written next to the input as
    ``<basename>.xml``.
    """
    config = read_yaml_file(yaml_file)
    top = Element('configuration')
    # .items() — the original used .iteritems(), which is Python 2 only.
    for key, raw_value in config.items():
        prop = SubElement(top, 'property')
        name_el = SubElement(prop, 'name')
        value_el = SubElement(prop, 'value')
        text = str(raw_value)
        # Substring test: the original `"|" in str(values).split()` only
        # matched a stand-alone "|" token surrounded by whitespace, so
        # "a|b" silently lost its description.
        if "|" in text:
            value, description = (part.strip() for part in text.split("|", 1))
        else:
            value = text
            description = ""
        name_el.text = key
        value_el.text = value
        if description:
            desc_el = SubElement(prop, 'description')
            desc_el.text = description
    # rsplit keeps directory components intact — split(".")[0] would
    # truncate a path like "./conf/site.yml" to "".
    xml_file = yaml_file.rsplit(".", 1)[0] + ".xml"
    with open(xml_file, "w") as out:
        out.write(prettify(top))
To test it, you can import the function and use it like this:
from hadoop_xml_parser import generate_xml
# Script entry point: convert the sample YAML config into hadoop-site.xml.
if __name__ == '__main__':
    generate_xml("hadoop-site.yml")