Friday, July 4, 2014

Hadoop default configuration parameters and values

Hi all,

This is the full set of default configuration parameters and values for Hadoop 1.2.1, as defined in the core-default.xml, hdfs-default.xml, and mapred-default.xml files that ship with the distribution. Feel free to keep it as a reference; any of these values can be overridden in core-site.xml, hdfs-site.xml, or mapred-site.xml under the conf directory.

dfs.access.time.precision=3600000
dfs.balance.bandwidthPerSec=1048576
dfs.block.access.key.update.interval=600
dfs.block.access.token.enable=false
dfs.block.access.token.lifetime=600
dfs.blockreport.initialDelay=0
dfs.blockreport.intervalMsec=3600000
dfs.block.size=67108864
dfs.client.block.write.retries=3
dfs.client.use.datanode.hostname=false
dfs.data.dir=${hadoop.tmp.dir}/dfs/data
dfs.datanode.address=0.0.0.0:50010
dfs.datanode.data.dir.perm=755
dfs.datanode.dns.interface=default
dfs.datanode.dns.nameserver=default
dfs.datanode.drop.cache.behind.reads=false
dfs.datanode.drop.cache.behind.writes=false
dfs.datanode.du.reserved=0
dfs.datanode.failed.volumes.tolerated=0
dfs.datanode.handler.count=3
dfs.datanode.http.address=0.0.0.0:50075
dfs.datanode.https.address=0.0.0.0:50475
dfs.datanode.ipc.address=0.0.0.0:50020
dfs.datanode.max.xcievers=4096
dfs.datanode.readahead.bytes=4193404
dfs.datanode.sync.behind.writes=false
dfs.datanode.use.datanode.hostname=false
dfs.default.chunk.view.size=32768
dfs.df.interval=60000
dfs.heartbeat.interval=3
dfs.http.address=0.0.0.0:50070
dfs.https.address=0.0.0.0:50470
dfs.https.client.keystore.resource=ssl-client.xml
dfs.https.enable=false
dfs.https.need.client.auth=false
dfs.https.server.keystore.resource=ssl-server.xml
dfs.image.transfer.bandwidthPerSec=0
dfs.max.objects=0
dfs.name.dir=${hadoop.tmp.dir}/dfs/name
dfs.name.edits.dir=${dfs.name.dir}
dfs.namenode.avoid.read.stale.datanode=false
dfs.namenode.avoid.write.stale.datanode=false
dfs.namenode.decommission.interval=30
dfs.namenode.decommission.nodes.per.interval=5
dfs.namenode.delegation.key.update-interval=86400000
dfs.namenode.delegation.token.max-lifetime=604800000
dfs.namenode.delegation.token.renew-interval=86400000
dfs.namenode.edits.toleration.length=0
dfs.namenode.handler.count=10
dfs.namenode.invalidate.work.pct.per.iteration=0.32f
dfs.namenode.kerberos.internal.spnego.principal=${dfs.web.authentication.kerberos.principal}
dfs.namenode.logging.level=info
dfs.namenode.replication.work.multiplier.per.iteration=2
dfs.namenode.safemode.min.datanodes=0
dfs.namenode.stale.datanode.interval=30000
dfs.namenode.write.stale.datanode.ratio=0.5f
dfs.permissions.supergroup=supergroup
dfs.permissions=true
dfs.replication=3
dfs.replication.considerLoad=true
dfs.replication.interval=3
dfs.replication.max=512
dfs.replication.min=1
dfs.safemode.extension=30000
dfs.safemode.threshold.pct=0.999f
dfs.secondary.http.address=0.0.0.0:50090
dfs.secondary.namenode.kerberos.internal.spnego.principal=${dfs.web.authentication.kerberos.principal}
dfs.webhdfs.enabled=false
dfs.web.ugi=webuser,webgroup
fs.checkpoint.dir=${hadoop.tmp.dir}/dfs/namesecondary
fs.checkpoint.edits.dir=${fs.checkpoint.dir}
fs.checkpoint.period=3600
fs.checkpoint.size=67108864
fs.default.name=file:///
fs.file.impl=org.apache.hadoop.fs.LocalFileSystem
fs.ftp.impl=org.apache.hadoop.fs.ftp.FTPFileSystem
fs.har.impl.disable.cache=true
fs.har.impl=org.apache.hadoop.fs.HarFileSystem
fs.hdfs.impl=org.apache.hadoop.hdfs.DistributedFileSystem
fs.hftp.impl=org.apache.hadoop.hdfs.HftpFileSystem
fs.hsftp.impl=org.apache.hadoop.hdfs.HsftpFileSystem
fs.kfs.impl=org.apache.hadoop.fs.kfs.KosmosFileSystem
fs.ramfs.impl=org.apache.hadoop.fs.InMemoryFileSystem
fs.s3.block.size=67108864
fs.s3.buffer.dir=${hadoop.tmp.dir}/s3
fs.s3.impl=org.apache.hadoop.fs.s3.S3FileSystem
fs.s3.maxRetries=4
fs.s3n.impl=org.apache.hadoop.fs.s3native.NativeS3FileSystem
fs.s3.sleepTimeSeconds=10
fs.trash.interval=0
fs.webhdfs.impl=org.apache.hadoop.hdfs.web.WebHdfsFileSystem
hadoop.http.authentication.kerberos.keytab=${user.home}/hadoop.keytab
hadoop.http.authentication.kerberos.principal=HTTP/localhost@LOCALHOST
hadoop.http.authentication.signature.secret.file=${user.home}/hadoop-http-auth-signature-secret
hadoop.http.authentication.simple.anonymous.allowed=true
hadoop.http.authentication.token.validity=36000
hadoop.http.authentication.type=simple
hadoop.jetty.logs.serve.aliases=true
hadoop.logfile.count=10
hadoop.logfile.size=10000000
hadoop.native.lib=true
hadoop.relaxed.worker.version.check=false
hadoop.rpc.socket.factory.class.default=org.apache.hadoop.net.StandardSocketFactory
hadoop.security.authentication=simple
hadoop.security.authorization=false
hadoop.security.group.mapping=org.apache.hadoop.security.ShellBasedUnixGroupsMapping
hadoop.security.instrumentation.requires.admin=false
hadoop.security.token.service.use_ip=true
hadoop.security.uid.cache.secs=14400
hadoop.security.use-weak-http-crypto=false
hadoop.skip.worker.version.check=false
hadoop.tmp.dir=/tmp/hadoop-${user.name}
hadoop.util.hash.type=murmur
io.bytes.per.checksum=512
io.compression.codecs=org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec
io.file.buffer.size=4096
io.mapfile.bloom.error.rate=0.005
io.mapfile.bloom.size=1048576
io.map.index.skip=0
io.seqfile.compress.blocksize=1000000
io.seqfile.lazydecompress=true
io.seqfile.sorter.recordlimit=1000000
io.serializations=org.apache.hadoop.io.serializer.WritableSerialization
io.skip.checksum.errors=false
io.sort.factor=10
io.sort.mb=100
io.sort.record.percent=0.05
io.sort.spill.percent=0.80
ipc.client.connection.maxidletime=10000
ipc.client.connect.max.retries=10
ipc.client.fallback-to-simple-auth-allowed=false
ipc.client.idlethreshold=4000
ipc.client.kill.max=10
ipc.client.tcpnodelay=false
ipc.server.listen.queue.size=128
ipc.server.tcpnodelay=false
jobclient.output.filter=FAILED
job.end.retry.attempts=0
job.end.retry.interval=30000
keep.failed.task.files=false
local.cache.size=10737418240
mapred.acls.enabled=false
mapred.child.java.opts=-Xmx200m
mapred.child.tmp=./tmp
mapred.cluster.map.memory.mb=-1
mapred.cluster.max.map.memory.mb=-1
mapred.cluster.max.reduce.memory.mb=-1
mapred.cluster.reduce.memory.mb=-1
mapred.combine.recordsBeforeProgress=10000
mapred.compress.map.output=false
mapred.disk.healthChecker.interval=60000
mapred.healthChecker.interval=60000
mapred.healthChecker.script.timeout=600000
mapred.heartbeats.in.second=100
mapred.inmem.merge.threshold=1000
mapred.job.map.memory.mb=-1
mapred.job.queue.name=default
mapred.job.reduce.input.buffer.percent=0.0
mapred.job.reduce.memory.mb=-1
mapred.job.reuse.jvm.num.tasks=1
mapred.job.shuffle.input.buffer.percent=0.70
mapred.job.shuffle.merge.percent=0.66
mapred.jobtracker.blacklist.fault-bucket-width=15
mapred.jobtracker.blacklist.fault-timeout-window=180
mapred.jobtracker.completeuserjobs.maximum=100
mapred.job.tracker.handler.count=10
mapred.job.tracker.http.address=0.0.0.0:50030
mapred.jobtracker.job.history.block.size=3145728
mapred.job.tracker.jobhistory.lru.cache.size=5
mapred.jobtracker.jobSchedulable=org.apache.hadoop.mapred.JobSchedulable
mapred.job.tracker=local
mapred.jobtracker.maxtasks.per.job=-1
mapred.jobtracker.nodegroup.aware=false
mapred.job.tracker.persist.jobstatus.active=false
mapred.job.tracker.persist.jobstatus.dir=/jobtracker/jobsInfo
mapred.job.tracker.persist.jobstatus.hours=0
mapred.jobtracker.restart.recover=false
mapred.job.tracker.retiredjobs.cache.size=1000
mapred.jobtracker.taskScheduler=org.apache.hadoop.mapred.JobQueueTaskScheduler
mapred.line.input.format.linespermap=1
mapred.local.dir=${hadoop.tmp.dir}/mapred/local
mapred.local.dir.minspacekill=0
mapred.local.dir.minspacestart=0
mapred.map.max.attempts=4
mapred.map.output.compression.codec=org.apache.hadoop.io.compress.DefaultCodec
mapred.map.tasks=2
mapred.map.tasks.speculative.execution=true
mapred.max.tracker.blacklists=4
mapred.max.tracker.failures=4
mapred.merge.recordsBeforeProgress=10000
mapred.min.split.size=0
mapred.output.compress=false
mapred.output.compression.codec=org.apache.hadoop.io.compress.DefaultCodec
mapred.output.compression.type=RECORD
mapred.queue.default.state=RUNNING
mapred.queue.names=default
mapred.reduce.max.attempts=4
mapred.reduce.parallel.copies=5
mapred.reduce.slowstart.completed.maps=0.05
mapred.reduce.tasks=1
mapred.reduce.tasks.speculative.execution=true
mapred.skip.attempts.to.start.skipping=2
mapred.skip.map.auto.incr.proc.count=true
mapred.skip.map.max.skip.records=0
mapred.skip.reduce.auto.incr.proc.count=true
mapred.skip.reduce.max.skip.groups=0
mapred.submit.replication=10
mapred.system.dir=${hadoop.tmp.dir}/mapred/system
mapred.task.cache.levels=2
mapred.task.profile=false
mapred.task.profile.maps=0-2
mapred.task.profile.reduces=0-2
mapred.task.timeout=600000
mapred.tasktracker.dns.interface=default
mapred.tasktracker.dns.nameserver=default
mapred.tasktracker.expiry.interval=600000
mapred.task.tracker.http.address=0.0.0.0:50060
mapred.tasktracker.indexcache.mb=10
mapred.tasktracker.map.tasks.maximum=2
mapred.tasktracker.reduce.tasks.maximum=2
mapred.task.tracker.report.address=127.0.0.1:0
mapred.task.tracker.task-controller=org.apache.hadoop.mapred.DefaultTaskController
mapred.tasktracker.taskmemorymanager.monitoring-interval=5000
mapred.tasktracker.tasks.sleeptime-before-sigkill=5000
mapred.temp.dir=${hadoop.tmp.dir}/mapred/temp
mapreduce.ifile.readahead.bytes=4194304
mapreduce.ifile.readahead=true
mapreduce.job.acl-modify-job=
mapreduce.job.acl-view-job=
mapreduce.job.complete.cancel.delegation.tokens=true
mapreduce.job.counters.counter.name.max=64
mapreduce.job.counters.group.name.max=128
mapreduce.job.counters.groups.max=50
mapreduce.job.counters.max=120
mapreduce.jobhistory.cleaner.interval-ms=86400000
mapreduce.jobhistory.max-age-ms=2592000000
mapreduce.job.restart.recover=true
mapreduce.job.split.metainfo.maxsize=10000000
mapreduce.jobtracker.staging.root.dir=${hadoop.tmp.dir}/mapred/staging
mapreduce.reduce.input.limit=-1
mapreduce.reduce.shuffle.connect.timeout=180000
mapreduce.reduce.shuffle.maxfetchfailures=10
mapreduce.reduce.shuffle.read.timeout=180000
mapreduce.tasktracker.outofband.heartbeat.damper=1000000
mapreduce.tasktracker.outofband.heartbeat=false
mapred.used.genericoptionsparser=true
mapred.user.jobconf.limit=5242880
mapred.userlog.limit.kb=0
mapred.userlog.retain.hours=24
map.sort.class=org.apache.hadoop.util.QuickSort
net.topology.impl=org.apache.hadoop.net.NetworkTopology
tasktracker.http.threads=40
topology.node.switch.mapping.impl=org.apache.hadoop.net.ScriptBasedMapping
topology.script.number.args=100
webinterface.private.actions=false
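
As a quick way to cross-check this list against a running installation, here is a minimal Java sketch (the class name PrintHadoopDefaults is my own, not part of Hadoop; it assumes the Hadoop 1.2.1 jars are on the classpath) that loads the same default resources and looks up a few of the values listed above.

import java.util.Map;

import org.apache.hadoop.conf.Configuration;

public class PrintHadoopDefaults {
    public static void main(String[] args) {
        // A plain Configuration loads core-default.xml and core-site.xml from the classpath.
        Configuration conf = new Configuration();

        // Add the HDFS and MapReduce default resources explicitly so the lookups
        // below see the same values as the list above.
        conf.addResource("hdfs-default.xml");
        conf.addResource("mapred-default.xml");

        // Typed accessors; the second argument is only a fallback if the key is missing.
        System.out.println("dfs.replication     = " + conf.getInt("dfs.replication", 3));
        System.out.println("dfs.block.size      = " + conf.getLong("dfs.block.size", 67108864L));
        System.out.println("mapred.reduce.tasks = " + conf.getInt("mapred.reduce.tasks", 1));
        System.out.println("hadoop.tmp.dir      = " + conf.get("hadoop.tmp.dir"));

        // A value set in code (or in a *-site.xml file) takes precedence over the defaults.
        conf.setInt("dfs.replication", 2);
        System.out.println("dfs.replication     = " + conf.getInt("dfs.replication", 3));

        // Configuration is Iterable, so the whole effective key/value set can be
        // dumped in the same form as the list in this post.
        for (Map.Entry<String, String> entry : conf) {
            System.out.println(entry.getKey() + "=" + entry.getValue());
        }
    }
}

Compile and run it with the Hadoop 1.2.1 jars and your conf directory on the classpath; anything in the output that differs from the list above has been overridden in one of the *-site.xml files.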
