Sunday, 16 September 2012

Sample Mapred-site.xml







 
    mapred.jobtracker.taskScheduler
    org.apache.hadoop.mapred.CapacityTaskScheduler
 

 
    mapred.queue.names
    hive,pig,default
 

 
    io.sort.record.percent
    0.70
 

 
    io.sort.spill.percent
    0.70
 

 
    io.sort.factor
    32
 

 
    io.sort.mb
    320
 

 
    mapred.child.java.opts
    -Xmx2048m
 

 
    mapred.child.ulimit
    7172096
    true
 

 
    mapred.job.tracker
    namenode_host_name:8021
 

 
    mapred.job.tracker.handler.count
    64
    true
 

 
    mapred.local.dir
    /hadoop1/mapred/,/hadoop2/mapred/,/hadoop3/mapred/,/hadoop4/mapred/,/hadoop5/mapred/,/hadoop6/mapred/
    true
 

 
    mapred.map.tasks.speculative.execution
    true
 

 
    mapred.reduce.parallel.copies
    20
 

 
    mapred.reduce.tasks
    54
 

 
    mapred.reduce.tasks.speculative.execution
    false
 

 
    mapred.tasktracker.map.tasks.maximum
    8
    true
 

 
    mapred.tasktracker.reduce.tasks.maximum
    4
    true
 

 
    tasktracker.http.threads
    40
    true
 



 
    mapred.output.compression.type
    BLOCK
    If the job outputs are to be compressed as SequenceFiles, how should
    they be compressed? Should be one of NONE, RECORD or BLOCK.
    Cloudera's Distribution for Hadoop switches this default to BLOCK
    for better performance.
   

 

 
    mapred.compress.map.output
    true
 

 
    mapred.output.compression.codec
    org.apache.hadoop.io.compress.GzipCodec
 

 
    mapred.map.output.compression.codec
    org.apache.hadoop.io.compress.GzipCodec
 


 
    If users connect through a SOCKS proxy, we don't want their
    SocketFactory settings interfering with the socket factory associated
    with the actual daemons.

    hadoop.rpc.socket.factory.class.default
    org.apache.hadoop.net.StandardSocketFactory
    true
 

 
    hadoop.rpc.socket.factory.class.ClientProtocol
   
    true
 

 
    hadoop.rpc.socket.factory.class.JobSubmissionProtocol
   
    true
 

 
    mapred.job.reuse.jvm.num.tasks
    1
 


 

 
    mapred.capacity-scheduler.queue.default.capacity
    10
    Percentage of the number of slots in the cluster that are
    to be available for jobs in this queue.
   
  
 

 
 
    mapred.capacity-scheduler.queue.default.supports-priority
    true
    If true, priorities of jobs will be taken into
    account in scheduling decisions.
   

 


 
    mapred.capacity-scheduler.queue.default.minimum-user-limit-percent
    25
    Each queue enforces a limit on the percentage of resources
    allocated to a user at any given time, if there is competition for them.
    This user limit can vary between a minimum and maximum value. The former
    depends on the number of users who have submitted jobs, and the latter is
    set to this property value. For example, suppose the value of this
    property is 25. If two users have submitted jobs to a queue, no single
    user can use more than 50% of the queue resources. If a third user submits
    a job, no single user can use more than 33% of the queue resources. With 4
    or more users, no user can use more than 25% of the queue's resources. A
    value of 100 implies no user limits are imposed.
   

 

 
    mapred.capacity-scheduler.queue.default.maximum-initialized-jobs-per-user
    2
    The maximum number of jobs to be pre-initialized for a user
    of the job queue.
   

 

 
 
 
 
 
    mapred.capacity-scheduler.default-supports-priority
    true
    If true, priorities of jobs will be taken into
    account in scheduling decisions by default in a job queue.
   

 

 
 
    mapred.capacity-scheduler.default-minimum-user-limit-percent
    100
    The percentage of the resources limited to a particular user
    for the job queue at any given point of time by default.
   

 


 
    mapred.capacity-scheduler.default-maximum-initialized-jobs-per-user
    5
    The maximum number of jobs to be pre-initialized for a user
    of the job queue.
   

 



 
 
    mapred.capacity-scheduler.init-poll-interval
    5000
    The amount of time in milliseconds which is used to poll
    the job queues for jobs to initialize.
   

 

 
    mapred.capacity-scheduler.init-worker-threads
    5
    Number of worker threads used by the initialization poller to
    initialize jobs in a set of queues. If this number equals the number
    of job queues, a single thread initializes the jobs in each queue. If
    it is smaller, each thread is assigned a set of queues. If it is
    greater, the number of threads is capped at the number of job
    queues.
   

 


 
 
    mapred.capacity-scheduler.queue.hive.capacity
    70
    Percentage of the number of slots in the cluster that are
    to be available for jobs in this queue.
   

 


 
    mapred.capacity-scheduler.queue.hive.supports-priority
    true
    If true, priorities of jobs will be taken into
    account in scheduling decisions.
   

 


 
    mapred.capacity-scheduler.queue.hive.minimum-user-limit-percent
    100
    Each queue enforces a limit on the percentage of resources
    allocated to a user at any given time, if there is competition for them.
    This user limit can vary between a minimum and maximum value. The former
    depends on the number of users who have submitted jobs, and the latter is
    set to this property value. For example, suppose the value of this
    property is 25. If two users have submitted jobs to a queue, no single
    user can use more than 50% of the queue resources. If a third user submits
    a job, no single user can use more than 33% of the queue resources. With 4
    or more users, no user can use more than 25% of the queue's resources. A
    value of 100 implies no user limits are imposed.
   

 

 
    mapred.capacity-scheduler.queue.hive.maximum-initialized-jobs-per-user
    10
    The maximum number of jobs to be pre-initialized for a user
    of the job queue.
   

 


 
 
    mapred.capacity-scheduler.queue.pig.capacity
    20
    Percentage of the number of slots in the cluster that are
    to be available for jobs in this queue.
   

 


 
    mapred.capacity-scheduler.queue.pig.supports-priority
    true
    If true, priorities of jobs will be taken into
    account in scheduling decisions.
   

 


 
    mapred.capacity-scheduler.queue.pig.minimum-user-limit-percent
    50
    Each queue enforces a limit on the percentage of resources
    allocated to a user at any given time, if there is competition for them.
    This user limit can vary between a minimum and maximum value. The former
    depends on the number of users who have submitted jobs, and the latter is
    set to this property value. For example, suppose the value of this
    property is 25. If two users have submitted jobs to a queue, no single
    user can use more than 50% of the queue resources. If a third user submits
    a job, no single user can use more than 33% of the queue resources. With 4
    or more users, no user can use more than 25% of the queue's resources. A
    value of 100 implies no user limits are imposed.
   

 

 
    mapred.capacity-scheduler.queue.pig.maximum-initialized-jobs-per-user
    5
    The maximum number of jobs to be pre-initialized for a user
    of the job queue.
   

 

No comments:

Post a Comment