Hadoop – Single node set-up

1) Create hadoop group & user

[root@myhostname hpadmin]# groupadd hadoop
[root@myhostname hpadmin]#
[root@myhostname hpadmin]# adduser -g hadoop hadoop

OR

[root@myhostname hpadmin]# adduser -g hadoop hduser
[root@myhostname hpadmin]#
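
You may also want to set a login password for the new user and confirm the group membership (a quick check; use whichever username you created above):

passwd hduser        # set a password for the hadoop user
id hduser            # the output should list the hadoop group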

2) Download and install a stable Hadoop release as below.

You may download it from any Apache mirror site, e.g., the directory listing at hadoop/common/ (the listing also carries newer releases such as hadoop-2.3.0; this guide uses hadoop-1.2.1).
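
If you prefer to pull the tarball straight onto the box, something like the following works (a sketch; the mirror below is the one listed in the links at the end of this post and may have moved since, in which case any Apache mirror or archive.apache.org will do):

cd /usr/local
wget ftp://mirror.reverse.net/pub/apache/hadoop/common/hadoop-1.2.1/hadoop-1.2.1.tar.gz   # fetch the 1.2.1 release tarball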

[root@myhostname local]# pwd
/usr/local
[root@myhostname local]#
[root@myhostname local]#
[root@myhostname local]# tar -zxvf hadoop-1.2.1.tar.gz
hadoop-1.2.1/
hadoop-1.2.1/.eclipse.templates/
hadoop-1.2.1/.eclipse.templates/.externalToolBuilders/
hadoop-1.2.1/.eclipse.templates/.launches/
hadoop-1.2.1/bin/

... (output truncated) ...

hadoop-1.2.1/src/contrib/ec2/bin/launch-hadoop-slaves
hadoop-1.2.1/src/contrib/ec2/bin/list-hadoop-clusters
hadoop-1.2.1/src/contrib/ec2/bin/terminate-hadoop-cluster
[root@myhostname local]#

[root@myhostname local]# ll -lhtr
total 62M
drwxr-xr-x 2 root root 4.0K Jun 28 2011 src
drwxr-xr-x 2 root root 4.0K Jun 28 2011 sbin
drwxr-xr-x 2 root root 4.0K Jun 28 2011 libexec
drwxr-xr-x 2 root root 4.0K Jun 28 2011 lib64
drwxr-xr-x 2 root root 4.0K Jun 28 2011 lib
drwxr-xr-x 2 root root 4.0K Jun 28 2011 include
drwxr-xr-x 2 root root 4.0K Jun 28 2011 games
drwxr-xr-x 2 root root 4.0K Jun 28 2011 etc
drwxr-xr-x 15 root root 4.0K Jul 22 2013 hadoop-1.2.1
drwxr-xr-x 2 root root 4.0K Mar 26 13:54 bin
drwxr-xr-x 5 root root 4.0K Mar 26 14:21 share
drwxr-xr-x 3 root root 4.0K Apr 11 13:24 home
-rw-r--r-- 1 root root 61M Jul 3 02:10 hadoop-1.2.1.tar.gz
[root@myhostname local]#

3) Rename the extracted hadoop-1.2.1 directory to hadoop for convenience, and change its ownership.

[root@myhostname local]#
[root@myhostname local]# mv hadoop-1.2.1 hadoop

[root@myhostname local]# chown -R hduser:hadoop hadoop
[root@myhostname local]#
[root@myhostname local]# cd hadoop
[root@myhostname hadoop]# ll -lhtr
total 8.3M
drwxr-xr-x 3 hduser hadoop 4.0K Jul 22 2013 share
drwxr-xr-x 10 hduser hadoop 4.0K Jul 22 2013 contrib
drwxr-xr-x 9 hduser hadoop 4.0K Jul 22 2013 webapps
-rw-rw-r-- 1 hduser hadoop 377K Jul 22 2013 hadoop-tools-1.2.1.jar
-rw-rw-r-- 1 hduser hadoop 3.0M Jul 22 2013 hadoop-test-1.2.1.jar
-rw-rw-r-- 1 hduser hadoop 417 Jul 22 2013 hadoop-minicluster-1.2.1.jar
-rw-rw-r-- 1 hduser hadoop 140K Jul 22 2013 hadoop-examples-1.2.1.jar
-rw-rw-r-- 1 hduser hadoop 4.1M Jul 22 2013 hadoop-core-1.2.1.jar
-rw-rw-r-- 1 hduser hadoop 414 Jul 22 2013 hadoop-client-1.2.1.jar
-rw-rw-r-- 1 hduser hadoop 6.7K Jul 22 2013 hadoop-ant-1.2.1.jar
-rw-rw-r-- 1 hduser hadoop 1.4K Jul 22 2013 README.txt
-rw-rw-r-- 1 hduser hadoop 101 Jul 22 2013 NOTICE.txt
-rw-rw-r-- 1 hduser hadoop 14K Jul 22 2013 LICENSE.txt
-rw-rw-r-- 1 hduser hadoop 11K Jul 22 2013 ivy.xml
-rw-rw-r-- 1 hduser hadoop 483K Jul 22 2013 CHANGES.txt
drwxr-xr-x 4 hduser hadoop 4.0K Jul 22 2013 c++
-rw-rw-r-- 1 hduser hadoop 119K Jul 22 2013 build.xml
drwxr-xr-x 2 hduser hadoop 4.0K Jul 3 02:12 conf
drwxr-xr-x 6 hduser hadoop 4.0K Jul 3 02:12 docs
drwxr-xr-x 2 hduser hadoop 4.0K Jul 3 02:12 ivy
drwxr-xr-x 16 hduser hadoop 4.0K Jul 3 02:12 src
drwxr-xr-x 5 hduser hadoop 4.0K Jul 3 02:12 lib
drwxr-xr-x 2 hduser hadoop 4.0K Jul 3 02:12 sbin
drwxr-xr-x 2 hduser hadoop 4.0K Jul 3 02:12 libexec
drwxr-xr-x 2 hduser hadoop 4.0K Jul 3 02:12 bin
[root@myhostname hadoop]#

4) Check whether Java is installed; if not, install it first.

Java(TM) 1.6.x or later, preferably from Sun/Oracle, must be installed.
ssh must be installed and sshd must be running to use the Hadoop scripts that manage remote Hadoop daemons.

[root@myhostname hadoop]# which java
/usr/java/jdk1.8.0_05/bin/java
[root@myhostname hadoop]#
[root@myhostname hadoop]#
[root@myhostname hadoop]# java -version
java version "1.8.0_05"
Java(TM) SE Runtime Environment (build 1.8.0_05-b13)
Java HotSpot(TM) 64-Bit Server VM (build 25.5-b02, mixed mode)
[root@myhostname hadoop]#

To find the path of the Java installation currently in use:

[root@myhostname hadoop]# JAVAPATH="$(readlink -f $(which java))"
[root@myhostname hadoop]# echo "$JAVAPATH"
/usr/java/jdk1.8.0_05/bin/java
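
If java is not found at all, install a JDK first. One option on RHEL 6, assuming the OpenJDK packages are available in your configured yum repos (the Oracle JDK shown in this guide was installed from Oracle's own RPM instead):

yum install java-1.7.0-openjdk-devel   # OpenJDK alternative; any 1.6+ JDK will do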

5) Set the Hadoop and Java HOME and PATH variables in ~/.bashrc as below.

[root@myhostname hadoop]# cat ~/.bashrc
# .bashrc

# User specific aliases and functions

alias rm='rm -i'
alias cp='cp -i'
alias mv='mv -i'

# Source global definitions
if [ -f /etc/bashrc ]; then
. /etc/bashrc
fi

### Hadoop & Java Environment Variables ####
###————————————–####
export HADOOP_HOME=/usr/local/hadoop
export JAVA_HOME=/usr/java/jdk1.8.0_05

export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$JAVA_HOME/bin
#############################################

### Kerberos Environment Variables ####
###——————————–####
export KRB5_KTNAME=/opt/mongodb/keytab/mongodb.keytab
export KRB5_TRACE=/tmp/krb5trace_mongo_`date +"%d-%m-%y-%H-%Ms"`.log
#######################################

### MongoDB Environment Variables ####
###——————————-####
export PATH=$PATH:/home/hpadmin/
export PATH=$PATH:/opt/mongodb/bin
export CLASSPATH="/home/hpadmin::/java/mongo-java-driver-2.12.2.jar"
######################################
[root@myhostname hadoop]#
[root@myhostname hadoop]#
[root@myhostname hadoop]#
[root@myhostname hadoop]# source ~/.bashrc
[root@myhostname hadoop]# env | egrep "HADOOP|JAVA"
HADOOP_HOME=/usr/local/hadoop
JAVA_HOME=/usr/java/jdk1.8.0_05
[root@myhostname hadoop]#
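
A quick sanity check that both are now on the PATH:

hadoop version   # should report Hadoop 1.2.1
java -version    # should report the JDK pointed to by JAVA_HOME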

6) Edit the required configuration files as below.

[root@myhostname conf]# cat core-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>

<property>
<name>fs.default.name</name>
<value>hdfs://myhostname3:9000</value>
</property>

</configuration>
[root@myhostname conf]#
[root@myhostname conf]# cat mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
<name>mapred.job.tracker</name>
<value>myhostname:9001</value>
</property>
</configuration>
[root@myhostname conf]#
[root@myhostname conf]#
[root@myhostname conf]# cat hdfs-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>

<property>
<name>dfs.replication</name>
<value>1</value>
</property>

</configuration>
[root@myhostname conf]#
[root@myhostname conf]# head hadoop-env.sh
# Set Hadoop-specific environment variables here.

# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.
# The java implementation to use. Required.

export JAVA_HOME=/usr/java/jdk1.8.0_05
[root@myhostname conf]#
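
Note that with only the properties above, Hadoop defaults hadoop.tmp.dir to /tmp/hadoop-${user.name} (you can see this in the format output in step 9), and /tmp is typically wiped on reboot. For anything beyond a throw-away test, you may want to point it at a persistent location in core-site.xml; a sketch, where the path below is just an example and the directory must exist and be writable by the hadoop user:

<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop/tmp</value>
</property>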

7) Set up SSH as below; use the link below if you run into any issues with the Lab boxes.

https://collaborate.citi.net/docs/DOC-122509

[root@myhostname ~]# ssh-keygen -t rsa
Generating 2048-bit rsa key pair
7 o.oOo.oOo..o
Key generated.
2048-bit rsa, root@myhostname, Thu Jul 03 2014 03:51:15 -0400
Passphrase :
Again :
Key is stored with NULL passphrase.
(You can ignore the following warning if you are generating hostkeys.)
This is not recommended.
Don't do this unless you know what you're doing.
If file system protections fail (someone can access the keyfile),
or if the super-user is malicious, your key can be used without
the deciphering effort.
Private key saved to /root/.ssh2/id_rsa_2048_a
Public key saved to /root/.ssh2/id_rsa_2048_a.pub
[root@myhostname ~]#
[root@myhostname ~]#
[root@myhostname ~]#
[root@myhostname ~]# ll -lhtr
total 0
[root@myhostname ~]# cd .ssh2/
[root@myhostname .ssh2]#
[root@myhostname .ssh2]# ll -lhtr
total 12K
-rw------- 1 root root 512 Jul 3 03:51 random_seed
-rw-r--r-- 1 root root 532 Jul 3 03:51 id_rsa_2048_a.pub
-rw------- 1 root root 1.5K Jul 3 03:51 id_rsa_2048_a
[root@myhostname .ssh2]#

For more details on two-way SSH, you can refer to the post "SSH Tectia is not working due to improper build in the Lab/Test VMs".
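
The output above is from SSH Tectia (note the .ssh2 directory). On a stock OpenSSH install, the usual passwordless set-up for localhost looks roughly like this instead (a sketch, assuming default OpenSSH paths):

ssh-keygen -t rsa -P ""                          # generate a key pair with an empty passphrase
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys  # authorize the key for the local account
chmod 600 ~/.ssh/authorized_keys
ssh localhost                                    # should now log in without a password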

8) Install rsync if it is not already installed.

[root@myhostname hpadmin]# yum install rsync
soe6local | 951 B 00:00
soe6products | 951 B 00:00
soe6products/primary | 349 kB 00:00
soe6products 980/980
soe6u4 | 951 B 00:00
Setting up Install Process
Resolving Dependencies
--> Running transaction check
---> Package rsync.x86_64 0:3.0.6-9.el6 will be installed
--> Finished Dependency Resolution

Dependencies Resolved

============================================================================================================================================================================================================================================
Package Arch Version Repository Size
============================================================================================================================================================================================================================================
Installing:
rsync x86_64 3.0.6-9.el6 soe6u4 334 k

Transaction Summary
============================================================================================================================================================================================================================================
Install 1 Package(s)

Total download size: 334 k
Installed size: 682 k
Is this ok [y/N]: y
Downloading Packages:
rsync-3.0.6-9.el6.x86_64.rpm | 334 kB 00:00
Running rpm_check_debug
Running Transaction Test
Transaction Test Succeeded
Running Transaction
Installing : rsync-3.0.6-9.el6.x86_64 1/1
Verifying : rsync-3.0.6-9.el6.x86_64 1/1

Installed:
rsync.x86_64 0:3.0.6-9.el6

Complete!
[root@myhostname hpadmin]#
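
To confirm the install:

which rsync && rsync --version   # should show /usr/bin/rsync and version 3.0.6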

9) Format the HDFS filesystem via the NameNode as below.

[root@myhostname conf]# hadoop namenode -format
14/07/03 02:55:29 INFO namenode.NameNode: STARTUP_MSG:
/************************************************************
STARTUP_MSG: Starting NameNode
STARTUP_MSG: host = myhostname/10.40.87.36
STARTUP_MSG: args = [-format]
STARTUP_MSG: version = 1.2.1
STARTUP_MSG: build = https://svn.apache.org/repos/asf/hadoop/common/branches/branch-1.2 -r 1503152; compiled by 'mattf' on Mon Jul 22 15:23:09 PDT 2013
STARTUP_MSG: java = 1.8.0_05
************************************************************/
14/07/03 02:55:29 INFO util.GSet: Computing capacity for map BlocksMap
14/07/03 02:55:29 INFO util.GSet: VM type = 64-bit
14/07/03 02:55:29 INFO util.GSet: 2.0% max memory = 932184064
14/07/03 02:55:29 INFO util.GSet: capacity = 2^21 = 2097152 entries
14/07/03 02:55:29 INFO util.GSet: recommended=2097152, actual=2097152
14/07/03 02:55:29 INFO namenode.FSNamesystem: fsOwner=root
14/07/03 02:55:29 INFO namenode.FSNamesystem: supergroup=supergroup
14/07/03 02:55:29 INFO namenode.FSNamesystem: isPermissionEnabled=true
14/07/03 02:55:29 INFO namenode.FSNamesystem: dfs.block.invalidate.limit=100
14/07/03 02:55:29 INFO namenode.FSNamesystem: isAccessTokenEnabled=false accessKeyUpdateInterval=0 min(s), accessTokenLifetime=0 min(s)
14/07/03 02:55:29 INFO namenode.FSEditLog: dfs.namenode.edits.toleration.length = 0
14/07/03 02:55:29 INFO namenode.NameNode: Caching file names occuring more than 10 times
14/07/03 02:55:30 INFO common.Storage: Image file /tmp/hadoop-root/dfs/name/current/fsimage of size 110 bytes saved in 0 seconds.
14/07/03 02:55:30 INFO namenode.FSEditLog: closing edit log: position=4, editlog=/tmp/hadoop-root/dfs/name/current/edits
14/07/03 02:55:30 INFO namenode.FSEditLog: close success: truncate to 4, editlog=/tmp/hadoop-root/dfs/name/current/edits
14/07/03 02:55:30 INFO common.Storage: Storage directory /tmp/hadoop-root/dfs/name has been successfully formatted.
14/07/03 02:55:30 INFO namenode.NameNode: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at myhostname/10.40.87.36
************************************************************/
[root@myhostname conf]#

10) Start the services with the start-all.sh script as below.

[root@myhostname bin]# pwd
/root/hadoop-1.2.1/bin

[root@myhostname bin]# ./start-all.sh

starting namenode, logging to /root/hadoop-1.2.1/libexec/../logs/hadoop-root-namenode-myhostname.out
localhost: Red Hat Enterprise Linux Server release 6.4 (Santiago)
localhost: Kernel \r on an \m
localhost:
localhost: starting datanode, logging to /root/hadoop-1.2.1/libexec/../logs/hadoop-root-datanode-myhostname.out
localhost: Red Hat Enterprise Linux Server release 6.4 (Santiago)
localhost: Kernel \r on an \m
localhost:
localhost:
localhost: starting secondarynamenode, logging to /root/hadoop-1.2.1/libexec/../logs/hadoop-root-secondarynamenode-myhostname.out
starting jobtracker, logging to /root/hadoop-1.2.1/libexec/../logs/hadoop-root-jobtracker-myhostname.out
localhost: Red Hat Enterprise Linux Server release 6.4 (Santiago)
localhost: Kernel \r on an \m
localhost:
localhost: starting tasktracker, logging to /root/hadoop-1.2.1/libexec/../logs/hadoop-root-tasktracker-myhostname.out
[root@myhostname bin]#

To check the currently running Hadoop processes:

[root@myhostname bin]# jps
13079 DataNode
13513 TaskTracker
12922 NameNode
13627 Jps
13227 SecondaryNameNode
28685 ResourceManager
13358 JobTracker
6734 -- process information unavailable
[root@myhostname bin]#
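
Besides jps, you can check the web UIs that ship with Hadoop 1.x (default ports, assuming they have not been changed): the NameNode status page at http://myhostname:50070/ and the JobTracker at http://myhostname:50030/. For a quick end-to-end smoke test you can run the bundled grep example, roughly as in the Apache single-node setup guide linked at the end of this post (run it as the same user that formatted HDFS and started the daemons):

hadoop fs -put $HADOOP_HOME/conf input                                              # copy the conf directory into HDFS as 'input'
hadoop jar $HADOOP_HOME/hadoop-examples-1.2.1.jar grep input output 'dfs[a-z.]+'   # run the bundled grep example job
hadoop fs -cat output/*                                                             # print the matched lines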

11) To stop services

[root@myhostname bin]# ./stop-all.sh

stopping jobtracker
localhost: Red Hat Enterprise Linux Server release 6.4 (Santiago)
localhost: Kernel \r on an \m
localhost:
localhost: no tasktracker to stop
stopping namenode
localhost: Red Hat Enterprise Linux Server release 6.4 (Santiago)
localhost: Kernel \r on an \m
localhost:
localhost:
localhost: no datanode to stop
localhost: Red Hat Enterprise Linux Server release 6.4 (Santiago)
localhost: Kernel \r on an \m
localhost:
localhost:
localhost: no secondarynamenode to stop
[root@myhostname bin]#

The next post will cover building a Hadoop cluster.
Please feel free to post your suggestions/corrections to the above post.

Sources & Useful Links related to Hadoop :-
—————————————————–

http://mirror.cogentco.com/pub/apache/hadoop/common/stable2/
ftp://mirror.reverse.net/pub/apache/hadoop/common/hadoop-1.2.1/hadoop-1.2.1.tar.gz
http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleNodeSetup.html
http://www.michael-noll.com/tutorials/running-hadoop-on-ubuntu-linux-single-node-cluster/
