本次安装了四台虚拟机:hadoop001、hadoop002、hadoop003、hadoop004,安装过程略过
rpm -qa | grep -i java | xargs -n1 rpm -e --nodeps
systemctl stop firewalld
systemctl disable firewalld.service
vim /etc/sudoers # 编辑sudoers文件，为普通用户添加root权限
## Allow root to run any commands anywhere
root ALL=(ALL) ALL
## Allows people in group wheel to run all commands
%wheel ALL=(ALL) ALL
node01 ALL=(ALL) ALL # 需要添加root权限的用户
mkdir /opt/software # 安装包存放位置
mkdir /opt/module # 程序安装目录
chown -R node01:node01 /opt/software # 修改新建目录所属用户以及用户组
chown -R node01:node01 /opt/module # 修改新建目录所属用户以及用户组
# jdk官网需要登录验证,这里不做记录
wget --no-check-certificate https://dlcdn.apache.org/zookeeper/zookeeper-3.7.1/apache-zookeeper-3.7.1-bin.tar.gz
wget --no-check-certificate https://dlcdn.apache.org/hadoop/common/hadoop-3.2.4/hadoop-3.2.4.tar.gz
tar -zxf apache-zookeeper-3.7.1-bin.tar.gz -C /opt/module
tar -zxf hadoop-3.2.4.tar.gz -C /opt/module
vim /etc/profile.d/my_env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_361
export PATH=${JAVA_HOME}/bin:$PATH
source /etc/profile
java -version
java version "1.8.0_361"
Java(TM) SE Runtime Environment (build 1.8.0_361-b09)
Java HotSpot(TM) 64-Bit Server VM (build 25.361-b09, mixed mode)
vim /etc/profile.d/my_env.sh
export HADOOP_HOME=/opt/module/hadoop-3.2.4
export PATH=${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${PATH}
source /etc/profile
hadoop version
Hadoop 3.2.4
Source code repository Unknown -r 7e5d9983b388e372fe640f21f048f2f2ae6e9eba
Compiled by ubuntu on 2022-07-12T11:58Z
Compiled with protoc 2.5.0
From source with checksum ee031c16fe785bbb35252c749418712
This command was run using /opt/module/hadoop-3.2.4/share/hadoop/common/hadoop-common-3.2.4.jar
# 四台主机上分别执行以下命令
ssh-keygen -t rsa
ssh-copy-id -i ~/.ssh/id_rsa.pub node01@hadoop001
ssh-copy-id -i ~/.ssh/id_rsa.pub node01@hadoop002
ssh-copy-id -i ~/.ssh/id_rsa.pub node01@hadoop003
ssh-copy-id -i ~/.ssh/id_rsa.pub node01@hadoop004
核心配置文件 core-site.xml
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/var/doudou/hadoop/ha</value>
</property>
<property>
<name>hadoop.http.staticuser.user</name>
<value>doudou</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>hadoop004:2181,hadoop002:2181,hadoop003:2181</value>
</property>
HDFS配置文件 hdfs-site.xml
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>hadoop001:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>hadoop002:8020</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://hadoop001:8485;hadoop002:8485;hadoop003:8485/mycluster</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<!-- 注意: 前文使用 ssh-keygen -t rsa 生成密钥, 此处私钥文件应为 id_rsa 而非 id_dsa -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/var/doudou/hadoop/ha/jnn</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
YARN配置文件 yarn-site.xml
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>rmhacluster1</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>hadoop003</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>hadoop004</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>hadoop003:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>hadoop004:8088</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>hadoop002:2181,hadoop003:2181,hadoop004:2181</value>
</property>
<!-- 注意: 本文安装目录为 /opt/module/hadoop-3.2.4, classpath 需与实际安装路径一致 -->
<property>
<name>yarn.application.classpath</name>
<value>/opt/module/hadoop-3.2.4/etc/hadoop:/opt/module/hadoop-3.2.4/share/hadoop/common/lib/*:/opt/module/hadoop-3.2.4/share/hadoop/common/*:/opt/module/hadoop-3.2.4/share/hadoop/hdfs:/opt/module/hadoop-3.2.4/share/hadoop/hdfs/lib/*:/opt/module/hadoop-3.2.4/share/hadoop/hdfs/*:/opt/module/hadoop-3.2.4/share/hadoop/mapreduce/lib/*:/opt/module/hadoop-3.2.4/share/hadoop/mapreduce/*:/opt/module/hadoop-3.2.4/share/hadoop/yarn:/opt/module/hadoop-3.2.4/share/hadoop/yarn/lib/*:/opt/module/hadoop-3.2.4/share/hadoop/yarn/*</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://hadoop001:19888/jobhistory/logs</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
MapReduce配置文件 mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop001:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop001:19888</value>
</property>
datanode节点 workers
# hadoop-3.2.4/etc/hadoop/workers
hadoop001
hadoop002
hadoop003
hadoop004
xsync /opt/module/hadoop-3.2.4/etc/hadoop/mapred-site.xml
myzookeeper.sh start
# 初次启动时需格式化namenode
# hadoop001、hadoop002、hadoop003三台节点上启动journalnode
hdfs --daemon start journalnode
# 在hadoop001或者hadoop002上格式化namenode
hdfs namenode -format
# 启动格式化节点的namenode
hdfs --daemon start namenode
# 在另一台namenode节点上同步数据
hdfs namenode -bootstrapStandby
# 初始化zookeeper节点数据
hdfs zkfc -formatZK
# 启动hdfs、yarn、historyserver服务
start-dfs.sh
start-yarn.sh
mapred --daemon start historyserver
# 后续启动集群
# 先启动zookeeper集群
myzookeeper.sh start
# 再启动hadoop集群
myhadoop.sh start
WEB端查看HDFS的namenode 【http://hadoop001:9870】
WEB端查看YARN的ResourceManager 【http://hadoop003:8088】
WEB端查看JobHistory 【http://hadoop001:19888】
#!/bin/bash
# xsync: distribute the given files/directories to every node in the cluster
# via rsync, preserving their absolute paths.
# Usage: xsync file [file ...]

# 1. Validate argument count.
if [ $# -lt 1 ]; then
  echo "Not Enough Argument!"
  exit 1
fi

# 2. Iterate over the target machines and push the data to each.
for host in hadoop001 hadoop002 hadoop003 hadoop004; do
  echo "=============== $host ==============="
  for file in "$@"; do
    # Only sync files/directories that actually exist locally.
    if [ -e "$file" ]; then
      # Absolute parent directory (-P resolves symlinks).
      pdir=$(cd -P "$(dirname "$file")" && pwd)
      # Bare file name.
      # (Original script assigned 'fame' but used '$fname' — fixed.)
      fname=$(basename "$file")
      # Ensure the destination directory exists, then sync.
      ssh "$host" "mkdir -p $pdir"
      rsync -av "$pdir/$fname" "$host:$pdir"
    else
      echo "$file does not exist!"
    fi
  done
done
#!/bin/bash
# jpsall: print the Java process list (jps) of every node in the cluster.
# (Original separator strings were HTML-entity garbled '&#61;' — decoded to '='.)
for host in hadoop001 hadoop002 hadoop003 hadoop004; do
  echo "========== $host ========="
  ssh "$host" jps
done
#!/bin/bash
# myzookeeper.sh: manage the ZooKeeper ensemble on hadoop002-hadoop004.
# Usage: myzookeeper.sh start|stop|status
# (Original echo strings were HTML-entity garbled '&#61;' — decoded to '='.)

if [ $# -lt 1 ]; then
  echo "No Args Input..."
  exit 1
fi

case $1 in
"start")
  echo " ========== 启动zookeeper集群 =========="
  for host in hadoop002 hadoop003 hadoop004; do
    echo "-------------------- $host --------------------"
    ssh "$host" zkServer.sh start
  done
  ;;
"stop")
  echo " ========== 停止zookeeper集群 =========="
  for host in hadoop002 hadoop003 hadoop004; do
    echo "-------------------- $host --------------------"
    ssh "$host" zkServer.sh stop
  done
  ;;
"status")
  echo " ========== zookeeper集群状态 =========="
  for host in hadoop002 hadoop003 hadoop004; do
    echo "-------------------- $host --------------------"
    ssh "$host" zkServer.sh status
  done
  ;;
*)
  echo "Input Args Error..."
  ;;
esac
#!/bin/bash
# myhadoop.sh: start/stop the whole Hadoop HA cluster
# (HDFS on hadoop001, YARN on hadoop003, JobHistory server on hadoop001).
# Usage: myhadoop.sh start|stop
# NOTE: paths corrected to /opt/module/hadoop-3.2.4, the actual install
# location used earlier in this document (original said /opt/hadoop-3.2.4).

if [ $# -lt 1 ]; then
  echo "No Args Input..."
  exit 1
fi

case $1 in
"start")
  echo " ========== 启动hadoop集群 =========="
  echo " ------------- 启动hdfs -------------"
  ssh hadoop001 "/opt/module/hadoop-3.2.4/sbin/start-dfs.sh"
  echo " ------------- 启动yarn -------------"
  ssh hadoop003 "/opt/module/hadoop-3.2.4/sbin/start-yarn.sh"
  echo " -------- 启动historyserver ---------"
  ssh hadoop001 "/opt/module/hadoop-3.2.4/bin/mapred --daemon start historyserver"
  ;;
"stop")
  echo "========== 关闭hadoop集群 =========="
  echo " ------------ 关闭historyserver ----------"
  ssh hadoop001 "/opt/module/hadoop-3.2.4/bin/mapred --daemon stop historyserver"
  echo " ------------- 关闭yarn ------------ "
  ssh hadoop003 "/opt/module/hadoop-3.2.4/sbin/stop-yarn.sh"
  echo " ------------ 关闭hadoop ----------- "
  ssh hadoop001 "/opt/module/hadoop-3.2.4/sbin/stop-dfs.sh"
  ;;
*)
  echo "Input Args Error..."
  ;;
esac