首页 > 技术文章 > hadoop集群搭建教程

iupoint 2019-12-22 16:05 原文

1. 相关软件准备:

VMware-workstation-full-15.0.4-12990004.exe  

CentOS-7-x86_64-DVD-1810.iso

jdk-8u231-linux-x64.tar.gz  账号:邮箱,密码首字母大写

hadoop-3.2.1.tar.gz

apache-zookeeper-3.5.6-bin.tar.gz

apache-hive-3.1.2-bin.tar.gz    其他

pyspark-2.4.4.tar.gz

xshell+xftp  xshell破解版  非学生版

2. VMware虚拟机及linux系统安装及网络环境配置,参见之前的博客

3. hadoop安装及配置教程

Hadoop3.2.1版本的环境搭建

(1)查看、打开、关闭防火墙

# CentOS 7默认没有了iptables文件
cd /etc/sysconfig
ls -l
yum install iptables-services
systemctl enable iptables

  systemctl disable iptables.service

(2)权限不足 :  sudo chmod -R 777 /home/hadoop/apps/hadoop-3.2.1/bin/yarn

 

vim /etc/profile    #也可以vim ~/.bashrc   
export JAVA_HOME="/opt/modules/jdk1.8.0_271"
export PATH=$JAVA_HOME/bin:$PATH
export HADOOP_HOME="/opt/modules/hadoop-3.2.1"    #与本文安装的 hadoop-3.2.1 版本保持一致
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
source /etc/profile

mkdir -p $HADOOP_HOME/tmp
mkdir -p $HADOOP_HOME/hdfs/data    #-p:父目录 hdfs 不存在时一并创建,否则 mkdir 会报错
mkdir -p $HADOOP_HOME/hdfs/name

#etc/hadoop/core-site.xml:
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <!-- XML 配置中不会展开 shell 变量($HADOOP_HOME),必须写绝对路径 -->
        <value>/opt/modules/hadoop-3.2.1/tmp</value>
    </property>
</configuration>


#etc/hadoop/hdfs-site.xml:
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <!-- Hadoop 3.x 中 dfs.name.dir/dfs.data.dir 已废弃,应使用 dfs.namenode.name.dir/dfs.datanode.data.dir;
             XML 中不会展开 shell 变量,需写绝对路径 -->
        <name>dfs.namenode.name.dir</name>
        <value>/opt/modules/hadoop-3.2.1/hdfs/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/modules/hadoop-3.2.1/hdfs/data</value>
    </property>
</configuration>


#etc/hadoop/yarn-site.xml:
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>


#vim $HADOOP_HOME/etc/hadoop/hadoop-env.sh 添加
export JAVA_HOME=/opt/modules/jdk1.8.0_271  #修改
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root


#启动 - 初次启动前需先格式化namenode(只在第一次执行;重复format会导致DataNode与NameNode的clusterID不一致而无法启动)
bin/hdfs namenode -format
#关闭防火墙
#https://blog.csdn.net/u011170921/article/details/80437937?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-4.control&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-4.control
#./sbin/stop-all.sh
#service iptables stop
#chkconfig iptables off
systemctl disable iptables.service
service iptables status

#关闭 NetworkManager
service  NetworkManager stop
chkconfig NetworkManager off

#./sbin/start-all.sh
sbin/start-dfs.sh
sbin/start-yarn.sh

#检查 9000 端口是否打开并远程访问,一般集群启动就打开了,如为localhost表示只能本地访问
netstat -tlpn   #netstat -ap | grep 9000
#检查是否允许远程访问可使用telnet ip port
telnet 192.168.100.10 9000
#没启动9000,解决措施
https://www.cnblogs.com/woofwoof/p/10261751.html


http://dblab.xmu.edu.cn/blog/2440-2/
http://dblab.xmu.edu.cn/blog/install-mysql/
#安装hive元数据库 - 以mysql为元数据库
-----------------------------------------------------
#安装新版mysql前,需将系统自带的mariadb-lib卸载
#rpm -qa|grep mariadb  #mariadb-libs-5.5.60-1.el7_5.x86_64
#rpm -e --nodeps mariadb-libs-5.5.60-1.el7_5.x86_64
#会自动替换掉这个服务
#sudo tar -zxvf ./apache-hive-3.1.2-bin.tar.gz -C /usr/local   # 解压到/usr/local中
#cd /usr/local/
#sudo mv apache-hive-3.1.2-bin hive       # 将文件夹名改为hive
#sudo chown -R dblab:dblab hive            # 修改文件权限
wget https://mirrors.tuna.tsinghua.edu.cn/mysql/yum/mysql80-community-el7/mysql80-community-release-el7-3.noarch.rpm
rpm -ivh mysql80-community-release-el7-3.noarch.rpm
yum update mysql80-community-release-el7-3.noarch.rpm
yum install -y mysql-server
#启动服务
service mysqld start
netstat -tap | grep mysql  #mysql节点处于LISTEN状态表示启动成功
systemctl status mysqld.service
mysqladmin --version
grep 'temporary password' /var/log/mysqld.log    #查看root默认密码  &aF%C#+4NMo/
#修改密码
vim /etc/my.cnf
[mysqld]
skip-grant-tables
basedir=/var/lib/mysql
datadir=/var/lib/mysql/data
socket=/var/lib/mysql/mysql.sock
character-set-server=utf8
log-error=/var/log/mysqld.log
pid-file=/var/run/mysqld/mysqld.pid
#重启
systemctl restart mysqld   
show databases;
use mysql;
select user,host from user;
update user set host="%" where user="root";
#update user set authentication_string='' where user='root';   #如果直接改不了就先置空
update mysql.user set authentication_string='l123456' where user='root';
#ALTER USER 'root'@'localhost' IDENTIFIED WITH mysql_native_password BY 'l123456';
flush privileges;
quit;
#将这行注释
vim /etc/my.cnf
#skip-grant-tables
#systemctl restart mysqld
service mysqld restart

#忘记密码
ps -ef | grep -i mysql    --查看状态
service mysqld stop   --关闭
vim /etc/my.cnf   --skip-grant-tables
service mysqld start

mysql -u root -p


#创建hive数据库及hive用户
select host, user, plugin from mysql.user;
CREATE DATABASE hive;
USE hive;
CREATE USER 'hive'@'localhost' IDENTIFIED BY 'hive';
CREATE USER 'hive'@'%' IDENTIFIED BY 'hive';
CREATE USER 'root'@'%' IDENTIFIED BY 'iUpoint@123';
#GRANT ALL ON hive.* TO 'hive'@'localhost' IDENTIFIED BY 'hive';  #8.0.19报错
#GRANT ALL privileges ON hive.* TO 'hive'@'%' IDENTIFIED BY 'hive';
GRANT ALL ON *.* TO 'hive'@'localhost';
GRANT ALL ON *.* TO 'hive'@'%';
FLUSH PRIVILEGES;


#安装hive
-----------------------------------------------------
wget https://mirrors.tuna.tsinghua.edu.cn/apache/hive/hive-3.1.2/apache-hive-3.1.2-bin.tar.gz
tar -xzvf apache-hive-3.1.2-bin.tar.gz    #与上一行下载的 3.1.2 版本保持一致
#ln -s /opt/modules/apache-hive-3.1.2-bin hive  #类似快捷方式
mv apache-hive-3.1.2-bin hive

vim /etc/profile
export HIVE_HOME="/opt/modules/hive"
export PATH=$HIVE_HOME/bin:$PATH    #注意必须是大写的 PATH,小写 path 不会生效
source /etc/profile

cd hive/conf
cp hive-default.xml.template hive-site.xml
# 将相对路径${system:java.io.tmpdir}/${system:user.name}改为绝对路径/home/user_name/hive/tmp/hive
mv hive-default.xml hive-default.xml.template 

#mysql jdbc
wget https://mirrors.tuna.tsinghua.edu.cn/mysql/downloads/Connector-J/mysql-connector-java-8.0.22.tar.gz
tar -zxvf mysql-connector-java-8.0.22.tar.gz
cp mysql-connector-java-8.0.22/mysql-connector-java-8.0.22.jar /opt/modules/hive/lib    #前面已将解压目录重命名为 hive
#rmdir mysql-connector-java-8.0.22   #删除空目录
rm -rf mysql-connector-java-8.0.22/  #递归删除目录

#hive配置文件
#参考:https://www.jianshu.com/p/02ec73752e1c
#grep -n 'datanucleus.autoStartMechanism' conf/hive-site.xml
#grep -n '<name>.*dir</name>' conf/hive-site.xml
#grep -n '<name>.*Connection.*</name>' conf/hive-site.xml
#vim +529 conf/hive-site.xml   #打开后esc状态下输入:n可跳转到指定行
#添加
  <property>
    <name>system:java.io.tmpdir</name>
    <value>/opt/modules/hive/tmp/</value>
  </property>
  <property>
    <name>system:user.name</name>
    <value>hive</value>
  </property>
#修改
<property>
    <name>javax.jdo.option.ConnectionURL</name>
	<!-- createDatabaseIfNotExist=true;characterEncoding=UTF-8; -->
    <value>jdbc:mysql://localhost:3306/hive?useSSL=false</value>
    <description>
      JDBC connect string for a JDBC metastore.
      To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL.
      For example, jdbc:postgresql://myhost/db?ssl=true for postgres database.
    </description>
</property>
 
 <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <!-- <value>com.mysql.jdbc.Driver</value> -->
	<value>com.mysql.cj.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>
 
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
    <description>Username to use against metastore database</description>
  </property>
 
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hive</value>
    <description>password to use against metastore database</description>
  </property>
 
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/user/hive/warehouse</value>
    <description>location of default database for the warehouse</description>
  </property>

<property>
   <name>datanucleus.autoStartMechanism</name>
   <value>SchemaTable</value>
</property>

<!-- 当配置为true时,则表示会强制metastore的版本信息与hive jar一致 -->
<property>
   <name>hive.metastore.schema.verification</name>
   <value>false</value>
   <description>  
    Enforce metastore schema version consistency.  
    True: Verify that version information stored in metastore matches with one from Hive jars.  Also disable automatic schema migration attempt. Users are required to manully migrate schema after Hive upgrade which ensures proper metastore schema migration. (Default)  
    False: Warn if the version information stored in metastore doesn't match with one from in Hive jars.  
    </description>  
</property>
<property>
    <name>datanucleus.schema.autoCreateAll</name>
    <value>true</value>
 </property>



#配置hive-env.sh
cd $HIVE_HOME/conf
cp hive-env.sh.template hive-env.sh
vim hive-env.sh

# 相应的目录换成自己的
# hadoop 目录
HADOOP_HOME=/opt/modules/hadoop-3.2.1    #与前面安装的 hadoop 目录保持一致

# hive 配置目录
export HIVE_CONF_DIR=/opt/modules/hive/conf

# hive 的lib目录
export HIVE_AUX_JARS_PATH=/opt/modules/hive/lib



#Could not create ServerSocket on address 0.0.0.0/0.0.0.0:9083.
# Jps查看Runjar进程号,并杀掉
jps
kill -9 进程号

#$HIVE_HOME/lib 中guava的版本应与 $HADOOP_HOME/share/hadoop/common/lib 中相同
schematool -initSchema -dbType mysql
hive --service metastore &

#hadoop测试
查看文件权限
ls -l /opt/data
chmod -R 777 /opt/data   #不加-R改变的是该文件的权限


mkdir /opt/data
cd /opt/data
touch text.txt
vim text.txt
Hello World
Hello Hadoop


cd $HADOOP_HOME/share/hadoop/mapreduce
# 注意:若 fs.defaultFS 配置为 hdfs://,输入路径在 HDFS 上,需先上传:hdfs dfs -mkdir -p /opt/data && hdfs dfs -put /opt/data/text.txt /opt/data/
hadoop jar hadoop-mapreduce-examples-3.2.1.jar wordcount /opt/data/text.txt /opt/data/output


cd /opt/data/output
cat part-r-00000
Hadoop 1
Hello 2
World 1

  

 

推荐阅读