首页 > 技术文章 > 离线数仓-Hive 数据抽取脚本

javapand 2021-01-22 15:40 原文

脚本作用:工具脚本,抽取postgres数据库的数据到Hive的不同库和表

  1 #!/bin/bash
  2 queue="queue_lsd sj"
  3 NUM MAPPERS = 1
  4 OVERWRITE=true
  5 num_mappers=$NUM MAPPERS
  6 overwrite=OVERNRITE
  7 printUsage()
  8 {
  9   echo "抽取GP数据到HIVE表"
 10   echo "用法:$0 -s source-schema -a source-table  [-e columns][-where] -d target-da -t target-table [-k hive-partition-key] [-v hive-partition-value]
 11   echo "-s source-schema     :GP源模式schema"
 12   echo "-a source-table      :GP源表table"
 13   echo "-c columns            :[可选]抽取的烈烈之則用景一亮默认:所有列"
 14   echo "-w where              :[可选]GP源表过滤语句"
 15   echo "-d target-dk          目标库"
 16   echo "-t target-table       目标表"
 17   echo "-k hive-partition-key   :[可选]表分区"
 18   echo "-v hive-partition-value :[可选]分区值"
 19   echo "-r overwrite            :[可选]是否清空具标表/分区, true/false"
 20   echo "-q queue                :[可选]执行队烈,默认队烈q"
 21   echo "-p split-by             :[可选] 行验分学学段置必须为全数字, 可以采用加工后的字段。执行mapps数为1时 可选1并行抽取切分宇段"
 22   echo "-m num-mappers          :[可选] 执行mapper数,待抽取数据量较少时建议使用,建议使用1 默认是${num_mappers}"
 23 }
 24 
 25 1og()
 26 {
 27   now_time=`date +%Y-%m-%d\  %H:%M:%s`
 28   echo "[$now_time]$@"|tee -a $log_dir$log_file
 29 }
 30 export LANG=en_US.UTF-8
 31 
 32 SUCC=0
 33 FAIL=1
 34 APP_LOG_HOME="/home/ap/nas/test home/logs"
 35 APP_TMP_HOME=/home/ap/nas/test home/run"
 36 connect="jdbs.postgresgl://128196116.69:5432/sordbp13"
 37 username=sor_et1 
 38 password=sor_etl
 39 where=""
 40 hive_partition_key=""
 41 hive_partition_value=""
 42 columns=""
 43 
 44 while getopts ":s:a:w:d:t:k:v:q:p:m:c" opt
 45 do
 46  case $opt in
 47         s)
 48         source_schema=$OPTARG
 49         ;;
 50         a)
 51         source_table=$OPTARG
 52         ;;
 53         w)
 54         where=$OPTARG
 55         ;;
 56         d)
 57         target_db=$OPTARG
 58         ;;
 59         t)
 60         target_table=$OPTARG
 61         ;;
 62         k)
 63         hive_partition_key=$OPTARG
 64         ;;
 65         v)
 66         hive_partition_value=$OPTARG
 67         ;;
 68         r)
 69         overwrite=$OPTARG
 70         ;;
 71         q)
 72         queue=$OPTARG
 73         ;;
 74         p)
 75         split_by=$OPTARG
 76         ;;
 77         m)
 78         num_mappers=$OPTARG
 79         ;;
 80         c)
 81         columns=$OPTARG
 82         ;;
 83   esac
 84 done
 85 ##输入合规校验##
 86 if [ -z $source_schema ] || [ -z $source_table ] || [ -z $target_db ] || [ -z $target_table ]; then
 87   printUsage
 88   exit $FAIL
 89 fi
 90 
 91 if [ $num_mappers != '1' ] && [ -z $split_by ]; then
 92   printUsage
 93   exit $FAIL
 94 fi
 95 
 96 ##创建目录
 97 mkdir -p ${APP_TMP_HOME}
 98 ls_date=`date +%Y%m%d`
 99 log_dir=${APP_LOG_HOME}/$ls_date/
100 log_file="load.$target_db.$target_table.$ls_date.log"
101 mkdir -p $log_dir
102 
# -- Disabled: pre-load truncate of the target table via beeline. Kept for
# -- reference; re-enable and wire to "$overwrite" if truncation is needed.
#sql="select count(*) from ${target_db}.${target_table} limit 1"

#log ${sql}
#sqlfile="${APP_TMP_HOME}/truncate.$target_db.$target_table.$ls_date.sql"
#echo ${sql}>$sqlfile

#COMMAND="beeline -n \"\" -p \"\" -f $sqlfile --silent=false --force=false"
#log $COMMAND
#$COMMAND |tee -a $log_dir$log_file
#EXIT_CODE=${PIPESTATUS[0]}
#rm $sqlfile

#if [ $EXIT_CODE != $SUCC ];then
#   log "truncate table  failed.return ${FAIL}"
#   exit ${FAIL}
#fi

cd "${APP_TMP_HOME}"

# Build the sqoop import command incrementally.
# NOTE(review): sqoop documents --hive-overwrite as unsupported together with
# --hcatalog-* options — verify against the deployed sqoop version. The -r
# "$overwrite" flag parsed above is never consumed; presumably it was meant
# to control this overwrite behavior.
COMMAND="sqoop import -Dmapred.job.queue.name=${queue} --connect ${connect} --driver org.postgresql.Driver --username ${username} --password ${password} --table ${source_schema}.${source_table} --hcatalog-database ${target_db} --hcatalog-table ${target_table} --hive-overwrite"

##条件过滤###
# Parallel import: split on a numeric cast of the -p column.
if [ "$num_mappers" != '1' ]; then
  COMMAND=$COMMAND" --split-by \"cast(${split_by} as integer)\""
fi
COMMAND=$COMMAND" -m ${num_mappers} "

# BUG FIX: the original tested [ -z "$where" ], i.e. it appended --where only
# when the filter was EMPTY, so a user-supplied -w clause was silently dropped.
if [ -n "$where" ]; then
  log "where filter: $where"
  COMMAND=$COMMAND" --where \"${where}\""
fi

if [ -n "$columns" ]; then
  COMMAND=$COMMAND" --columns $columns"
fi

# BUG FIX: sqoop's options are --hive-partition-key / --hive-partition-value;
# the underscore spellings used originally are rejected by sqoop.
if [ -n "$hive_partition_key" ] && [ -n "$hive_partition_value" ]; then
  COMMAND=$COMMAND" --hive-partition-key ${hive_partition_key} --hive-partition-value ${hive_partition_value}"
fi

# Write the command into a runner script so the sqoop|tee pipeline's real exit
# status (${PIPESTATUS[0]}) can be propagated as this script's exit code.
commandfile="${APP_TMP_HOME}/sqoop.$target_db.$target_table.$ls_date.sh"
log $COMMAND
echo -e "$COMMAND |tee -a $log_dir$log_file\n">"$commandfile"
echo "exit \${PIPESTATUS[0]}">>"$commandfile"
# BUG FIX: the runner uses the bashism ${PIPESTATUS[0]}, so it must be run
# with bash, not sh (sh may be dash, where PIPESTATUS does not exist).
bash "$commandfile"
EXIT_CODE=$?
#rm ${source_schema}_${source_table}.java
rm "$commandfile"
if [ "$EXIT_CODE" != "$SUCC" ]; then
  log "load table failed.return ${FAIL}"
  exit ${FAIL}
else
  log "load table success.return ${SUCC}"
  exit ${SUCC}
fi
shell 脚本

 

推荐阅读