Migrating MySQL data to Hive with Sqoop
2018-01-29 17:31
1. First, create the Sqoop jobs using incremental import. The Python script is as follows:
#!/usr/bin/python
import os

## One table, incremental on create_time: append rows newer than the stored last-value.
def job_group0(name):
    cmd = 'sqoop job --create %s -- import -m 1 --connect "jdbc:mysql://192.168.76.12:3306/smart_hardware?useSSL=false&user=phidoop&password=phidoop" --table %s --where "create_time < current_date()" --hive-import --hive-database phi_health --hive-table %s --incremental append --check-column create_time --last-value \'1900-01-01\'' % (name, name, name)
    os.system(cmd)

## Sharded tables name_num1 .. name_{num2-1}, incremental on create_time:
## one job per shard, all shards appended into the same Hive table.
def job_group1(num1, num2, name):
    for i in range(num1, num2):
        cmd = 'sqoop job --create %s_%s -- import -m 1 --connect "jdbc:mysql://192.168.76.12:3306/smart_hardware?useSSL=false&user=phidoop&password=phidoop" --table %s_%s --where "create_time < current_date()" --hive-import --hive-database phi_health --hive-table %s --incremental append --check-column create_time --last-value \'1900-01-01\'' % (name, i, name, i, name)
        print(cmd)
        os.system(cmd)

## One table, incremental on the "date" column.
def job_group2(name):
    cmd = 'sqoop job --create %s -- import -m 1 --connect "jdbc:mysql://192.168.76.12:3306/smart_hardware?useSSL=false&user=phidoop&password=phidoop" --table %s --where "date < current_date()" --hive-import --hive-database phi_health --hive-table %s --incremental append --check-column date --last-value \'1900-01-01\'' % (name, name, name)
    os.system(cmd)

## Sharded tables, incremental on the "date" column.
def job_group3(num1, num2, name):
    for i in range(num1, num2):
        cmd = 'sqoop job --create %s_%s -- import -m 1 --connect "jdbc:mysql://192.168.76.12:3306/smart_hardware?useSSL=false&user=phidoop&password=phidoop" --table %s_%s --where "date < current_date()" --hive-import --hive-database phi_health --hive-table %s --incremental append --check-column date --last-value \'1900-01-01\'' % (name, i, name, i, name)
        print(cmd)
        os.system(cmd)

if __name__ == "__main__":
    job_group0("balance_mac_manger_info")
    job_group0("balance_measure_info")
    job_group1(0, 5, "balance_mac_measure_info")
    job_group1(0, 20, "blood_pressure_measure_info")
    job_group1(0, 50, "balance_measure_info")
    job_group2("user_body_info")
    job_group3(0, 10, "user_body_info")
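Sqoop saves each job's state in its metastore and updates the stored last-value after every successful run, so the first execution imports all historical rows and later runs append only new ones. To confirm the jobs were registered and to inspect that saved state, the standard sqoop job subcommands can be used, for example:

sqoop job --list                        # list every saved job
sqoop job --show balance_measure_info   # print one job's saved parameters, including the current last-value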
2. The script job_exec.py that executes the Sqoop jobs:
#!/usr/bin/python
import os

## Execute one saved job by name.
def job_exec_group0(name):
    cmd = 'sqoop job --exec %s' % name
    os.system(cmd)

## Execute the saved job for every shard name_num1 .. name_{num2-1}.
def job_exec_group1(num1, num2, name):
    for i in range(num1, num2):
        cmd = 'sqoop job --exec %s_%s' % (name, i)
        os.system(cmd)

if __name__ == "__main__":
    job_exec_group0("balance_mac_manger_info")
    job_exec_group0("balance_measure_info")
    job_exec_group0("user_body_info")
    job_exec_group1(0, 5, "balance_mac_measure_info")
    job_exec_group1(0, 20, "blood_pressure_measure_info")
    job_exec_group1(0, 10, "user_body_info")
    job_exec_group1(0, 50, "balance_measure_info")
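One caveat worth checking on your Sqoop version: if credentials are passed with --username/--password instead of inside the JDBC URL (as they are above), sqoop job --exec prompts for the password on every run and would hang under cron. A common workaround, assuming you accept storing the password in the Sqoop metastore, is to enable password recording in sqoop-site.xml:

<property>
  <name>sqoop.metastore.client.record.password</name>
  <value>true</value>
</property>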
3. Add job_exec.py to the crontab:
30 1 * * * nohup /var/lib/hadoop-hdfs/sqoop/job_exec.py &
This runs automatically at 1:30 a.m. every day.
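Under cron, nohup and the trailing & are unnecessary (cron already runs jobs detached from a terminal), and redirecting output makes failed imports much easier to diagnose. A possible variant, assuming the script has been made executable (chmod +x job_exec.py) and using an example log path:

30 1 * * * /var/lib/hadoop-hdfs/sqoop/job_exec.py >> /var/log/sqoop/job_exec.log 2>&1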