1. Sqoop Introduction and Installation
http://sqoop.apache.org/
Documentation: http://sqoop.apache.org/docs/1.4.6/index.html
https://dev.mysql.com/downloads/mysql/
Environment variable configuration after extracting:
vi /etc/profile
#set sqoop home
SQOOP_HOME=/sqoop-1.4.6
PATH=$SQOOP_HOME/bin:$PATH
export SQOOP_HOME PATH
Verify the version; for now, ignore warnings caused by other components not being present.
#sqoop version
As usual, for Sqoop to connect to a particular database, the corresponding JDBC driver JAR must be loaded first.
Place it in $SQOOP_HOME/lib.
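For MySQL, for example, this means copying the MySQL Connector/J JAR into Sqoop's lib directory (a sketch; the exact JAR file name depends on the connector version you downloaded):
cp mysql-connector-java-5.1.40-bin.jar $SQOOP_HOME/lib/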
MySQL:
[root@hadoop1 setup]# rpm -vih --nodeps mysql-community-libs-compat-5.7.17-1.el5.x86_64.rpm
[root@hadoop1 setup]# rpm -vih --nodeps mysql-community-libs-5.7.17-1.el5.x86_64.rpm
[root@hadoop1 setup]# rpm -vih mysql-community-client-5.7.17-1.el5.x86_64.rpm
[root@hadoop1 setup]# rpm -vih mysql-5.7.17-1.el5.x86_64.rpm
[root@hadoop1 setup]# rpm -vih mysql-community-common-5.7.17-1.el5.x86_64.rpm
[root@hadoop1 setup]# rpm -vih mysql-community-server-5.7.17-1.el5.x86_64.rpm
/etc/rc.d/init.d/mysqld start|status
service mysqld start
[root@hadoop1 setup]# service mysqld stop
[root@hadoop1 setup]# mysqld_safe --skip-grant-tables
[root@hadoop1 ~]# mysql
mysql> use mysql;
mysql> update user set authentication_string=password('root') where user='root';
Query OK, 0 rows affected, 1 warning (0.00 sec)
Rows matched: 1  Changed: 0  Warnings: 1
mysql> flush privileges;
Query OK, 0 rows affected (0.00 sec)
mysql> quit
[root@hadoop1 ~]# mysql -u root -p
mysql> set password=password('Gopro5hero!');
Query OK, 0 rows affected, 1 warning (0.00 sec)
mysql> show databases;
+--------------------+
| Database           |
+--------------------+
| information_schema |
| mysql              |
| performance_schema |
| sys                |
+--------------------+
4 rows in set (0.00 sec)
mysql> select user,host from user;
+-----------+-----------+
| user      | host      |
+-----------+-----------+
| Bond      | localhost |
| mysql.sys | localhost |
| root      | localhost |
+-----------+-----------+
3 rows in set (0.00 sec)
mysql> update user set host='%' where user='root';
Query OK, 1 row affected (0.00 sec)
Rows matched: 1  Changed: 1  Warnings: 0
mysql> flush privileges;
Query OK, 0 rows affected (0.00 sec)
mysql> select user,host from user;
+-----------+-----------+
| user      | host      |
+-----------+-----------+
| root      | %         |
| Bond      | localhost |
| mysql.sys | localhost |
+-----------+-----------+
3 rows in set (0.00 sec)
mysql> CREATE USER 'Bond'@'localhost' IDENTIFIED BY 'test123';
ERROR 1819 (HY000): Your password does not satisfy the current policy requirements
mysql> CREATE USER 'Bond'@'localhost' IDENTIFIED BY 'Gopro5hero!';
Query OK, 0 rows affected (0.00 sec)
mysql> select host,user,authentication_string,password_expired,account_locked from user;
+-----------+-----------+-------------------------------------------+------------------+----------------+
| host      | user      | authentication_string                     | password_expired | account_locked |
+-----------+-----------+-------------------------------------------+------------------+----------------+
| localhost | root      | *4F63A43ACD2876BD906F01B56E65F3A06DC9A5A8 | N                | N              |
| localhost | mysql.sys | *THISISNOTAVALIDPASSWORDTHATCANBEUSEDHERE | N                | Y              |
| localhost | Bond      | *4F63A43ACD2876BD906F01B56E65F3A06DC9A5A8 | N                | N              |
+-----------+-----------+-------------------------------------------+------------------+----------------+
3 rows in set (0.00 sec)
mysql> delete from user where user = 'Bond';
Query OK, 1 row affected (0.00 sec)
mysql> CREATE USER 'Bond'@'localhost' IDENTIFIED BY 'Gopro5hero!';
ERROR 1396 (HY000): Operation CREATE USER failed for 'Bond'@'localhost'
mysql> flush privileges;
Query OK, 0 rows affected (0.00 sec)
mysql> CREATE USER 'Bond'@'localhost' IDENTIFIED BY 'Gopro5hero!';
Query OK, 0 rows affected (0.00 sec)
mysql> alter user Bond@localhost identified by 'Gopro5hero!!';
Query OK, 0 rows affected (0.01 sec)
mysql> flush privileges;
Query OK, 0 rows affected (0.01 sec)
mkdir -p /usr/local/mysql/data/
chown -R mysql:mysql mysql/
/usr/local/mysql/bin/mysqld -u mysql --datadir=/usr/local/mysql/data
cp support-files/mysql.server /etc/init.d/mysql
2. Basic Sqoop Usage
Sqoop reads data row by row and imports it in parallel; within each row, columns are separated by a delimiter.
Sqoop is essentially an ETL tool.
sqoop help import
Importing from MySQL into Hadoop HDFS
Full command reference: http://sqoop.apache.org/docs/1.4.6/SqoopUserGuide.html
sqoop import --connect jdbc:mysql://hadoop1:3306/pjh -username root -password root --table t1
sqoop import --connect jdbc:mysql://hadoop1:3306/pjh -username root -P --table t1
[root@hadoop1 ~]# hadoop fs -ls /user/root/t1
[root@hadoop1 ~]# hadoop fs -rm -R /user/root/t1
We will come back to the Hive side later; for now we only look at HDFS.
sqoop eval --connect jdbc:mysql://hadoop1:3306/pjh \
-username root -P \
--query "select * from t1 where id between 2 and 4"
# Use --warehouse-dir to import the MySQL table t1 into a specified parent directory on HDFS
[root@hadoop1 ~]# hadoop fs -mkdir -p /user/shared
sqoop import \
--connect jdbc:mysql://hadoop1:3306/pjh -username root -P \
--table t1 \
--warehouse-dir /user/shared \
--m 1 \
--split-by id
# No primary key could be found for table t1. Please specify one with --split-by or perform a sequential import with '--m 1'
# With --m 1, parallelism is disabled and only one output file is generated.
# If --split-by id is used, the select list must include id.
# If --split-by id is used without --m, the system decides the number of output files automatically.
# Use --table, --columns, --where and --target-dir to import the selected records into a specified directory.
sqoop import \
--connect jdbc:mysql://hadoop1:3306/pjh -username root -P \
--table t1 \
--columns "id,name" \
--where "id between '2' and '4'" \
--target-dir /user/shared/t1_q1 \
--m 2 \
--split-by id
# If --m 5 (--num-mappers greater than 1) is used for a parallel query, --split-by must also be specified.
# If the result set has fewer rows than --m 5, each generated file will contain only one row.
# --append by default appends all records as new files on HDFS, without modifying the existing files.
sqoop import \
--connect jdbc:mysql://hadoop1:3306/pjh -username root -P \
--query "select id,name from t1 where id<9 and \$CONDITIONS" \
--target-dir /user/shared/t1_q2 \
--append \
--m 5 \
--split-by id
When using Sqoop you can use an options file, so you do not have to type every parameter by hand.
The options file can point to a password file, and the password file is best kept on HDFS.
[root@hadoop1 ~]# echo -n "root" > /root/.password_mysql
[root@hadoop1 ~]# hadoop fs -mkdir -p /user/hadoop
[root@hadoop1 ~]# hadoop fs -put /root/.password_mysql /user/hadoop/
[root@hadoop1 ~]# hadoop fs -chmod 400 /user/hadoop/.password_mysql
[root@hadoop1 ~]# vi sqoop_import.cfg
import
--connect
jdbc:mysql://hadoop1:3306/pjh
--username
root
--password-file
/user/hadoop/.password_mysql
[root@hadoop1 ~]# vi sqoop_list_databases.cfg
list-databases
--connect
jdbc:mysql://hadoop1:3306/pjh
--username
root
--password-file
/user/hadoop/.password_mysql
[root@hadoop1 ~]# vi sqoop_list_tables.cfg
list-tables
--connect
jdbc:mysql://hadoop1:3306/pjh
--username
root
--password-file
/user/hadoop/.password_mysql
[root@hadoop1 ~]#
sqoop --options-file ~/sqoop_import.cfg \
--table t1 \
--target-dir /user/shared/t1_q3 \
--append \
--m 1
[root@hadoop1 ~]# sqoop --options-file ~/sqoop_list_databases.cfg
[root@hadoop1 ~]# sqoop --options-file /root/sqoop_list_tables.cfg
[root@hadoop1 ~]# hadoop fs -cat /user/shared/t1_q3/*
Setting the field delimiter on import
The default delimiter is a comma, but another character can be specified manually, for example |.
[root@hadoop1 ~]#
sqoop --options-file /root/sqoop_import.cfg \
--query "select id,name from t1 where id<6 and \$CONDITIONS" \
--target-dir /user/shared/t1_q4 \
--append \
--m 1 \
--split-by id \
--fields-terminated-by "|"
[root@hadoop1 ~]# hadoop fs -ls /user/shared/t1_q4/
[root@hadoop1 ~]# hadoop fs -cat /user/shared/t1_q4/*
All parameters can be specified in the --options-file.
[root@hadoop1 ~]# vi sqoop_i_q5.cfg
import
--connect
jdbc:mysql://hadoop1:3306/pjh
--username
root
--password-file
/user/hadoop/.password_mysql
--query
select id,name from t1 where id<7 and $CONDITIONS
## When using --options-file, no double quotes are needed here, and no backslash is needed before $CONDITIONS
--target-dir
/user/shared/t1_q5
--split-by
id
[root@hadoop1 ~]# hadoop fs -rm -R /user/shared/t1_q5
[root@hadoop1 ~]# sqoop --options-file /root/sqoop_i_q5.cfg
4. Sqoop Incremental Import (MySQL)
The merge tool is typically run after an incremental import with the date-last-modified mode
(sqoop import --incremental lastmodified …)
Note: a regular incremental update actually has to be completed with the help of the merge tool.
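As a sketch of what such a merge could look like (the directory names and the generated record class/JAR are hypothetical; the JAR and class come from a previous import or from sqoop codegen):
sqoop merge \
--new-data /user/shared/t1_new \
--onto /user/shared/t1_old \
--target-dir /user/shared/t1_merged \
--jar-file /tmp/sqoop-root/compile/<hash>/t1.jar \
--class-name t1 \
--merge-key id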
# Initial import
[root@hadoop1 ~]# vi sqoop_i_q6_init.cfg
import
--connect
jdbc:mysql://hadoop1:3306/pjh
--username
root
--password-file
/user/hadoop/.password_mysql
--query
select id,name,ts from t1 where id<8 and $CONDITIONS
## When using --options-file, no double quotes are needed here, and no backslash is needed before $CONDITIONS
--target-dir
/user/shared/t1_q6
--split-by
id
[root@hadoop1 ~]# hadoop fs -rm -R /user/shared/t1_q6
[root@hadoop1 ~]# sqoop --options-file /root/sqoop_i_q6_init.cfg
[root@hadoop1 ~]# hadoop fs -cat /user/shared/t1_q6/*
The three core parameters for incremental import:
Argument | Description
--check-column (col) | Specifies the column to be examined when determining which rows to import. (The column should not be of type CHAR/NCHAR/VARCHAR/VARNCHAR/LONGVARCHAR/LONGNVARCHAR.)
--incremental (mode) | Specifies how Sqoop determines which rows are new. Legal values for mode include append and lastmodified.
--last-value (value) | Specifies the maximum value of the check column from the previous import. (Not strictly required for an incremental import.)
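For reference, the three options can also be passed directly on the command line instead of through an options file (a sketch based on the t1_q6 example; the --last-value shown is just a placeholder):
sqoop import \
--connect jdbc:mysql://hadoop1:3306/pjh -username root -P \
--table t1 \
--target-dir /user/shared/t1_q6 \
--check-column ts \
--incremental append \
--last-value "2017-04-07 08:38:34.0" \
--split-by id \
--m 1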
# Test --incremental append for inserts
[root@hadoop1 ~]# vi sqoop_i_q6_inc_append.cfg
import
--connect
jdbc:mysql://hadoop1:3306/pjh
--username
root
--password-file
/user/hadoop/.password_mysql
--query
select id,name,ts from t1 where id<99 and $CONDITIONS
## When using --options-file, no double quotes are needed here, and no backslash is needed before $CONDITIONS
--target-dir
/user/shared/t1_q6
--split-by
id
--check-column
ts
--incremental
append
# Append mode is what performs inserts
--last-value
2017-04-07 08:38:34.0
## The value given here must be comparable with the check column. Rows greater than this value will be extracted; rows equal to it will not. It is the key criterion for deciding whether a source row is imported. If --last-value is not specified, all rows are appended by default, which is equivalent to --append.
# Before the import, insert a row with a new ts into the source table; note that the query must be able to pick up the new row. After the import completes, exactly one row is added, and the --last-value needed for the next import is printed automatically. It is recommended to use a sqoop job to record this value automatically and simplify the workflow (section 6).
[root@hadoop1 ~]# sqoop --options-file /root/sqoop_i_q6_inc_append.cfg
[root@hadoop1 ~]# hadoop fs -cat /user/shared/t1_q6/*
# Test --incremental lastmodified for updates
[root@hadoop1 ~]# vi sqoop_i_q6_inc_lastmodified.cfg
import
--connect
jdbc:mysql://hadoop1:3306/pjh
--username
root
--password-file
/user/hadoop/.password_mysql
--query
select id,name,ts from t1 where id<99 and $CONDITIONS
## No double quotes are needed here, and no backslash is needed before $CONDITIONS
--target-dir
/user/shared/t1_q6
--split-by
id
--check-column
ts
# When using lastmodified, --check-column must be specified and must be a timestamp column, although in my tests it did not seem to have any effect here.
--incremental
lastmodified
# --merge-key or --append is required when using --incremental lastmodified and the output directory exists.
# lastmodified mode is what performs updates
--merge-key
id
# Specifies the key column used for the merge/update; usually a unique column.
--last-value
2015-04-07 08:38:34.0
# Optional
# Before the import, update the name of one row in the source table; note that the query must be able to pick up that row. After the import completes, that row's name is updated on HDFS. You can see the work is split across two MapReduce jobs.
[root@hadoop1 ~]# sqoop --options-file /root/sqoop_i_q6_inc_lastmodified.cfg
[root@hadoop1 ~]# hadoop fs -cat /user/shared/t1_q6/*
Why is this not reflected in the test results???
Before the import, update the name of one row in the source table; note that the query must be able to pick up that row. After the import completes, that row's name is updated. You can see the work is split across two jobs.
[root@hadoop1 ~]# sqoop --options-file /root/sqoop_i_q6_inc_append.cfg
[root@hadoop1 ~]# hadoop fs -cat /user/shared/t1_q6/*
Create a duplicate record with id=2 on HDFS:
sqoop import \
--connect jdbc:mysql://hadoop1:3306/pjh -username root -P \
--query "select id,name, '1999-04-07 08:38:34.0' from t1 where id=2 and \$CONDITIONS" \
--target-dir /user/shared/t1_q6 \
--append \
--split-by id
Create a duplicate record with id=1 on HDFS:
sqoop import \
--connect jdbc:mysql://hadoop1:3306/pjh -username root -P \
--query "select id, 'gopro', '1999-04-07 08:38:34.0' from t1 where id=1 and \$CONDITIONS" \
--target-dir /user/shared/t1_q6 \
--append \
--split-by id
Findings:
i) If the source table has two rows with the same id while HDFS has no duplicate, only the earlier of the two RDBMS records is used to update HDFS, regardless of the ts value in the source table.
ii) If rows with a duplicate id were inserted into HDFS one after another while the RDBMS has no duplicate, after the import HDFS keeps only the earlier one, regardless of the ts value on HDFS.
iii) If rows with a duplicate id were inserted into HDFS and the RDBMS also has duplicate rows for that id, after the import only the earliest record is kept on each side, and the single remaining row stays consistent between the RDBMS and HDFS.
Start the job history server to record job logs:
[root@hadoop1 ~]# /hadoop-2.7.2/sbin/mr-jobhistory-daemon.sh start historyserver
With lastmodified you can do inserts and updates at the same time! Note, however, that in a sqoop job, if you insert and update records in the RDBMS without also updating ts, then only the newly inserted rows will be extracted and the updated rows will not be captured. Therefore, as an RDBMS convention, every insert and every change to a row should also update the timestamp so that ETL can process it.
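In MySQL, for example, the ts column can be made to refresh automatically on every insert and update (a sketch, assuming t1 already has a ts column):
mysql -u root -p pjh -e "ALTER TABLE t1 MODIFY ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP"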
5. Sqoop Export in Practice (MySQL)
[root@hadoop1 ~]# sqoop help export
[root@hadoop1 ~]# vi sqoop_export.cfg
export
--connect
jdbc:mysql://hadoop1:3306/pjh
--username
root
--password-file
/user/hadoop/.password_mysql
# View the source data on HDFS:
[root@hadoop1 ~]# hadoop fs -cat /user/shared/t1_q6/*
# Exporting from HDFS to the RDBMS requires that the target table t2 be created beforehand, with a column structure matching the export source. Repeated exports append to the table.
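For example, if the HDFS data has the columns id, name and ts, t2 could be created like this (a sketch; adjust the column types to match the actual source):
mysql -u root -p pjh -e "CREATE TABLE t2 (id INT, name VARCHAR(100), ts TIMESTAMP)"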
[root@hadoop1 ~]#
sqoop --options-file ~/sqoop_export.cfg \
--export-dir /user/shared/t1_q6 \
--table t2 \
--input-null-string '\\N'
# optionally: --columns col1,col2,col3
Note: any column not included in --columns must have a default value or allow NULL; otherwise the RDBMS will reject the data Sqoop exports and the job will fail.
--export-dir: the export source directory; it must be specified when exporting, together with either --table or --call. --table names the target table in the database, --call names a stored procedure.
--input-null-string and --input-null-non-string are optional. If the first is not given, the string "NULL" is translated into a null value for string-typed columns; if the second is not given, then for non-string columns both the string "NULL" and the empty string are translated into null values.
--update-key: the update key, i.e. the column used to match rows for updating, for example id; multiple key columns can be given, separated by commas.
--update-mode: has two modes. updateonly (the default) only updates existing records and never inserts new ones; allowinsert also allows new records to be inserted.
Sqoop's export tool generates one of two kinds of statements. The first is INSERT statements; if the table has a PK constraint and already contains data, the export fails with an error. In that case --update-key and --update-mode are needed.
If --update-key is specified, Sqoop modifies the rows that already exist in the table: each changed record becomes an UPDATE statement against the target table, matched on the column given by --update-key, e.g. UPDATE ... SET teacher='MarsJ' WHERE class_id = 1.
1) If the column given by --update-key is not the PK and --update-mode is updateonly, only updates are performed; with allowinsert, it effectively becomes an insert operation.
2) If the column given by --update-key is the PK and --update-mode is allowinsert, it is effectively an insert & update operation; with updateonly it is an update-only operation.
[root@hadoop1 ~]#
sqoop --options-file ~/sqoop_export.cfg \
--export-dir /user/shared/t1_q6 \
--table t2 \
--update-key id \
--update-mode updateonly
# If the RDBMS contains duplicate ids (no PK, usually a non-normalized case) and HDFS contains the same duplicate id, after the export the RDBMS keeps all of its rows and updates them based on the most recent matching row in HDFS.
# If the RDBMS has a PK and HDFS contains duplicate rows with the same id, after the export the row is updated based on the earliest matching row in HDFS.
[root@hadoop1 ~]#
sqoop --options-file ~/sqoop_export.cfg \
--export-dir /user/shared/t1_q6 \
--table t2 \
--update-key id \
--update-mode allowinsert
# When using allowinsert, the RDBMS table must have a PK; otherwise Sqoop will not check whether the id already exists and will simply insert. If HDFS contains duplicate ids, only the earliest record in HDFS is taken and written into the RDBMS.
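If t2 does not have a primary key yet, one could be added like this (a sketch, assuming id is unique in t2):
mysql -u root -p pjh -e "ALTER TABLE t2 ADD PRIMARY KEY (id)"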
6. Sqoop Job
A sqoop job saves all of the configuration information, including the connection string and parameters.
# Create a sqoop job for the lastmodified incremental update of t1_q6
# First we need the latest last_value, which can be taken from the output of the previous sqoop run (re-running the most recent sqoop command will print the current system timestamp).
[root@hadoop1 ~]# sqoop --options-file /root/sqoop_i_q6_inc_lastmodified.cfg
17/04/11 13:24:37 INFO tool.ImportTool: --incremental lastmodified
17/04/11 13:24:37 INFO tool.ImportTool: --check-column ts
17/04/11 13:24:37 INFO tool.ImportTool: --last-value 2017-04-11 13:23:55.0
17/04/11 13:24:37 INFO tool.ImportTool: (Consider saving this with 'sqoop job --create')
[root@hadoop1 ~]# hadoop fs -cat /user/shared/t1_q6/*
[root@hadoop1 ~]# sqoop job --create job1 -- --options-file /root/sqoop_i_q6_inc_lastmodified.cfg
[root@hadoop1 ~]# sqoop job --list
[root@hadoop1 ~]# sqoop job --show job1
[root@hadoop1 ~]# sqoop job --exec job1
17/04/11 13:36:08 INFO tool.ImportTool: Saving incremental import state to the metastore
17/04/11 13:36:08 INFO tool.ImportTool: Updated data for job: job1
# After executing job1, look at its definition again and you will find that last_value has been updated automatically.
[root@hadoop1 ~]# sqoop job --show job1
Job: job1
Tool: import
Options:
----------------------------
verbose = false
incremental.last.value = 2017-04-11 13:35:22.0
db.connect.string = jdbc:mysql://hadoop1:3306/pjh
# Insert a new row (with a new ts value) into the RDBMS, then simply run job1 again and observe the result.
[root@hadoop1 ~]# sqoop job --exec job1
[root@hadoop1 ~]# hadoop fs -cat /user/shared/t1_q6/*
# The current definition of job1 can also be inspected via the metastore under .sqoop
[root@hadoop1 ~]# cd .sqoop
[root@hadoop1 .sqoop]# ls -lrt
total 12
-rw-r--r-- 1 root root 5653 Apr 11 13:49 metastore.db.script
-rw-r--r-- 1 root root 419 Apr 11 13:49 metastore.db.properties
[root@hadoop1 .sqoop]# more metastore.db.properties
#HSQL Database Engine 1.8.0.10
#Tue Apr 11 13:49:21 CST 2017
hsqldb.script_format=0
runtime.gc_interval=0
sql.enforce_strict_size=false
hsqldb.cache_size_scale=8
readonly=false
hsqldb.nio_data_file=true
hsqldb.cache_scale=14
version=1.8.0
hsqldb.default_table_type=memory
hsqldb.cache_file_scale=1
hsqldb.log_size=200
modified=no
hsqldb.cache_version=1.7.0
hsqldb.original_version=1.8.0
hsqldb.compatible_version=1.8.0
[root@hadoop1 .sqoop]# more metastore.db.script
CREATE SCHEMA PUBLIC AUTHORIZATION DBA
CREATE MEMORY TABLE SQOOP_ROOT(VERSION INTEGER,PROPNAME VARCHAR(128) NOT NULL,PROPVAL VARCHAR(256),CONSTRAINT SQOOP_ROOT_UNQ UNIQUE(VERSION,PROPNAME))
CREATE MEMORY TABLE SQOOP_SESSIONS(JOB_NAME VARCHAR(64) NOT NULL,PROPNAME VARCHAR(128) NOT NULL,PROPVAL VARCHAR(1024),PROPCLASS VARCHAR(32) NOT NULL,CONSTRAINT SQOOP_SESSIONS_UNQ UNIQUE(JOB_NAME,PROPNAME,PROPCLASS))
CREATE USER SA PASSWORD ""
GRANT DBA TO SA
SET WRITE_DELAY 10
SET SCHEMA PUBLIC
INSERT INTO SQOOP_ROOT VALUES(NULL,'sqoop.hsqldb.job.storage.version','0')
INSERT INTO SQOOP_ROOT VALUES(0,'sqoop.hsqldb.job.info.table','SQOOP_SESSIONS')
INSERT INTO SQOOP_SESSIONS VALUES('job1','sqoop.tool','import','schema')
INSERT INTO SQOOP_SESSIONS VALUES('job1','sqoop.property.set.id','0','schema')
INSERT INTO SQOOP_SESSIONS VALUES('job1','verbose','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','incremental.last.value','2017-04-11 13:47:56.0','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','db.connect.string','jdbc:mysql://hadoop1:3306/pjh','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','codegen.output.delimiters.escape','0','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','codegen.output.delimiters.enclose.required','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','codegen.input.delimiters.field','0','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','hbase.create.table','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','hdfs.append.dir','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','codegen.input.delimiters.escape','0','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','accumulo.create.table','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','import.fetch.size','null','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','codegen.input.delimiters.enclose.required','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','db.username','root','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','reset.onemapper','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','codegen.output.delimiters.record','10','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','import.max.inline.lob.size','16777216','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','hbase.bulk.load.enabled','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','hcatalog.create.table','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','db.clear.staging.table','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','incremental.col','ts','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','codegen.input.delimiters.record','0','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','db.password_mysql.file','/user/hadoop/.password_mysql','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','enable.compression','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','hive.overwrite.table','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','hive.import','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','codegen.input.delimiters.enclose','0','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','accumulo.batch.size','10240000','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','hive.drop.delims','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','codegen.output.delimiters.enclose','0','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','hdfs.delete-target.dir','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','codegen.output.dir','.','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','codegen.auto.compile.dir','true','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','db.query','select id,name,ts from t1 where id<99 and $CONDITIONS','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','relaxed.isolation','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','mapreduce.num.mappers','4','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','accumulo.max.latency','5000','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','import.direct.split.size','0','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','codegen.output.delimiters.field','44','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','export.new.update','UpdateOnly','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','incremental.mode','DateLastModified','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','hdfs.file.format','TextFile','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','codegen.compile.dir','/tmp/sqoop-root/compile/eb4331b0d50336a1ed2a6c3c679460f3','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','direct.import','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','db.split.column','id','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','hdfs.target.dir','/user/shared/t1_q6','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','hive.fail.table.exists','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','merge.key.col','id','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','db.batch','false','SqoopOptions')
INSERT INTO SQOOP_SESSIONS VALUES('job1','rpc.engine.org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB','org.apache.hadoop.ipc.ProtobufRpcEngine','config')
INSERT INTO SQOOP_SESSIONS VALUES('job1','sqoop.job.storage.implementations','com.cloudera.sqoop.metastore.hsqldb.HsqldbJobStorage,com.cloudera.sqoop.metastore.hsqldb.AutoHsqldbStorage','config')
INSERT INTO SQOOP_SESSIONS VALUES('job1','mapreduce.client.genericoptionsparser.used','true','config')
[root@hadoop1 .sqoop]#