使用 Cloudera Impala SQL 进行交集、并集、差集查询
2014-12-25 21:30
429 查看
impala-shell -i hadoop3 -f join_test.sql
Starting Impala Shell without Kerberos authentication
Connected to hadoop3:21000
Server version: impalad version 1.2.4 RELEASE (build ac29ae09d66c1244fe2ceb293083723226e66c1a)
use db02
show tables
+------+
| name |
+------+
| d1 |
| d2 |
+------+
Returned 2 row(s) in 0.01s
select * from d1
+-----------+-----+---------+
| username | age | is_male |
+-----------+-----+---------+
| zhangshan | 23 | 1 |
| lisiiiii | 24 | 1 |
| wangmazi | 30 | 1 |
| meinvvvv | 18 | 0 |
| damaaaaa | 55 | 0 |
+-----------+-----+---------+
Returned 5 row(s) in 0.19s
select * from d2
+-----+---------+
| age | options |
+-----+---------+
| 1 | a |
| 23 | bb |
| 50 | ccc |
| 30 | dddd |
| 66 | eeeee |
+-----+---------+
Returned 5 row(s) in 0.16s
#交集
select * from d1 a inner join d2 b on a.age=b.age
+-----------+-----+---------+-----+---------+
| username | age | is_male | age | options |
+-----------+-----+---------+-----+---------+
| zhangshan | 23 | 1 | 23 | bb |
| wangmazi | 30 | 1 | 30 | dddd |
+-----------+-----+---------+-----+---------+
Returned 2 row(s) in 0.29s
#多个条件下的交集
select * from d1 a inner join d2 b on a.age=b.age and a.username in ('zhangshan','lisiiiii','meinvvvv') and b.options in ('bb','ccc')
+-----------+-----+---------+-----+---------+
| username | age | is_male | age | options |
+-----------+-----+---------+-----+---------+
| zhangshan | 23 | 1 | 23 | bb |
+-----------+-----+---------+-----+---------+
Returned 1 row(s) in 0.33s
#左集
select * from d1 a left join d2 b on a.age=b.age
+-----------+-----+---------+------+---------+
| username | age | is_male | age | options |
+-----------+-----+---------+------+---------+
| zhangshan | 23 | 1 | 23 | bb |
| lisiiiii | 24 | 1 | NULL | NULL |
| wangmazi | 30 | 1 | 30 | dddd |
| meinvvvv | 18 | 0 | NULL | NULL |
| damaaaaa | 55 | 0 | NULL | NULL |
+-----------+-----+---------+------+---------+
Returned 5 row(s) in 0.28s
#左集-右集=差集
select * from d1 a left join d2 b on a.age=b.age where b.age is null
+----------+-----+---------+------+---------+
| username | age | is_male | age | options |
+----------+-----+---------+------+---------+
| lisiiiii | 24 | 1 | NULL | NULL |
| meinvvvv | 18 | 0 | NULL | NULL |
| damaaaaa | 55 | 0 | NULL | NULL |
+----------+-----+---------+------+---------+
Returned 3 row(s) in 0.28s
#右集
select * from d1 a right join d2 b on a.age=b.age
+-----------+------+---------+-----+---------+
| username | age | is_male | age | options |
+-----------+------+---------+-----+---------+
| zhangshan | 23 | 1 | 23 | bb |
| wangmazi | 30 | 1 | 30 | dddd |
| NULL | NULL | NULL | 1 | a |
| NULL | NULL | NULL | 50 | ccc |
| NULL | NULL | NULL | 66 | eeeee |
+-----------+------+---------+-----+---------+
Returned 5 row(s) in 0.31s
#右集-左集=差集
select * from d1 a right join d2 b on a.age=b.age where a.age is null
+----------+------+---------+-----+---------+
| username | age | is_male | age | options |
+----------+------+---------+-----+---------+
| NULL | NULL | NULL | 1 | a |
| NULL | NULL | NULL | 50 | ccc |
| NULL | NULL | NULL | 66 | eeeee |
+----------+------+---------+-----+---------+
Returned 3 row(s) in 0.31s
#并集
select * from d1 a full outer join d2 b on a.age=b.age
+-----------+------+---------+------+---------+
| username | age | is_male | age | options |
+-----------+------+---------+------+---------+
| zhangshan | 23 | 1 | 23 | bb |
| lisiiiii | 24 | 1 | NULL | NULL |
| wangmazi | 30 | 1 | 30 | dddd |
| meinvvvv | 18 | 0 | NULL | NULL |
| damaaaaa | 55 | 0 | NULL | NULL |
| NULL | NULL | NULL | 1 | a |
| NULL | NULL | NULL | 50 | ccc |
| NULL | NULL | NULL | 66 | eeeee |
+-----------+------+---------+------+---------+
Returned 8 row(s) in 0.31s
#并集-交集(即对称差集:全外连接后只保留两侧未匹配上的行)
select * from d1 a full outer join d2 b on a.age=b.age where a.age is null or b.age is null
+----------+------+---------+------+---------+
| username | age | is_male | age | options |
+----------+------+---------+------+---------+
| lisiiiii | 24 | 1 | NULL | NULL |
| meinvvvv | 18 | 0 | NULL | NULL |
| damaaaaa | 55 | 0 | NULL | NULL |
| NULL | NULL | NULL | 1 | a |
| NULL | NULL | NULL | 50 | ccc |
| NULL | NULL | NULL | 66 | eeeee |
+----------+------+---------+------+---------+
Returned 6 row(s) in 0.31s
#多个条件下的左集(左右两表的条件都写在 ON 中;注意:ON 中的附加条件只决定能否匹配,不会过滤左表的行,所以左表 5 行全部保留,未匹配的行右侧为 NULL)
select * from d1 a left join d2 b on a.age=b.age and a.username in ('zhangshan','lisiiiii','meinvvvv') and b.options in ('bb','ccc')
+-----------+-----+---------+------+---------+
| username | age | is_male | age | options |
+-----------+-----+---------+------+---------+
| zhangshan | 23 | 1 | 23 | bb |
| lisiiiii | 24 | 1 | NULL | NULL |
| wangmazi | 30 | 1 | NULL | NULL |
| meinvvvv | 18 | 0 | NULL | NULL |
| damaaaaa | 55 | 0 | NULL | NULL |
+-----------+-----+---------+------+---------+
Returned 5 row(s) in 0.30s
#多个条件下的左集(仅对左表加条件,且条件写在 ON 中;wangmazi 的 age=30 虽然在 d2 中存在,但不满足 username 条件,所以右侧为 NULL,左表的行仍然全部保留)
select * from d1 a left join d2 b on a.age=b.age and a.username in ('zhangshan','lisiiiii','meinvvvv')
+-----------+-----+---------+------+---------+
| username | age | is_male | age | options |
+-----------+-----+---------+------+---------+
| zhangshan | 23 | 1 | 23 | bb |
| lisiiiii | 24 | 1 | NULL | NULL |
| wangmazi | 30 | 1 | NULL | NULL |
| meinvvvv | 18 | 0 | NULL | NULL |
| damaaaaa | 55 | 0 | NULL | NULL |
+-----------+-----+---------+------+---------+
Returned 5 row(s) in 0.31s
#两个表分别多个条件下的左集(先用子查询分别过滤两个表,再做左连接;这样左表的行才会真正被过滤,结果只剩 3 行)
select * from (select * from d1 where username in ('zhangshan','lisiiiii','meinvvvv')) as a
left join (select * from d2 where options in ('bb','ccc')) as b on a.age=b.age
+-----------+-----+---------+------+---------+
| username | age | is_male | age | options |
+-----------+-----+---------+------+---------+
| zhangshan | 23 | 1 | 23 | bb |
| lisiiiii | 24 | 1 | NULL | NULL |
| meinvvvv | 18 | 0 | NULL | NULL |
+-----------+-----+---------+------+---------+
Returned 3 row(s) in 0.32s
#两个表分别多个条件下的左集-右集=差集
select * from (select * from d1 where username in ('zhangshan','lisiiiii','meinvvvv')) as a
left join (select * from d2 where options in ('bb','ccc')) as b on a.age=b.age where b.age is null
+----------+-----+---------+------+---------+
| username | age | is_male | age | options |
+----------+-----+---------+------+---------+
| lisiiiii | 24 | 1 | NULL | NULL |
| meinvvvv | 18 | 0 | NULL | NULL |
+----------+-----+---------+------+---------+
Returned 2 row(s) in 0.31s
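#差集的另一种办法:NOT IN 子查询(注意:WHERE 子句中的子查询从 Impala 2.0 版本开始支持,1.x 版本不支持;下面的报错来自本文使用的 1.2.4 版本。另外,若子查询结果中含有 NULL,NOT IN 不会返回任何行,此时应改用 NOT EXISTS 或上面的左连接写法。)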
select distinct age from d1 where username in ('zhangshan','lisiiiii','meinvvvv') and age not in (select distinct age from d2 where options in ('bb','ccc'))
ERROR: AnalysisException: Syntax error in line 1:
...einvvvv') and age not in (select distinct age from d2 ...
^
Encountered: SELECT
Expected: AVG, CASE, CAST, COUNT, DISTINCTPC, DISTINCTPCSA, FALSE,
GROUP_CONCAT, IF, INTERVAL, MAX, MIN, NDV, NOT, NULL, SUM, TRUE, IDENTIFIER
CAUSED BY: Exception: Syntax error
Could not execute command: select distinct age from d1 where username in
('zhangshan','lisiiiii','meinvvvv') and age not in (select distinct age from
d2 where options in ('bb','ccc'))
Starting Impala Shell without Kerberos authentication
Connected to hadoop3:21000
Server version: impalad version 1.2.4 RELEASE (build ac29ae09d66c1244fe2ceb293083723226e66c1a)
use db02
show tables
+------+
| name |
+------+
| d1 |
| d2 |
+------+
Returned 2 row(s) in 0.01s
select * from d1
+-----------+-----+---------+
| username | age | is_male |
+-----------+-----+---------+
| zhangshan | 23 | 1 |
| lisiiiii | 24 | 1 |
| wangmazi | 30 | 1 |
| meinvvvv | 18 | 0 |
| damaaaaa | 55 | 0 |
+-----------+-----+---------+
Returned 5 row(s) in 0.19s
select * from d2
+-----+---------+
| age | options |
+-----+---------+
| 1 | a |
| 23 | bb |
| 50 | ccc |
| 30 | dddd |
| 66 | eeeee |
+-----+---------+
Returned 5 row(s) in 0.16s
#交集
select * from d1 a inner join d2 b on a.age=b.age
+-----------+-----+---------+-----+---------+
| username | age | is_male | age | options |
+-----------+-----+---------+-----+---------+
| zhangshan | 23 | 1 | 23 | bb |
| wangmazi | 30 | 1 | 30 | dddd |
+-----------+-----+---------+-----+---------+
Returned 2 row(s) in 0.29s
#多个条件下的交集
select * from d1 a inner join d2 b on a.age=b.age and a.username in ('zhangshan','lisiiiii','meinvvvv') and b.options in ('bb','ccc')
+-----------+-----+---------+-----+---------+
| username | age | is_male | age | options |
+-----------+-----+---------+-----+---------+
| zhangshan | 23 | 1 | 23 | bb |
+-----------+-----+---------+-----+---------+
Returned 1 row(s) in 0.33s
#左集
select * from d1 a left join d2 b on a.age=b.age
+-----------+-----+---------+------+---------+
| username | age | is_male | age | options |
+-----------+-----+---------+------+---------+
| zhangshan | 23 | 1 | 23 | bb |
| lisiiiii | 24 | 1 | NULL | NULL |
| wangmazi | 30 | 1 | 30 | dddd |
| meinvvvv | 18 | 0 | NULL | NULL |
| damaaaaa | 55 | 0 | NULL | NULL |
+-----------+-----+---------+------+---------+
Returned 5 row(s) in 0.28s
#左集-右集=差集
select * from d1 a left join d2 b on a.age=b.age where b.age is null
+----------+-----+---------+------+---------+
| username | age | is_male | age | options |
+----------+-----+---------+------+---------+
| lisiiiii | 24 | 1 | NULL | NULL |
| meinvvvv | 18 | 0 | NULL | NULL |
| damaaaaa | 55 | 0 | NULL | NULL |
+----------+-----+---------+------+---------+
Returned 3 row(s) in 0.28s
#右集
select * from d1 a right join d2 b on a.age=b.age
+-----------+------+---------+-----+---------+
| username | age | is_male | age | options |
+-----------+------+---------+-----+---------+
| zhangshan | 23 | 1 | 23 | bb |
| wangmazi | 30 | 1 | 30 | dddd |
| NULL | NULL | NULL | 1 | a |
| NULL | NULL | NULL | 50 | ccc |
| NULL | NULL | NULL | 66 | eeeee |
+-----------+------+---------+-----+---------+
Returned 5 row(s) in 0.31s
#右集-左集=差集
select * from d1 a right join d2 b on a.age=b.age where a.age is null
+----------+------+---------+-----+---------+
| username | age | is_male | age | options |
+----------+------+---------+-----+---------+
| NULL | NULL | NULL | 1 | a |
| NULL | NULL | NULL | 50 | ccc |
| NULL | NULL | NULL | 66 | eeeee |
+----------+------+---------+-----+---------+
Returned 3 row(s) in 0.31s
#并集
select * from d1 a full outer join d2 b on a.age=b.age
+-----------+------+---------+------+---------+
| username | age | is_male | age | options |
+-----------+------+---------+------+---------+
| zhangshan | 23 | 1 | 23 | bb |
| lisiiiii | 24 | 1 | NULL | NULL |
| wangmazi | 30 | 1 | 30 | dddd |
| meinvvvv | 18 | 0 | NULL | NULL |
| damaaaaa | 55 | 0 | NULL | NULL |
| NULL | NULL | NULL | 1 | a |
| NULL | NULL | NULL | 50 | ccc |
| NULL | NULL | NULL | 66 | eeeee |
+-----------+------+---------+------+---------+
Returned 8 row(s) in 0.31s
#并集-交集(即对称差集:全外连接后只保留两侧未匹配上的行)
select * from d1 a full outer join d2 b on a.age=b.age where a.age is null or b.age is null
+----------+------+---------+------+---------+
| username | age | is_male | age | options |
+----------+------+---------+------+---------+
| lisiiiii | 24 | 1 | NULL | NULL |
| meinvvvv | 18 | 0 | NULL | NULL |
| damaaaaa | 55 | 0 | NULL | NULL |
| NULL | NULL | NULL | 1 | a |
| NULL | NULL | NULL | 50 | ccc |
| NULL | NULL | NULL | 66 | eeeee |
+----------+------+---------+------+---------+
Returned 6 row(s) in 0.31s
#多个条件下的左集(左右两表的条件都写在 ON 中;注意:ON 中的附加条件只决定能否匹配,不会过滤左表的行,所以左表 5 行全部保留,未匹配的行右侧为 NULL)
select * from d1 a left join d2 b on a.age=b.age and a.username in ('zhangshan','lisiiiii','meinvvvv') and b.options in ('bb','ccc')
+-----------+-----+---------+------+---------+
| username | age | is_male | age | options |
+-----------+-----+---------+------+---------+
| zhangshan | 23 | 1 | 23 | bb |
| lisiiiii | 24 | 1 | NULL | NULL |
| wangmazi | 30 | 1 | NULL | NULL |
| meinvvvv | 18 | 0 | NULL | NULL |
| damaaaaa | 55 | 0 | NULL | NULL |
+-----------+-----+---------+------+---------+
Returned 5 row(s) in 0.30s
#多个条件下的左集(仅对左表加条件,且条件写在 ON 中;wangmazi 的 age=30 虽然在 d2 中存在,但不满足 username 条件,所以右侧为 NULL,左表的行仍然全部保留)
select * from d1 a left join d2 b on a.age=b.age and a.username in ('zhangshan','lisiiiii','meinvvvv')
+-----------+-----+---------+------+---------+
| username | age | is_male | age | options |
+-----------+-----+---------+------+---------+
| zhangshan | 23 | 1 | 23 | bb |
| lisiiiii | 24 | 1 | NULL | NULL |
| wangmazi | 30 | 1 | NULL | NULL |
| meinvvvv | 18 | 0 | NULL | NULL |
| damaaaaa | 55 | 0 | NULL | NULL |
+-----------+-----+---------+------+---------+
Returned 5 row(s) in 0.31s
#两个表分别多个条件下的左集(先用子查询分别过滤两个表,再做左连接;这样左表的行才会真正被过滤,结果只剩 3 行)
select * from (select * from d1 where username in ('zhangshan','lisiiiii','meinvvvv')) as a
left join (select * from d2 where options in ('bb','ccc')) as b on a.age=b.age
+-----------+-----+---------+------+---------+
| username | age | is_male | age | options |
+-----------+-----+---------+------+---------+
| zhangshan | 23 | 1 | 23 | bb |
| lisiiiii | 24 | 1 | NULL | NULL |
| meinvvvv | 18 | 0 | NULL | NULL |
+-----------+-----+---------+------+---------+
Returned 3 row(s) in 0.32s
#两个表分别多个条件下的左集-右集=差集
select * from (select * from d1 where username in ('zhangshan','lisiiiii','meinvvvv')) as a
left join (select * from d2 where options in ('bb','ccc')) as b on a.age=b.age where b.age is null
+----------+-----+---------+------+---------+
| username | age | is_male | age | options |
+----------+-----+---------+------+---------+
| lisiiiii | 24 | 1 | NULL | NULL |
| meinvvvv | 18 | 0 | NULL | NULL |
+----------+-----+---------+------+---------+
Returned 2 row(s) in 0.31s
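#差集的另一种办法:NOT IN 子查询(注意:WHERE 子句中的子查询从 Impala 2.0 版本开始支持,1.x 版本不支持;下面的报错来自本文使用的 1.2.4 版本。另外,若子查询结果中含有 NULL,NOT IN 不会返回任何行,此时应改用 NOT EXISTS 或上面的左连接写法。)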
select distinct age from d1 where username in ('zhangshan','lisiiiii','meinvvvv') and age not in (select distinct age from d2 where options in ('bb','ccc'))
ERROR: AnalysisException: Syntax error in line 1:
...einvvvv') and age not in (select distinct age from d2 ...
^
Encountered: SELECT
Expected: AVG, CASE, CAST, COUNT, DISTINCTPC, DISTINCTPCSA, FALSE,
GROUP_CONCAT, IF, INTERVAL, MAX, MIN, NDV, NOT, NULL, SUM, TRUE, IDENTIFIER
CAUSED BY: Exception: Syntax error
Could not execute command: select distinct age from d1 where username in
('zhangshan','lisiiiii','meinvvvv') and age not in (select distinct age from
d2 where options in ('bb','ccc'))
相关文章推荐
- SQL查询交集、并集、差集
- SQL查询:并集、差集、交集
- Java中使用Set进行并集,差集,交集查找
- oracle中sql语句中多个查询结果的交集、差集和并集
- Oracle 多个查询结果的交集、差集和并集
- SQL集合运算 差集 并集 交集
- 在SQL中使用convert函数进行日期的查询的代码
- SQL的并集UNION,交集JOIN,交叉连接(CROSS JOIN笛卡尔积),差集(NOT IN)
- 构造使用IN子句的动态Transact-SQL方法进行编号查询
- 使用SQL语句对表进行分页查询
- 构造使用IN子句的动态Transact-SQL方法进行编号查询
- Hibernate学习31 -- Hibernate查询语言(HQL)5 -- 直接使用sql进行查询
- 使用sql-server进行分布式查询(链接服务器)
- 在Hibernate中使用HibernateTemplate来进行包含sql语句的查询
- ORACLE 数据查询集合即:查询结果的集合操作 并集 交集 差集
- SQL集合运算:差集、交集、并集
- 构造使用IN子句的动态Transact-SQL方法进行编号查询
- 在SQL中使用convert函数进行日期的查询的代码
- 在SQL中使用convert函数进行日期的查询
- sql关键字 左连接 右连接 外连接 交叉连接 交集 并集 差集 批量更新/删除