您的位置:首页 > 数据库

使用 Cloudera Impala SQL 进行交集、并集、差集查询

2014-12-25 21:30 429 查看
impala-shell -i hadoop3 -f join_test.sql

Starting Impala Shell without Kerberos authentication

Connected to hadoop3:21000

Server version: impalad version 1.2.4 RELEASE (build ac29ae09d66c1244fe2ceb293083723226e66c1a)

use db02

show tables

+------+

| name |

+------+

| d1   |

| d2   |

+------+

Returned 2 row(s) in 0.01s

select * from d1

+-----------+-----+---------+

| username  | age | is_male |

+-----------+-----+---------+

| zhangshan | 23  | 1       |

| lisiiiii  | 24  | 1       |

| wangmazi  | 30  | 1       |

| meinvvvv  | 18  | 0       |

| damaaaaa  | 55  | 0       |

+-----------+-----+---------+

Returned 5 row(s) in 0.19s

select * from d2

+-----+---------+

| age | options |

+-----+---------+

| 1   | a       |

| 23  | bb      |

| 50  | ccc     |

| 30  | dddd    |

| 66  | eeeee   |

+-----+---------+

Returned 5 row(s) in 0.16s

#交集

select * from d1 a inner join d2 b on a.age=b.age

+-----------+-----+---------+-----+---------+

| username  | age | is_male | age | options |

+-----------+-----+---------+-----+---------+

| zhangshan | 23  | 1       | 23  | bb      |

| wangmazi  | 30  | 1       | 30  | dddd    |

+-----------+-----+---------+-----+---------+

Returned 2 row(s) in 0.29s

#多个条件下的交集

select * from d1 a inner join d2 b on a.age=b.age and a.username in ('zhangshan','lisiiiii','meinvvvv') and b.options in ('bb','ccc')

+-----------+-----+---------+-----+---------+

| username  | age | is_male | age | options |

+-----------+-----+---------+-----+---------+

| zhangshan | 23  | 1       | 23  | bb      |

+-----------+-----+---------+-----+---------+

Returned 1 row(s) in 0.33s

#左集

select * from d1 a left  join d2 b on a.age=b.age

+-----------+-----+---------+------+---------+

| username  | age | is_male | age  | options |

+-----------+-----+---------+------+---------+

| zhangshan | 23  | 1       | 23   | bb      |

| lisiiiii  | 24  | 1       | NULL | NULL    |

| wangmazi  | 30  | 1       | 30   | dddd    |

| meinvvvv  | 18  | 0       | NULL | NULL    |

| damaaaaa  | 55  | 0       | NULL | NULL    |

+-----------+-----+---------+------+---------+

Returned 5 row(s) in 0.28s

#左集-右集=差集

select * from d1 a left  join d2 b on a.age=b.age where b.age is null

+----------+-----+---------+------+---------+

| username | age | is_male | age  | options |

+----------+-----+---------+------+---------+

| lisiiiii | 24  | 1       | NULL | NULL    |

| meinvvvv | 18  | 0       | NULL | NULL    |

| damaaaaa | 55  | 0       | NULL | NULL    |

+----------+-----+---------+------+---------+

Returned 3 row(s) in 0.28s

#右集

select * from d1 a right join d2 b on a.age=b.age

+-----------+------+---------+-----+---------+

| username  | age  | is_male | age | options |

+-----------+------+---------+-----+---------+

| zhangshan | 23   | 1       | 23  | bb      |

| wangmazi  | 30   | 1       | 30  | dddd    |

| NULL      | NULL | NULL    | 1   | a       |

| NULL      | NULL | NULL    | 50  | ccc     |

| NULL      | NULL | NULL    | 66  | eeeee   |

+-----------+------+---------+-----+---------+

Returned 5 row(s) in 0.31s

#右集-左集=差集

select * from d1 a right join d2 b on a.age=b.age where a.age is null

+----------+------+---------+-----+---------+

| username | age  | is_male | age | options |

+----------+------+---------+-----+---------+

| NULL     | NULL | NULL    | 1   | a       |

| NULL     | NULL | NULL    | 50  | ccc     |

| NULL     | NULL | NULL    | 66  | eeeee   |

+----------+------+---------+-----+---------+

Returned 3 row(s) in 0.31s

#并集

select * from d1 a full outer join d2 b on a.age=b.age

+-----------+------+---------+------+---------+

| username  | age  | is_male | age  | options |

+-----------+------+---------+------+---------+

| zhangshan | 23   | 1       | 23   | bb      |

| lisiiiii  | 24   | 1       | NULL | NULL    |

| wangmazi  | 30   | 1       | 30   | dddd    |

| meinvvvv  | 18   | 0       | NULL | NULL    |

| damaaaaa  | 55   | 0       | NULL | NULL    |

| NULL      | NULL | NULL    | 1    | a       |

| NULL      | NULL | NULL    | 50   | ccc     |

| NULL      | NULL | NULL    | 66   | eeeee   |

+-----------+------+---------+------+---------+

Returned 8 row(s) in 0.31s

#并集-交集

select * from d1 a full outer join d2 b on a.age=b.age where a.age is null or b.age is null

+----------+------+---------+------+---------+

| username | age  | is_male | age  | options |

+----------+------+---------+------+---------+

| lisiiiii | 24   | 1       | NULL | NULL    |

| meinvvvv | 18   | 0       | NULL | NULL    |

| damaaaaa | 55   | 0       | NULL | NULL    |

| NULL     | NULL | NULL    | 1    | a       |

| NULL     | NULL | NULL    | 50   | ccc     |

| NULL     | NULL | NULL    | 66   | eeeee   |

+----------+------+---------+------+---------+

Returned 6 row(s) in 0.31s

#多个条件下的左集(附加条件写在 on 子句中:左表 d1 的所有行都会保留,不满足条件的行其右表列为 NULL)

select * from d1 a left join d2 b on a.age=b.age and a.username in ('zhangshan','lisiiiii','meinvvvv') and b.options in ('bb','ccc')

+-----------+-----+---------+------+---------+

| username  | age | is_male | age  | options |

+-----------+-----+---------+------+---------+

| zhangshan | 23  | 1       | 23   | bb      |

| lisiiiii  | 24  | 1       | NULL | NULL    |

| wangmazi  | 30  | 1       | NULL | NULL    |

| meinvvvv  | 18  | 0       | NULL | NULL    |

| damaaaaa  | 55  | 0       | NULL | NULL    |

+-----------+-----+---------+------+---------+

Returned 5 row(s) in 0.30s

#多个条件下的左集(on 子句中仅对左表 d1 附加 username 条件)

select * from d1 a left join d2 b on a.age=b.age and a.username in ('zhangshan','lisiiiii','meinvvvv')

+-----------+-----+---------+------+---------+

| username  | age | is_male | age  | options |

+-----------+-----+---------+------+---------+

| zhangshan | 23  | 1       | 23   | bb      |

| lisiiiii  | 24  | 1       | NULL | NULL    |

| wangmazi  | 30  | 1       | NULL | NULL    |

| meinvvvv  | 18  | 0       | NULL | NULL    |

| damaaaaa  | 55  | 0       | NULL | NULL    |

+-----------+-----+---------+------+---------+

Returned 5 row(s) in 0.31s

#两个表分别多个条件下的左集

select * from (select * from d1 where username in ('zhangshan','lisiiiii','meinvvvv')) as a

left join (select * from d2 where options in ('bb','ccc')) as b on a.age=b.age

+-----------+-----+---------+------+---------+

| username  | age | is_male | age  | options |

+-----------+-----+---------+------+---------+

| zhangshan | 23  | 1       | 23   | bb      |

| lisiiiii  | 24  | 1       | NULL | NULL    |

| meinvvvv  | 18  | 0       | NULL | NULL    |

+-----------+-----+---------+------+---------+

Returned 3 row(s) in 0.32s

#两个表分别多个条件下的左集-右集=差集

select * from (select * from d1 where username in ('zhangshan','lisiiiii','meinvvvv')) as a

left join (select * from d2 where options in ('bb','ccc')) as b on a.age=b.age where b.age is null

+----------+-----+---------+------+---------+

| username | age | is_male | age  | options |

+----------+-----+---------+------+---------+

| lisiiiii | 24  | 1       | NULL | NULL    |

| meinvvvv | 18  | 0       | NULL | NULL    |

+----------+-----+---------+------+---------+

Returned 2 row(s) in 0.31s

#差集的另一种办法:not in 子查询 (注意:Impala 2.0 版本开始支持下面的语句,1.x 版本不支持;下面的报错即来自本文所用的 1.2.4 版本。)

select distinct age from d1 where username in ('zhangshan','lisiiiii','meinvvvv') and age not in (select distinct age from d2 where options in ('bb','ccc'))

ERROR: AnalysisException: Syntax error in line 1:

...einvvvv') and age not in (select distinct age from d2 ...

                             ^

Encountered: SELECT

Expected: AVG, CASE, CAST, COUNT, DISTINCTPC, DISTINCTPCSA, FALSE,

GROUP_CONCAT, IF, INTERVAL, MAX, MIN, NDV, NOT, NULL, SUM, TRUE, IDENTIFIER

CAUSED BY: Exception: Syntax error

Could not execute command: select distinct age from d1 where username in

('zhangshan','lisiiiii','meinvvvv') and age not in (select distinct age from

d2 where options in ('bb','ccc'))
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息