建表
-- 表名:t2
CREATE TABLE t2(a STRING,b string,c INT);
-- 表结构
INSERT INTO t2 values('2014','A',3),('2014','B',1),('2014','C',2),('2014','A',4),('2015','D',3);
DESCRIBE t2;
1)按a分组取b字段最小时对应的c字段
思路
通过子查询row_number() over(partition by a order by b)
从让a分组 取得b字段的升序结果,然后查出abc
结果,rn=1
为排名第一为最小
SQL
SELECT a,
c AS min_c
FROM
(SELECT a,
b,
c,
row_number() over(partition BY a ORDER BY b) AS rn
FROM t2)a
WHERE rn = 1;
输出结果
2)按a分组取b字段排名第二的c字段
思路
同上,只是结果rn=2
即可
SQL
SELECT a,
c AS second_c
FROM
(SELECT a,
b,
c,
row_number() over(partition BY a
ORDER BY b) AS rn
FROM t2)a where rn = 2;
输出结果
因为我插入的时候2015年只有一条数据,所以只显示14年的
3)按a分组取b字段最小和最大时的c
思路
跟着上面的思路,可以使用两个窗口函数row_number()
求出最大和最小值然后在外面一层使用if判断是否为空取出即可
SQL
INSERT INTO t2 values('2015','d',3);
INSERT INTO t2 values('2015','E',2);
SELECT
a,
min(if(rn_asc = 1,c,NULL)) as min_c,
max(if(rn_desc = 1,c,null)) as max_c
FROM
(SELECT a,
b,
c,
row_number() OVER(PARTITION BY a ORDER BY b) rn_asc,
row_number() OVER(PARTITION BY a ORDER BY b DESC) rn_desc
FROM t2)tmp
WHERE rn_asc = 1 or rn_desc =1;
GROUP BY a;
输出结果
4)按照a分组取b字段的第二小和第二大的c
思路
同上,只需将排名改成2即可
SELECT
a,
min(if(rn_asc = 2, c, NULL)) as min_c,
max(if(rn_desc = 2, c, null)) as max_c
FROM
(
SELECT
a,
b,
c,
row_number() OVER(PARTITION BY a
ORDER BY
b) rn_asc,
row_number() OVER(PARTITION BY a
ORDER BY
b DESC) rn_desc
FROM
t2)tmp
WHERE
rn_asc = 2
or rn_desc = 2
GROUP BY
a;
输出结果
5)按照a分组取b字段的前两小和前两大c
思路
要取出前两小和前两大的话要先查出前两大和前两小然后join在一起,要查出前两大的话肯定是一个数组,那么就得用到collect_set()
和count_ws()
函数进行汇总,然后再嵌套一层子查询通过row_number()
窗口分区查询b对应c的排序
SQL
SELECT tmp1.a,
tmp1.min_c,
tmp2.max_c
FROM (
select a,
concat_ws(',',
collect_list(CAST(c as string))) as min_c
from (
select a,
b,
c,
row_number() over (partition by a
order by
b) as asc_rn
from t2) a
where asc_rn <= 2
group by a) tmp1
JOIN
(
select a,
concat_ws(',',
collect_list(CAST(c as string))) as max_c
from (
select a,
b,
c,
row_number() over (partition by a
order by
b DESC) as desc_rn
from t2) a
where desc_rn <= 2
group by a) tmp2
ON
tmp1.a = tmp2.a;