建表

-- 表名:t2
CREATE TABLE t2(a STRING,b string,c INT);
-- 表结构
INSERT INTO t2 values('2014','A',3),('2014','B',1),('2014','C',2),('2014','A',4),('2015','D',3);
DESCRIBE t2;

image-20220516180959802

1)按a分组取b字段最小时对应的c字段

思路

通过子查询row_number() over(partition by a order by b)从让a分组 取得b字段的升序结果,然后查出abc结果,rn=1为排名第一为最小

SQL

SELECT a,
       c AS min_c
FROM
  (SELECT a,
          b,
          c,
          row_number() over(partition BY a ORDER BY b) AS rn
   FROM t2)a
WHERE rn = 1;

输出结果

image-20220516181015381

2)按a分组取b字段排名第二的c字段

思路

同上,只是结果rn=2即可

SQL

SELECT a,
       c AS second_c
FROM
  (SELECT a,
          b,
          c,
          row_number() over(partition BY a
                            ORDER BY b) AS rn
   FROM t2)a where rn = 2;

输出结果

因为我插入的时候2015年只有一条数据,所以只显示14年的

image-20220516181028195

3)按a分组取b字段最小和最大时的c

思路

跟着上面的思路,可以使用两个窗口函数row_number()求出最大和最小值然后在外面一层使用if判断是否为空取出即可

SQL

INSERT INTO t2 values('2015','d',3);
INSERT INTO t2 values('2015','E',2)SELECT 
     a,
    min(if(rn_asc = 1,c,NULL)) as min_c,
    max(if(rn_desc = 1,c,null)) as max_c
FROM
  (SELECT a,
          b,
          c,
          row_number() OVER(PARTITION BY a ORDER BY b) rn_asc,
          row_number() OVER(PARTITION BY a ORDER BY b DESC) rn_desc
   FROM t2)tmp
   WHERE rn_asc = 1 or rn_desc =1;
   GROUP BY a;

输出结果

image-20220516181042155

4)按照a分组取b字段的第二小和第二大的c

思路

同上,只需将排名改成2即可


SELECT
	a,
	min(if(rn_asc = 2, c, NULL)) as min_c,
	max(if(rn_desc = 2, c, null)) as max_c
FROM
	(
	SELECT
		a,
		b,
		c,
		row_number() OVER(PARTITION BY a
	ORDER BY
		b) rn_asc,
		row_number() OVER(PARTITION BY a
	ORDER BY
		b DESC) rn_desc
	FROM
		t2)tmp
WHERE
	rn_asc = 2
	or rn_desc = 2
GROUP BY
	a;

输出结果

image-20220516181101765

5)按照a分组取b字段的前两小和前两大c

思路

要取出前两小和前两大的话要先查出前两大和前两小然后join在一起,要查出前两大的话肯定是一个数组,那么就得用到collect_set()count_ws()函数进行汇总,然后再嵌套一层子查询通过row_number()窗口分区查询b对应c的排序

SQL

SELECT tmp1.a,
       tmp1.min_c,
       tmp2.max_c
FROM (
         select a,
                concat_ws(',',
                          collect_list(CAST(c as string))) as min_c
         from (
                  select a,
                         b,
                         c,
                         row_number() over (partition by a
                             order by
                                 b) as asc_rn
                  from t2) a
         where asc_rn <= 2
         group by a) tmp1
         JOIN
     (
         select a,
                concat_ws(',',
                          collect_list(CAST(c as string))) as max_c
         from (
                  select a,
                         b,
                         c,
                         row_number() over (partition by a
                             order by
                                 b DESC) as desc_rn
                  from t2) a
         where desc_rn <= 2
         group by a) tmp2
     ON
         tmp1.a = tmp2.a; 

输出结果

image-20220516181113508