临沂培训学校网站建设,西安比较好的软件公司,优质的做pc端网站,网站服务器收费目录 1.删除 2.更新：3.行转列：4.列转行：5.分析函数：6.多维分析 7.数据倾斜 groupby：join： 掌握下面的技巧，你的SQL水平将有一个质的提升！ 1.删除
正常hive删除操作基本都是覆盖原数据 3.行转列 4.列转行 5.分析函数 6.多维分析 7.数据倾斜（groupby、join） 掌握下面的技巧，你的SQL水平将有一个质的提升！ 1.删除
正常hive删除操作基本都是覆盖原数据
-- Hive "delete": rewrite the table with every row except the one to remove.
-- `select *` is kept on purpose: the statement copies the table back onto
-- itself, so every column must be carried over unchanged.
-- NOTE: rows whose id is NULL are also dropped, because `id != 666`
-- evaluates to NULL for them and the where clause rejects NULL.
insert overwrite table tmp
select *
from tmp
where id != 666;

-- 2. Update
更新也是覆盖操作
-- Hive "update": rewrite the table, substituting the new value only on the
-- row being changed (id = 1, label = 'grade' -> value 25) and copying every
-- other value through untouched.
-- The where clause additionally leaves out id 666, exactly as the delete
-- example does, so that row is not written back.
insert overwrite table tmp
select
    id,
    label,
    if(id = 1 and label = 'grade', 25, value) as value
from tmp
where id != 666;

-- 3. Row-to-column conversion
思路1：先通过concat函数把多列数据拼接成一个长的字符串（分隔符为逗号），再通过explode函数炸裂成多行，然后使用split函数根据分隔符进行切割。
-- Approach 1: turn the height / weight / age columns of tmp into
-- (id, label, value) rows by packing them into one string and exploding it.
-- Step 3: split each "label:value" fragment into its two parts.
select
    id,
    split(info, ':')[0] as label,
    split(info, ':')[1] as value
from (
    -- Step 1: pack the columns into one string such as
    -- "heit:180,weit:60,age:26".
    select
        id,
        concat('heit', ':', height, ',', 'weit', ':', weight, ',', 'age', ':', age) as value
    from tmp
) as packed
-- Step 2: explode the packed string into one row per "label:value" fragment.
lateral view explode(split(value, ',')) mytable as info;

-- Approach 2: stack one select per source column with union all.
-- Approach 2: one select per source column, stacked with union all.
-- Each branch emits the same (id, label, value) shape; union all is used
-- because the branches carry different labels, so no de-duplication is needed.
select id, 'heit' as label, height as value
from tmp
union all
select id, 'weit' as label, weight as value
from tmp
union all
select id, 'age' as label, age as value
from tmp;

-- 4. Column-to-row conversion
思路1:多表join进行关联
-- Approach 1: pivot (id, label, value) rows from tmp2 back into one row per
-- id by joining one filtered copy of the table per label.
-- Inner joins are used, so an id that lacks any one of the three labels does
-- not appear in the result.
select
    heit_rows.id as id,
    heit_rows.value as height,
    weit_rows.value as weight,
    age_rows.value as age
from (
    select id, label, value
    from tmp2
    where label = 'heit'
) as heit_rows
inner join (
    select id, label, value
    from tmp2
    where label = 'weit'
) as weit_rows
    on heit_rows.id = weit_rows.id
inner join (
    select id, label, value
    from tmp2
    where label = 'age'
) as age_rows
    on heit_rows.id = age_rows.id;

-- Approach 2: use max(if(...)) or max(case when ...); max can be replaced
-- with sum depending on the data.
-- Approach 2: pivot with conditional aggregation. Each case expression yields
-- the value only on the row carrying the wanted label and NULL elsewhere;
-- max() ignores the NULLs and keeps the single matching value per id.
select
    id,
    max(case when label = 'heit' then value end) as height,
    max(case when label = 'weit' then value end) as weight,
    max(case when label = 'age' then value end) as age
from tmp2
group by
    id;
思路3：map的思想，先拼接成map的形式，再取下标。
-- Approach 3: pivot through a map. Per id, the "label:value" fragments are
-- collected, joined with commas, parsed into a map with str_to_map, and the
-- wanted entries are then read back by key.
-- The lookup keys are the label values stored in tmp2 ('heit', 'weit',
-- 'age'), matching the labels the other pivot queries filter on; a key that
-- is not present in the map yields NULL.
select
    id,
    tmpmap['heit'] as height,
    tmpmap['weit'] as weight,
    tmpmap['age'] as age
from (
    select
        id,
        str_to_map(
            concat_ws(',', collect_set(concat(label, ':', value))),
            ',',
            ':'
        ) as tmpmap
    from tmp2
    group by id
) as label_maps;

-- 5. Analytic (window) functions
-- Offset and first/last functions, evaluated per id in label order.
-- lead/lag fall back to the defaults 0 and 999 when no following/preceding
-- row exists in the partition.
select
    id,
    label,
    value,
    lead(value, 1, 0) over (partition by id order by label) as lead,
    lag(value, 1, 999) over (partition by id order by label) as lag,
    first_value(value) over (partition by id order by label) as first_value,
    -- An explicit full-partition frame is required here: with the default
    -- frame (which ends at the current row) last_value would just return the
    -- current row's value instead of the partition's last one.
    last_value(value) over (
        partition by id
        order by label
        rows between unbounded preceding and unbounded following
    ) as last_value
from tmp;

-- Ranking functions, evaluated per id in value order:
-- row_number is unique, rank leaves gaps after ties, dense_rank does not.
select
    id,
    label,
    value,
    row_number() over (partition by id order by value) as row_number,
    rank() over (partition by id order by value) as rank,
    dense_rank() over (partition by id order by value) as dense_rank
from tmp;

-- 6. Multi-dimensional analysis
-- Explicit grouping sets: counts per single column, per column pair, and the
-- grand total (the empty set). Grouping__ID identifies which grouping set
-- produced each output row.
select
    col1,
    col2,
    col3,
    count(1),
    Grouping__ID
from tmp
group by col1, col2, col3
grouping sets (
    col1,
    col2,
    col3,
    (col1, col2),
    (col1, col3),
    (col2, col3),
    ()
);

-- with cube: aggregates over every combination of the grouped columns.
select
    col1,
    col2,
    col3,
    count(1),
    Grouping__ID
from tmp
group by col1, col2, col3
with cube;

-- 7. Data skew
groupby
-- Skewed group by: two-stage aggregation with a random salt.
-- Stage 1 counts rows per (rd, label), so the rows of one hot label are
-- spread over many salt values; stage 2 adds the partial counts per label.
-- The output alias `all` is a reserved word and therefore back-quoted.
select
    label,
    sum(cnt) as `all`
from (
    select
        rd,
        label,
        sum(1) as cnt
    from (
        select
            id,
            label,
            round(rand(), 2) as rd,
            value
        from tmp1
    ) as salted
    group by rd, label
) as partial_counts
group by label;

-- join
-- Skewed join: salt the tmp1 side and replicate the tmp2 side.
-- Every tmp1 row gets a random integer salt rd in 0..9; every tmp2 row is
-- copied once per salt value, so each tmp1 row still meets exactly one copy
-- of its tmp2 partner while one hot label is spread over ten join keys.
-- The salt is floor(rand() * 10) rather than round(rand(), 1): rounding can
-- produce 1.0, which has no replica, and those rows would be dropped by the
-- inner join.
-- The output alias `all` is a reserved word and therefore back-quoted.
select
    label,
    sum(cnt) as `all`
from (
    -- Partial sums per (rd, label); the outer query adds them up per label.
    select
        rd,
        label,
        sum(value) as cnt
    from (
        select
            salted.rd as rd,
            salted.label as label,
            salted.value * replicated.value as value
        from (
            select
                id,
                cast(floor(rand() * 10) as int) as rd,
                label,
                value
            from tmp1
        ) as salted
        inner join (
            select
                id,
                cast(rd as int) as rd,
                label,
                value
            from tmp2
            lateral view explode(split('0,1,2,3,4,5,6,7,8,9', ',')) mytable as rd
        ) as replicated
            on salted.rd = replicated.rd
            and salted.label = replicated.label
    ) as joined
    group by rd, label
) as partial_sums
group by label;