当前位置：首页 > news >正文

临沂培训学校网站建设西安比较好的软件公司

news 2025/12/27 12:32:54

临沂培训学校网站建设,西安比较好的软件公司,优质的做pc端网站,网站服务器收费

目录 1.删除 2.更新： 3.行转列： 4.列转行： 5.分析函数： 6.多维分析 7.数据倾斜 groupby： join： 掌握下面的技巧，你的SQL水平将有一个质的提升！ 1.删除 正常hive删除操作基本都是覆盖原数据 3.行转列 4.列转行 5.分析函数 6.多维分析 7.数据倾斜 groupby join

掌握下面的技巧，你的SQL水平将有一个质的提升！

1.删除

正常hive删除操作基本都是覆盖原数据：

insert overwrite table tmp
select * from tmp where id != 666;

2.更新

更新也是覆盖操作：

insert overwrite table tmp
select id, label, if(id = 1 and label = 'grade', 25, value) as value
from tmp
where id != 666;

3.行转列

思路1：先通过concat函数把多列数据拼接成一个长的字符串，分割符为逗号，再通过explode函数炸裂成多行，然后使用split函数根据分隔符进行切割。

-- Step03：最后将info的内容切分
select id, split(info, ':')[0] as label, split(info, ':')[1] as value
from (
    -- Step01：先将数据拼接成“heit:180,weit:60,age:26”
    select id, concat('heit', ':', height, ',', 'weit', ':', weight, ',', 'age', ':', age) as value
    from tmp
) as tmp
-- Step02：然后再借用explode函数将数据膨胀至多行
lateral view explode(split(value, ',')) mytable as info;

思路2：使用union all，多段union。

select id, 'heit' as label, height as value from tmp
union all
select id, 'weit' as label, weight as value from tmp
union all
select id, 'age' as label, age as value from tmp;

4.列转行

思路1：多表join进行关联。

select tmp1.id as id, tmp1.value as height, tmp2.value as weight, tmp3.value as age
from (select id, label, value from tmp2 where label = 'heit') as tmp1
join (select id, label, value from tmp2 where label = 'weit') as tmp2
    on tmp1.id = tmp2.id
join (select id, label, value from tmp2 where label = 'age') as tmp3
    on tmp1.id = tmp3.id;

思路2：使用max(if)或max(case when)，可以根据实际情况换成sum函数。

select id,
    max(case when label = 'heit' then value end) as height,
    max(case when label = 'weit' then value end) as weight,
    max(case when label = 'age' then value end) as age
from tmp2
group by id;

思路3：map的思想，先拼接成map的形式，再取下标。

select id, tmpmap['heit'] as height, tmpmap['weit'] as weight, tmpmap['age'] as age
from (
    select id, str_to_map(concat_ws(',', collect_set(concat(label, ':', value))), ',', ':') as tmpmap
    from tmp2
    group by id
) as tmp1;

5.分析函数

select id, label, value,
    lead(value, 1, 0) over(partition by id order by label) as lead,
    lag(value, 1, 999) over(partition by id order by label) as lag,
    first_value(value) over(partition by id order by label) as first_value,
    last_value(value) over(partition by id order by label) as last_value
from tmp;

select id, label, value,
    row_number() over(partition by id order by value) as row_number,
    rank() over(partition by id order by value) as rank,
    dense_rank() over(partition by id order by value) as dense_rank
from tmp;

6.多维分析

select col1, col2, col3, count(1), Grouping__ID
from tmp
group by col1, col2, col3
grouping sets(col1, col2, col3, (col1, col2), (col1, col3), (col2, col3), ());

select col1, col2, col3, count(1), Grouping__ID
from tmp
group by col1, col2, col3 with cube;

7.数据倾斜

groupby：

select label, sum(cnt) as all
from (
    select rd, label, sum(1) as cnt
    from (
        select id, label, round(rand(), 2) as rd, value from tmp1
    ) as tmp
    group by rd, label
) as tmp
group by label;

join：

select label, sum(cnt) as all
from (
    select rd, label, sum(value) as cnt
    from (
        select tmp1.rd as rd, tmp1.label as label, tmp1.value * tmp2.value as value
        from (
            select id, round(rand(), 1) as rd, label, value from tmp1
        ) as tmp1
        join (
            select id, rd, label, value
            from tmp2
            lateral view explode(split('0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9', ',')) mytable as rd
        ) as tmp2
            on tmp1.rd = tmp2.rd and tmp1.label = tmp2.label
    ) as tmp1
    group by rd, label
) as tmp1
group by label;

查看全文

http://wiki.neutronadmin.com/news/202254/