求一个界面的广告数量
page adid
page1 1
page1 2
page1 3
page1 4
[root@node1 data]# cat ad.txt
page1,1_3_5_9_10_56
page2,30_123_34_7_9_10
page3,40_50_60_88_23_10_9
hive (test)> create table page_ad(
> page string,
> aids string)
> row format delimited fields terminated by ','
> lines terminated by '\n';
hive (test)> load data local inpath "/opt/data/ad.txt" into table page_ad;
select
adid,
count(*)
from page_ad
lateral view explode(split(aids, '_')) tmp as adid group by adid;
# 有一个文件,内容如下:userid, order_info
1,water:5_friut:10_c:40_d:5
2,a:10_b:20_d:50
3,a:1
4,a:30_d:40
# 案例:统计每个用户的消费金额
order(userid int, product map
# 提醒
1 water 5
1 fruit 10
# 创建表格
create table tt(
userid int,
product map
)
row format delimited
fields terminated by ','
collection items terminated by '_'
map keys terminated by ':'
lines terminated by '\n';
# 查表格
select
id,
tmp.fruit,
tmp.num
from tt
lateral view explode(product) tmp as fruit, num;
# 得出每个用户的消费金额
select
id,
sum(tmp.num)
from tt
lateral view explode(product) tmp as fruit, num
group by id;
# 有一个文件,记录学生三次模拟考试的成绩,每一次模拟考试有三门课
[root@node1 data]# cat score.txt
s001,60_90_20,90_80_30,70_60_39
s002,61_80_50,60_35_42,80_70_40
s003,62_70_60,70_75_56,90_80_41
s004,63_60_40,90_65_77,100_90_42
# 求学生三次模拟考试的平均成绩
s001 60 90 70
s001 90 80 60
s001 20 30 39
create table sc(
> sid int,
> first_score string,
> second_score string,
> third_score string)
> row format delimited by ','
> collection items terminated by '_'
> lines terminated by '\n';
hive (test)> select sid,fs from sc lateral view explode(split(first_score, "_")) tmp1 as fs
> union
> select sid,ss from sc lateral view explode(split(second_score, "_")) tmp1 as ss
> union
> select sid,ts from sc lateral view explode(split(third_score, "_")) tmp1 as ts;
本文来自博客园,作者:jsqup,转载请注明原文链接:https://www.cnblogs.com/jsqup/p/16547712.html