标签:
[hadoop@vm1 ~]$ cat orders.data
1 apple 30 x
2 apple 50 x
3 banana 30 y
4 pear 20 y
5 banana 10 y
[hadoop@vm1 ~]$
-- Load the orders file with a three-column schema, then group the rows by fruit.
-- Path literals must use plain ASCII single quotes; typographic quotes are a syntax error.
data = load '/orders.data' as (orderid:int, fruit:chararray, amount:int);
grpd = group data by fruit;
describe grpd; grpd: {group: chararray,data: {(orderid: int,fruit: chararray,amount: int)}}
dump grpd; (pear,{(4,pear,20)}) (apple,{(2,apple,50),(1,apple,30)}) (banana,{(5,banana,10),(3,banana,30)})
dump grpd; (pear,{(4,pear,20)}) (apple,{(2,apple,50),(1,apple,30)}) (banana,{(5,banana,10),(3,banana,30)})
group data by $0+$1;
orders = load '/orders.data' as (orderid:int, fruit:chararray, amount:int, type:chararray);
grpd = group orders by (fruit, type);
describe grpd;
grpd: {group: (fruit: chararray,type: chararray),orders: {(orderid: int,fruit: chararray,amount: int,type: chararray)}}
dump grpd;
((pear,y),{(4,pear,20,y)})
((apple,x),{(2,apple,50,x),(1,apple,30,x)})
((banana,y),{(5,banana,10,y),(3,banana,30,y)})
sums = foreach grpd generate group, SUM(orders.amount); dump sums; ((pear,y),20) ((apple,x),80) ((banana,y),40)
sums2 = foreach grpd generate group.$0, group.$1, SUM(orders.amount);
dump sums2;
(pear,y,20)
(apple,x,80)
(banana,y,40)
grpd = group orders all; describe grpd; grpd: {group: chararray,orders: {(orderid: int,fruit: chararray,amount: int,type: chararray)}} dump grpd; (all,{(5,banana,10,y),(4,pear,20,y),(3,banana,30,y),(2,apple,50,x),(1,apple,30,x)})
A = LOAD 'data1' AS (owner:chararray,pet:chararray);
DUMP A;
(Alice,turtle)
(Alice,goldfish)
(Alice,cat)
(Bob,dog)
(Bob,cat)
B = LOAD 'data2' AS (friend1:chararray,friend2:chararray);
DUMP B;
(Cindy,Alice)
(Mark,Alice)
(Paul,Bob)
(Paul,Jane)
X = COGROUP A BY owner, B BY friend2;
DESCRIBE X;
X: {group: chararray,A: {owner: chararray,pet: chararray},B: {friend1: chararray,friend2: chararray}}
DUMP X;
(Alice,{(Alice,turtle),(Alice,goldfish),(Alice,cat)},{(Cindy,Alice),(Mark,Alice)})
(Bob,{(Bob,dog),(Bob,cat)},{(Paul,Bob)})
(Jane,{},{(Paul,Jane)})
-- Group on the first field, using a custom partitioner class and a parallelism of 2.
-- Path literals must use plain ASCII single quotes; typographic quotes are a syntax error.
A = LOAD 'input_data';
B = GROUP A BY $0 PARTITION BY org.apache.pig.test.utils.SimpleCustomPartitioner PARALLEL 2;
public class SimpleCustomPartitioner extends Partitioner <PigNullableWritable, Writable> { //@Override public int getPartition(PigNullableWritable key, Writable value, int numPartitions) { if(key.getValueAsPigType() instanceof Integer) { int ret = (((Integer)key.getValueAsPigType()).intValue() % numPartitions); return ret; } else { return (key.hashCode()) % numPartitions; } } }
标签:
原文地址:http://www.cnblogs.com/lishouguang/p/4559593.html