-- Word count over free-text feedback using Hive.
-- Each row of feedback_vnr holds one line of the input file in `comments`.
CREATE TABLE feedback_vnr (comments STRING);

-- Load the local text file into the table.
LOAD DATA LOCAL INPATH '/home/cloudera/Desktop/file.txt'
INTO TABLE feedback_vnr;

-- Step 1: inspect the raw lines.
SELECT comments
FROM feedback_vnr;

-- Step 2: split every line on single spaces (one array per row).
SELECT split(comments, ' ')
FROM feedback_vnr;

-- Step 3: explode the arrays so that every word becomes its own row.
-- The terminating semicolon was missing here, which merged this statement
-- with the next one and made both fail to parse.
SELECT explode(split(comments, ' '))
FROM feedback_vnr;

-- Step 4: count the occurrences of every word.
SELECT
    word,
    count(*)
FROM (
    SELECT explode(split(comments, ' ')) AS word
    FROM feedback_vnr
) tmp
GROUP BY word;
___________________________
-- Word count in Pig Latin: tokenize every input record, keep only tokens made
-- up entirely of word characters, then count how often each token occurs.
raw_lines = LOAD '/user/cloudera/55';

-- The first field is cast to chararray and split into one token per row.
tokens = FOREACH raw_lines GENERATE FLATTEN(TOKENIZE((chararray)$0)) AS word;

-- Drop tokens that contain anything other than word characters.
clean_words = FILTER tokens BY word MATCHES '\\w+';

by_word = GROUP clean_words BY word;

-- Output tuples are (count, word), in that order.
word_counts = FOREACH by_word GENERATE COUNT(clean_words), group;

STORE word_counts INTO '/user/cloudera/n66';
____________________________
Most frequently occurring first character of the words in a file
-- Find the first character that starts the largest number of words in the
-- input file and store that single (letter, count) tuple.
input_lines = LOAD '/user/cloudera/my-friends' AS (line:chararray);

-- One row per token of every line.
words = FOREACH input_lines GENERATE FLATTEN(TOKENIZE(line)) AS token:chararray;

-- Keep only the first character of every token.
first_chars = FOREACH words GENERATE SUBSTRING(token, 0, 1) AS letter:chararray;

by_letter = GROUP first_chars BY letter;
letter_counts = FOREACH by_letter GENERATE group AS letter, COUNT(first_chars) AS occurrences;

-- Highest count first, then keep only the top row.
ranked = ORDER letter_counts BY occurrences DESC;
top_letter = LIMIT ranked 1;

STORE top_letter INTO '/user/cloudera/dummy5556777777';
____________________________________
# Flume agent "tier1": netcat source -> memory channel -> HDFS sink.
tier1.sources = source1
tier1.channels = channel1
tier1.sinks = sink1

# Source: netcat listener bound to 127.0.0.1:44444.
tier1.sources.source1.type = netcat
tier1.sources.source1.bind = 127.0.0.1
tier1.sources.source1.port = 44444
tier1.sources.source1.channels = channel1

# Channel: in-memory buffer with a capacity of 100.
tier1.channels.channel1.type = memory
tier1.channels.channel1.capacity = 100

# Sink: write the events to HDFS.
tier1.sinks.sink1.type = hdfs
tier1.sinks.sink1.channel = channel1
tier1.sinks.sink1.hdfs.path = hdfs://localhost:8020/user/cloudera/flume/events_manish_rvim
# HDFS sink options must carry the "hdfs." prefix. The previous key
# "tier1.sinks.sink1.fileType" was ignored, so the sink fell back to its
# default file type (SequenceFile) instead of DataStream.
tier1.sinks.sink1.hdfs.fileType = DataStream
____________________________________
# Flume agent "tier1": netcat source -> memory channel -> HDFS sink.
tier1.sources = source1
tier1.channels = channel1
tier1.sinks = sink1

# Source: netcat listener bound to 127.0.0.1:44444.
tier1.sources.source1.type = netcat
tier1.sources.source1.bind = 127.0.0.1
tier1.sources.source1.port = 44444
tier1.sources.source1.channels = channel1

# Channel: in-memory buffer with a capacity of 100.
tier1.channels.channel1.type = memory
tier1.channels.channel1.capacity = 100

# Sink: write the events to HDFS.
tier1.sinks.sink1.type = hdfs
tier1.sinks.sink1.channel = channel1
tier1.sinks.sink1.hdfs.path = hdfs://localhost:8020/user/cloudera/flume/events_manish_rvim
# HDFS sink options must carry the "hdfs." prefix. The previous key
# "tier1.sinks.sink1.fileType" was ignored, so the sink fell back to its
# default file type (SequenceFile) instead of DataStream.
tier1.sinks.sink1.hdfs.fileType = DataStream
No comments:
Post a Comment