Most frequently occurring first character among the words of a file
-- Find the single most frequent first character among all words of the
-- input file and store that (letter, count) pair.

-- Read the input as one chararray field per line.
raw_lines = LOAD '/user/cloudera/my-friends'
    AS (line:chararray);

-- Split every line into words and flatten the bag so each word is its own row.
words = FOREACH raw_lines
    GENERATE FLATTEN(TOKENIZE(line)) AS word:chararray;

-- Keep only the first character of each word. No case folding is applied,
-- so upper- and lower-case letters end up in separate groups.
first_chars = FOREACH words
    GENERATE SUBSTRING(word, 0, 1) AS letter:chararray;

-- Bucket the rows by first character and count the rows in each bucket.
by_letter = GROUP first_chars BY letter;
letter_counts = FOREACH by_letter
    GENERATE group AS letter, COUNT(first_chars) AS occurrences;

-- Sort with the highest count first and keep only the top row.
-- NOTE(review): the script defines no tie-break, so when several letters
-- share the top count the one that is kept is presumably arbitrary.
ranked = ORDER letter_counts BY occurrences DESC;
top_letter = LIMIT ranked 1;

STORE top_letter INTO '/user/cloudera/dummy5556777777';
No comments:
Post a Comment