19. ユーザ数の多い地域ランキング
location(任意テキスト)の代わりに time_zone で算出
-- Rank time zones by the number of user rows that report them.
-- time_zone is used as the region key instead of the 'location' column.
SELECT
    RANK() OVER ( ORDER BY users DESC ) AS ranking,
    time_zone,
    users
FROM (
    -- One row per non-empty time zone with its user count.
    SELECT
        time_zone,
        COUNT(*) AS users
    FROM
        dfs.`tmp/twitter/user_2013_demo`
    WHERE
        -- Drops empty strings; NULL time zones are dropped too, because the
        -- comparison is then UNKNOWN. Original note: "30 sec" (presumably run time).
        NOT (time_zone = '')
    GROUP BY
        time_zone ) AS tz_counts
ORDER BY
    ranking,
    time_zone  -- tie-breaker: RANK() gives equal counts the same rank
LIMIT 1000
20. rank time_zone users
1 Central Time (US & Canada) 15,198,713
2 Eastern Time (US & Canada) 14,932,129
3 Pacific Time (US & Canada) 13,234,999
4 Tokyo 11,972,200
5 Hawaii 9,652,707
6 Brasilia 9,350,127
7 Quito 8,553,937
8 Santiago 6,684,055
9 Greenland 6,330,616
10 Bangkok 6,251,371
23. フォロワー数ランキング
-- Rank accounts by follower count, restricted to accounts with more than
-- 100,000 followers.
SELECT
    RANK() OVER ( ORDER BY followers_count DESC ) AS ranking,
    screen_name,
    followers_count
FROM
    dfs.`tmp/twitter/user_2013_demo`
WHERE
    -- Threshold was written as the float literal 1E5; an integer literal
    -- avoids an implicit cast. Original note: "40 sec" (presumably run time).
    followers_count > 100000
ORDER BY
    ranking,
    screen_name  -- tie-breaker: RANK() gives equal counts the same rank
LIMIT 1000
25. フォロー数ランキング
-- Rank accounts by friends_count (number of accounts followed), restricted
-- to accounts with a friends_count above 100,000.
SELECT
    RANK() OVER ( ORDER BY friends_count DESC ) AS ranking,
    screen_name,
    friends_count
FROM
    dfs.`tmp/twitter/user_2013_demo`
WHERE
    -- Threshold was written as the float literal 1E5; an integer literal
    -- avoids an implicit cast. Original note: "40 sec" (presumably run time).
    friends_count > 100000
ORDER BY
    ranking,
    screen_name  -- tie-breaker: RANK() gives equal counts the same rank
LIMIT 1000
27. 有名人の影響力を調べる
仮) フォロワー数が10,000人を超えるユーザを有名人とする
-- Split the user base into "celebrity" accounts (more than 10,000 followers)
-- and all other accounts, and total both the follower counts and the number
-- of accounts on each side in a single pass.
-- NOTE(review): the threshold is strict (> 10000), so an account with exactly
-- 10,000 followers is counted as normal; confirm this matches the intended
-- definition of a celebrity.
SELECT
    SUM( followers_count ) AS total_followers,
    SUM( CASE WHEN followers_count > 10000 THEN followers_count ELSE 0 END ) AS celb_followers,
    SUM( CASE WHEN followers_count <= 10000 THEN followers_count ELSE 0 END ) AS normal_followers,
    COUNT( 1 ) AS total_users,
    -- ELSE 0 keeps the result at 0 rather than NULL when no row matches.
    SUM( CASE WHEN followers_count > 10000 THEN 1 ELSE 0 END ) AS celb_users,
    SUM( CASE WHEN followers_count <= 10000 THEN 1 ELSE 0 END ) AS normal_users
FROM
    dfs.`tmp/twitter/user_2013_demo`
28. 有名人の影響力を調べる
count percent
total_followers 52,002,443,928 100.0 %
celb_followers 21,410,243,998 41.2 %
normal_followers 30,592,199,930 58.8 %
count percent
total_users 857,645,612 100.0 %
celb_users 381,567 0.04 %
normal_users 857,264,045 99.96 %
"The rich get richer and the poor get poorer"