| | 121 | 使用Hive建表查询数据,可以不需要编程,直接使用SQL查询所需要的数据 |
| | 122 | |
| | 123 | |
| | 124 | {{{ |
| | 125 | CREATE EXTERNAL TABLE IF NOT EXISTS DM_USER(COOKIE STRING, SEX INT, AGE INT, PRICE_LEVEL INT, AREA INT, |
| | 126 | PLAN INT, INTEREST1 INT, INTEREST2 INT, INTEREST3 INT, INTEREST4 INT, INTEREST5 INT) |
| | 127 | PARTITIONED BY(DAY STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' |
| | 128 | LOCATION '/dingxiang/warehouse/user/'; |
| | 129 | |
| | 130 | ALTER TABLE DM_USER ADD IF NOT EXISTS PARTITION (DAY = '20120322') LOCATION '/dingxiang/warehouse/user/20120322'; |
| | 131 | }}} |
| | 132 | |
| | 133 | |
| | 134 | 使用Hive建表时要注意输出的路径和压缩方式,如果输出的数据比较多,建议使用压缩,缺省的压缩方式在Hadoop里面设置 |
| | 135 | |
| | 136 | {{{ |
| | 137 | SET hive.exec.compress.output=true; |
| | 138 | |
| | 139 | CREATE TABLE dm_label_opt_p AS SELECT DISTINCT cookie, date_time, click |
| | 140 | FROM dm_ad_counter WHERE day >= 20120913 AND adid = 230072 AND cookie <> "null" AND click <> 0 AND plan <> 4 AND noise <> 1; |
| | 141 | |
| | 142 | }}} |
| | 143 | |
| | 144 | ---- |