Apache Kylin : Analytical Data Warehouse for Big Data
Welcome to Kylin Wiki.
Prepare data and schema
LambdaTable
-- Hive external fact table that backs the Kylin lambda cube.
-- Rows are comma-delimited text files under the LOCATION below, partitioned
-- by DAY_START. Column order matters for delimited text, so it is kept
-- exactly as in the original definition.
CREATE EXTERNAL TABLE IF NOT EXISTS lambda_flat_table (
    -- event timestamp and debug purpose column
    EVENT_TIME        timestamp,
    str_minute_second string COMMENT "For debug purpose, maybe check timezone etc",

    -- dimension column
    act_type          string COMMENT "What did user interact with our mobile app in this event",
    -- NOTE(review): "devide" looks like a typo for "device", but the sample
    -- message uses the same spelling ("devide_type"), so the name is kept.
    user_devide_type  string COMMENT "Which kind of device did user use in this event",
    location_city     string COMMENT "Which city did user locate in this event",
    video_id          bigint COMMENT "Which video did user watch in this event",
    device_brand      string,
    page_id           string,

    -- measure column
    play_times        bigint,
    play_duration     decimal(23, 10),
    pageview_id       string COMMENT "Identier",

    -- for kylin used (dimension)
    MINUTE_START      timestamp,
    HOUR_START        timestamp,
    MONTH_START       date
)
COMMENT 'Fact table. Store raw user action log.'
PARTITIONED BY (DAY_START date)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION 'hdfs:///LACUS/lambda_data/lambda_flat_table';
Message
{ "content_list":[ "22", "22", "22" ], "act_type":"click", "event_ts_2":1600877255000, "event_ts":1600877255000, "user_detail":{ "devide_type":7, "location":{ "city":"shenzhen" }, "network_type":3 }, "video_id":22, "event_date_2":"2020-09-23 16:07:35.000+08:00", "str_minute":"7", "video_type":"3c8416", "play_times":22, "pageview_id":"3c84cf9d-b8fb-3dec-8b8c-f510c4b6fd097", "active_minutes":50.0208, "device_brand":"vivo", "str_minute_second":"16_7_35", "play_duration":37.6584, "event_date":"2020-09-23 16:07:35.000+08:00", "page_id":"page_22", "str_second":"35", "uid":2 }
SendMsg
# SendMsg.sh: pipe the events printed by fake.py into a Kafka topic.

# One-time topic creation, left commented out as in the original:
# bin/kafka-topics.sh --create --topic useraction --zookeeper cdh-master --partitions 10 --replication-factor 1

# Remove out.data from any previous run before starting.
rm -rf out.data

# NOTE(review): the create command above names the topic "useraction" while
# the producer writes to "useraction_xxyu"; confirm which one is intended.
python fake.py | kafka-console-producer --topic useraction_xxyu --broker-list cdh-master:9092,cdh-worker-1:9092,cdh-worker-2:9092
script
# Run the producer script in the background, detached from the terminal.
# stderr is sent to the same log as stdout so failures are recorded there too.
nohup sh SendMsg.sh > start.log 2>&1 &
Prepare Kylin
# Kylin streaming settings used for the lambda cube in this walkthrough.
# One property per line; values are unchanged.
# NOTE(review): the duration value is presumably in seconds; confirm against the Kylin docs.
kylin.stream.cube.duration=3600
kylin.stream.build.additional.cuboids=true
kylin.stream.metrics.option=console
# Hive database that holds the lambda table for the cube.
kylin.stream.hive.database-for-lambda-cube=lambda_311
# Same offset as the +08:00 timestamps in the sample message.
kylin.stream.event.timezone=GMT+8
kylin.stream.print-realtime-dict-enabled=true
Overview
Content Tools
ThemeBuilder
Apps