THIS IS A TEST INSTANCE. ALL YOUR CHANGES WILL BE LOST!!!!

Apache Kylin : Analytical Data Warehouse for Big Data

Page tree

Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
languagejs
themeRDark
titleCubeDesc
linenumberstrue
collapsetrue
{
  "uuid": "2e9eaf25-31e0-16e1-42fc-32d83c05255d",
  "last_modified": 1600870179000,
  "version": "3.0.0.20500",
  "name": "UserActionBasicAnslysisCube",
  "is_draft": false,
  "model_name": "LambdaVerifyModel",
  "description": "",
  "null_string": null,
  "dimensions": [
    {
      "name": "USER_DEVIDE_TYPE",
      "table": "LAMBDA_FLAT_TABLE",
      "column": "USER_DEVIDE_TYPE",
      "derived": null
    },
    {
      "name": "LOCATION_CITY",
      "table": "LAMBDA_FLAT_TABLE",
      "column": "LOCATION_CITY",
      "derived": null
    },
    {
      "name": "DEVICE_BRAND",
      "table": "LAMBDA_FLAT_TABLE",
      "column": "DEVICE_BRAND",
      "derived": null
    },
    {
      "name": "STR_MINUTE_SECOND",
      "table": "LAMBDA_FLAT_TABLE",
      "column": "STR_MINUTE_SECOND",
      "derived": null
    },
    {
      "name": "PAGE_ID",
      "table": "LAMBDA_FLAT_TABLE",
      "column": "PAGE_ID",
      "derived": null
    },
    {
      "name": "MONTH_START",
      "table": "LAMBDA_FLAT_TABLE",
      "column": "MONTH_START",
      "derived": null
    },
    {
      "name": "DAY_START",
      "table": "LAMBDA_FLAT_TABLE",
      "column": "DAY_START",
      "derived": null
    },
    {
      "name": "HOUR_START",
      "table": "LAMBDA_FLAT_TABLE",
      "column": "HOUR_START",
      "derived": null
    },
    {
      "name": "MINUTE_START",
      "table": "LAMBDA_FLAT_TABLE",
      "column": "MINUTE_START",
      "derived": null
    }
  ],
  "measures": [
    {
      "name": "_COUNT_",
      "function": {
        "expression": "COUNT",
        "parameter": {
          "type": "constant",
          "value": "1"
        },
        "returntype": "bigint"
      }
    },
    {
      "name": "PV_STAT",
      "function": {
        "expression": "COUNT_DISTINCT",
        "parameter": {
          "type": "column",
          "value": "LAMBDA_FLAT_TABLE.PAGEVIEW_ID"
        },
        "returntype": "hllc(16)"
      }
    },
    {
      "name": "PLAY_STAT",
      "function": {
        "expression": "SUM",
        "parameter": {
          "type": "column",
          "value": "LAMBDA_FLAT_TABLE.PLAY_TIMES"
        },
        "returntype": "bigint"
      }
    }
  ],
  "dictionaries": [],
  "rowkey": {
    "rowkey_columns": [
      {
        "column": "LAMBDA_FLAT_TABLE.USER_DEVIDE_TYPE",
        "encoding": "dict",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "LAMBDA_FLAT_TABLE.LOCATION_CITY",
        "encoding": "dict",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "LAMBDA_FLAT_TABLE.DEVICE_BRAND",
        "encoding": "dict",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "LAMBDA_FLAT_TABLE.STR_MINUTE_SECOND",
        "encoding": "dict",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "LAMBDA_FLAT_TABLE.PAGE_ID",
        "encoding": "dict",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "LAMBDA_FLAT_TABLE.MONTH_START",
        "encoding": "date",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "LAMBDA_FLAT_TABLE.DAY_START",
        "encoding": "date",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "LAMBDA_FLAT_TABLE.HOUR_START",
        "encoding": "time",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "LAMBDA_FLAT_TABLE.MINUTE_START",
        "encoding": "time",
        "encoding_version": 1,
        "isShardBy": false
      }
    ]
  },
  "hbase_mapping": {
    "column_family": [
      {
        "name": "F1",
        "columns": [
          {
            "qualifier": "M",
            "measure_refs": [
              "_COUNT_",
              "PLAY_STAT"
            ]
          }
        ]
      },
      {
        "name": "F2",
        "columns": [
          {
            "qualifier": "M",
            "measure_refs": [
              "PV_STAT"
            ]
          }
        ]
      }
    ]
  },
  "aggregation_groups": [
    {
      "includes": [
        "LAMBDA_FLAT_TABLE.USER_DEVIDE_TYPE",
        "LAMBDA_FLAT_TABLE.LOCATION_CITY",
        "LAMBDA_FLAT_TABLE.DEVICE_BRAND",
        "LAMBDA_FLAT_TABLE.STR_MINUTE_SECOND",
        "LAMBDA_FLAT_TABLE.PAGE_ID",
        "LAMBDA_FLAT_TABLE.MONTH_START",
        "LAMBDA_FLAT_TABLE.DAY_START",
        "LAMBDA_FLAT_TABLE.HOUR_START",
        "LAMBDA_FLAT_TABLE.MINUTE_START"
      ],
      "select_rule": {
        "hierarchy_dims": [],
        "mandatory_dims": [
          "LAMBDA_FLAT_TABLE.USER_DEVIDE_TYPE",
          "LAMBDA_FLAT_TABLE.STR_MINUTE_SECOND",
          "LAMBDA_FLAT_TABLE.DEVICE_BRAND"
        ],
        "joint_dims": []
      }
    }
  ],
  "signature": "z2i5rv8LtjgoSByK5a/Y9w==",
  "notify_list": [],
  "status_need_notify": [
    "ERROR",
    "DISCARDED",
    "SUCCEED"
  ],
  "partition_date_start": 0,
  "partition_date_end": 3153600000000,
  "auto_merge_time_ranges": [
    604800000,
    2419200000
  ],
  "volatile_range": 0,
  "retention_range": 0,
  "engine_type": 2,
  "storage_type": 3,
  "override_kylin_properties": {
    "kylin.stream.cube.window": "3600",
    "kylin.stream.cube.duration": "3600",
    "kylin.stream.index.checkpoint.intervals": "300",
    "kylin.cube.algorithm": "INMEM",
    "kylin.stream.segment.retention.policy": "fullBuild",
    "kylin.stream.build.additional.cuboids": "true",
    "kylin.stream.event.timezone": "GMT+8"
  },
  "cuboid_black_list": [],
  "parent_forward": 3,
  "mandatory_dimension_set_list": [],
  "snapshot_table_desc_list": []
}

Enable streaming cube

Load data into lambda table

...

Code Block
languagesql
themeRDark
titleCreate flat table
linenumberstrue
collapsetrue
USE APACHE;

DROP TABLE IF EXISTS kylin_intermediate_useractionbasicanslysiscube_542a8a51_5a5c_d24a_19a2_a37a59fac050;
CREATE EXTERNAL TABLE IF NOT EXISTS kylin_intermediate_useractionbasicanslysiscube_542a8a51_5a5c_d24a_19a2_a37a59fac050
(
\`LAMBDA_FLAT_TABLE_USER_DEVIDE_TYPE\` string
,\`LAMBDA_FLAT_TABLE_LOCATION_CITY\` string
,\`LAMBDA_FLAT_TABLE_DEVICE_BRAND\` string
,\`LAMBDA_FLAT_TABLE_STR_MINUTE_SECOND\` string
,\`LAMBDA_FLAT_TABLE_PAGE_ID\` string
,\`LAMBDA_FLAT_TABLE_MONTH_START\` date
,\`LAMBDA_FLAT_TABLE_DAY_START\` date
,\`LAMBDA_FLAT_TABLE_HOUR_START\` timestamp
,\`LAMBDA_FLAT_TABLE_MINUTE_START\` timestamp
,\`LAMBDA_FLAT_TABLE_PAGEVIEW_ID\` string
,\`LAMBDA_FLAT_TABLE_PLAY_TIMES\` int
)
STORED AS SEQUENCEFILE
LOCATION 'hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/kylin_intermediate_useractionbasicanslysiscube_542a8a51_5a5c_d24a_19a2_a37a59fac050';
ALTER TABLE kylin_intermediate_useractionbasicanslysiscube_542a8a51_5a5c_d24a_19a2_a37a59fac050 SET TBLPROPERTIES('auto.purge'='true');
INSERT OVERWRITE TABLE \`kylin_intermediate_useractionbasicanslysiscube_542a8a51_5a5c_d24a_19a2_a37a59fac050\` SELECT
\`LAMBDA_FLAT_TABLE\`.\`USER_DEVIDE_TYPE\` as \`LAMBDA_FLAT_TABLE_USER_DEVIDE_TYPE\`
,\`LAMBDA_FLAT_TABLE\`.\`LOCATION_CITY\` as \`LAMBDA_FLAT_TABLE_LOCATION_CITY\`
,\`LAMBDA_FLAT_TABLE\`.\`DEVICE_BRAND\` as \`LAMBDA_FLAT_TABLE_DEVICE_BRAND\`
,\`LAMBDA_FLAT_TABLE\`.\`STR_MINUTE_SECOND\` as \`LAMBDA_FLAT_TABLE_STR_MINUTE_SECOND\`
,\`LAMBDA_FLAT_TABLE\`.\`PAGE_ID\` as \`LAMBDA_FLAT_TABLE_PAGE_ID\`
,\`LAMBDA_FLAT_TABLE\`.\`MONTH_START\` as \`LAMBDA_FLAT_TABLE_MONTH_START\`
,\`LAMBDA_FLAT_TABLE\`.\`DAY_START\` as \`LAMBDA_FLAT_TABLE_DAY_START\`
,\`LAMBDA_FLAT_TABLE\`.\`HOUR_START\` as \`LAMBDA_FLAT_TABLE_HOUR_START\`
,\`LAMBDA_FLAT_TABLE\`.\`MINUTE_START\` as \`LAMBDA_FLAT_TABLE_MINUTE_START\`
,\`LAMBDA_FLAT_TABLE\`.\`PAGEVIEW_ID\` as \`LAMBDA_FLAT_TABLE_PAGEVIEW_ID\`
,\`LAMBDA_FLAT_TABLE\`.\`PLAY_TIMES\` as \`LAMBDA_FLAT_TABLE_PLAY_TIMES\`
 FROM \`APACHE\`.\`LAMBDA_FLAT_TABLE\` as \`LAMBDA_FLAT_TABLE\`
WHERE 1=1 AND (\`LAMBDA_FLAT_TABLE\`.\`DAY_START\` >= '2020-09-23' AND \`LAMBDA_FLAT_TABLE\`.\`DAY_START\` < '2020-09-24')
;

 Verify query result

Code Block
languagesql
themeRDark
titleQuery
linenumberstrue
select hour_start, count(*) as event_num, count(distinct PAGEVIEW_ID) as pv_stat
from LAMBDA_FLAT_TABLE 
where day_start >= '2020-09-23' and day_start < '2020-09-24'
group by hour_start
order by hour_start;


select page_id, user_devide_type, count(*) as event_num, count(distinct PAGEVIEW_ID) as pv_stat
from LAMBDA_FLAT_TABLE
where day_start >= '2020-09-23' and day_start <= '2020-09-24'
group by page_id, user_devide_type
order by page_id, user_devide_type;

...