THIS IS A TEST INSTANCE. ALL YOUR CHANGES WILL BE LOST!!!!

Apache Kylin : Analytical Data Warehouse for Big Data

Page tree

Welcome to Kylin Wiki.


Before you start

How to enable Lambda Mode in Kylin 3.1

Overall steps

  1. Create hive table(historical table)
  2. Prepare script to mock sample event
  3. Start kafka producer
  4. Deploy Kylin Coordinator and Streaming Receiver
  5. Load streaming table
  6. Create model/cube into kylin
  7. Enable streaming cube
  8. Load data into historical table
  9. Refresh streaming table

Create lambda table

We know that segments which built from kafka may be incorrect for some reason, such as very late message. User may want a chance to update them.

Here we create a hive table to store "correct" data, so user will have a chance to clean and transform data from Kafka, and refresh segment from the hive table.

Please make sure that lambda_flat_table contains all the columns that you want to be included in your streaming cube.

LambdaTable DDL
CREATE EXTERNAL TABLE IF NOT EXISTS lambda_flat_table
(
-- event timestamp and debug purpose column
EVENT_TIME timestamp,
str_minute_second string COMMENT "For debug purpose, maybe check timezone etc",

-- dimension column
act_type string COMMENT "What did user interact with our mobile app in this event",
user_devide_type string COMMENT "Which kind of device did user use in this event",
location_city string COMMENT "Which city did user locate in this event",
video_id bigint COMMENT "Which video did user watch in this event",
device_brand string,
page_id string,

-- measure column
play_times bigint,
play_duration decimal(23, 10),
pageview_id string COMMENT "Identier of a pageview",


-- for kylin used (dimension)
MINUTE_START timestamp,
HOUR_START timestamp,
MONTH_START date
)
COMMENT 'Fact table. Store raw user action log.'
PARTITIONED BY (DAY_START date)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION 'hdf
s:///LACUS/lambda_data/lambda_flat_table';

Prepare sample event script

Says that we have a mobile application which 

MockMessage
# -*- coding: utf-8 -*-
import sys
import json
import datetime
from datetime import timedelta
import random
import uuid
import time
import argparse

########################################################
### User Config

## Begin of UTC
BEGIN_DT = datetime.datetime(1970, 1, 1)

## Sleep Per ten events
SLEEP_PER_TEN = 0.03

## Lost some dimsension value?
LOST_SOME_DIMSENSION = False

ENABLE_HOUR_POWER = True

MAX_UID = 20000
MAX_VID = 11000
pv_id_set_hour = set()
pv_id_stat = dict()
pv_id_set = set()
########################################################
# For verify the correctness of COUNT DISTINCT
# For verify the correctness of COUNT DISTINCT
########################################################


########################################################
### Program
random.seed(datetime.datetime.now().second)

act_type_power = [
    ['start', 32],
    ['exit', 43],
    ['play', 224],
    ['stop', 156],
    ['pause', 23],
    ['click', 367],
    ['comment', 14],
    ['share', 22],
    ['like', 55],
    ['dislike', 70]
]

tmp = 0
for act_type_item in act_type_power:
    p = act_type_item[1]
    act_type_item[1] = tmp
    tmp += p
TOTAL_POWER = tmp

hour_power = {
    0: 1.75,
    1: 1.25,
    2: 0.71,
    3: 0.43,
    4: 0.31,
    5: 0.55,
    6: 0.61,
    7: 0.72,
    8: 0.96,
    9: 1.01,
    10: 1.12,
    11: 1.11,
    12: 1.31,
    13: 1.21,
    14: 1.51,
    15: 1.23,
    16: 1.02,
    17: 1.31,
    18: 1.41,
    19: 1.22,
    20: 1.42,
    21: 1.55,
    22: 1.34,
    23: 1.67
}

gmt_8 = 8 * 3600


def fake_curr_date_long(now):
    return int((now - BEGIN_DT).total_seconds()) * 1000


def fake_curr_date_str(now):
    return now.strftime('%Y-%m-%d %H:%M:%S.000+08:00')


def fake_act_type():
    ran = random.randint(0, TOTAL_POWER * 3) % TOTAL_POWER
    preact = act_type_power[0][0]
    for act in act_type_power:
        if ran < act[1]:
            return preact
        else:
            preact = act[0]
    return preact


def fake_active_minutes(row_no, now):
    now_hour = now.hour
    if row_no % 2 == 0:
        ran_num = random.randint(0, 3) + random.randint(0, 999) / 100.0
    else:
        ran_num = random.randint(4, 50) + random.randint(0, 999) / 100.0
    if ENABLE_HOUR_POWER:
        return round(ran_num * hour_power[now_hour], 4)
    else:
        return round(ran_num, 4)


def fake_uid(row_no, now):
    if row_no % 3 == 0:
        return random.randint(1, 10)
    else:
        return random.randint(10, MAX_UID) + now.hour * 100


def fake_video_id(now):
    tmp1 = random.randint(0, MAX_VID)
    if now.minute % 2 == 1:
        return tmp % 100 + now.hour
    else:
        return tmp1 + now.hour * 100


def fake_play_times(row_no, now):
    now_hour = now.hour
    if row_no % 2 == 0:
        pt = random.randint(0, 10)
    else:
        pt = random.randint(10, 55)

    if ENABLE_HOUR_POWER:
        return int(pt * hour_power[now_hour])
    else:
        return pt


def fake_pageview_id(str2, now):
    return str2 + str(now.minute)


city_list = ["shanghai", "beijing", "hangzhou", "shenzhen",
             "taibei", "hongkong", "guangzhou",
             "nanjing", "chongqin", "berlin", "tokyo"]
video_type = ["Sports", "Computer", "Science", "Game",
              "News", "Taste", "Tour", "Music",
              "Finance", "Arts", u"军事", u"中文", u"音乐", u"古装剧"]

brand_list = ["huawei", "iPhone", "xiaomi", "vivo", "360", "meizu"]

def fake_video_type(str1, now):
    tt = now.second % 3
    if tt == 0:
        return video_type[random.randint(0, 3)]
    elif tt == 1:
        return video_type[random.randint(0, len(video_type) - 1)]
    else:
        return str1 + str(now.hour)


def fake_string_in_list(row_no, str_list, top_n=3):
    if row_no % 2 == 0:
        _id = row_no % top_n
    else:
        _id = row_no % len(str_list)
    return str_list[_id]


def fake_event(row_no, long_str, short_str, now):
    """
    Fake single event
    """
    row = dict()

    ########################################################
    # Dimsension
    row['event_date'] = fake_curr_date_str(now)
    row['event_date_2'] = fake_curr_date_str(now)
    row['event_ts'] = fake_curr_date_long(now)
    row['event_ts_2'] = fake_curr_date_long(now)
    if not LOST_SOME_DIMSENSION or row_no % 10 >= 8:
        row['act_type'] = fake_act_type()
    row['video_type'] = fake_video_type(short_str[0:4], now)

    # Dimsension or Measure (UserView)
    row['uid'] = fake_uid(row_no, now)
    row['page_id'] = "page_" + str(fake_video_id(now) % 50)
    row['video_id'] = fake_video_id(now)
    row['device_brand'] = fake_string_in_list(row_no, brand_list)

    # multi level key-value
    row['user_detail'] = {"location": {"city": fake_string_in_list(row_no, city_list)},
                          "devide_type": now.minute,
                          "network_type": now.microsecond % 4 + 2}
    row['content_list'] = [str(fake_video_id(now)), str(fake_video_id(now)), str(fake_video_id(now))]

    ########################################################
    # Measure
    row['play_duration'] = fake_active_minutes(row_no, now)
    row['active_minutes'] = fake_active_minutes(row_no, now)
    row['play_times'] = fake_play_times(row_no, now)
    pvid = fake_pageview_id(long_str, now)
    pv_id_set.add(pvid)  # add stat of pv_id
    pv_id_set_hour.add(pvid)
    row['pageview_id'] = pvid

    # Fix string for verify count distinct(bitmap)
    row['str_minute'] = '%s' % now.minute
    row['str_second'] = '%s' % now.second
    row['str_minute_second'] = '%s_%s_%s' % (now.hour, now.minute, now.second)
    return json.dumps(row)


def fake_all_rows():
    total_row = 0
    init = False
    start = datetime.datetime.now()
    data_file = open('out.data', 'w+')
    while True:
        row_no = 0
        while row_no <= 10:
            time.sleep(SLEEP_PER_TEN)
            now = datetime.datetime.now()

            if now.hour == 1:
                print >> sys.stderr, "Sleep at " + now.strftime('%Y-%m-%d %H:%M:%S.000+08:00')
                time.sleep(4 * 3600)
                print >> sys.stderr, "End sleep at " + now.strftime('%Y-%m-%d %H:%M:%S.000+08:00')
            elif now.hour == 7:
                print >> sys.stderr, "Sleep at " + now.strftime('%Y-%m-%d %H:%M:%S.000+08:00')
                time.sleep(4 * 3600)
                print >> sys.stderr, "End sleep at " + now.strftime('%Y-%m-%d %H:%M:%S.000+08:00')

            unique_str = str(uuid.uuid3(uuid.NAMESPACE_URL, str(row_no + now.microsecond)))

            ####################################################
            # For verify the correctness of COUNT DISTINCT
            if not init:
                current_minute = -1
                current_hour = -1
                init = True
            if current_minute != now.minute:
                pv_id_stat['%s_%s' % (now.hour, current_minute)] = len(pv_id_set)
                print >> sys.stderr, "\nMETRICS,%s,%s,%s,%s" % (
                    current_hour + 8, current_minute, len(pv_id_set), str(total_row))
                current_minute = now.minute
                pv_id_set.clear()
            if current_hour != now.hour:
                print >> sys.stderr, "\nHOUR_METRICS,%s,%s" % (current_hour + 8, len(pv_id_set_hour))
                current_hour = now.hour
                pv_id_set_hour.clear()
            # For verify the correctness of COUNT DISTINCT
            ####################################################

            single_row = fake_event(row_no + 0, unique_str, unique_str + str(row_no), now)
            print single_row
            print >> data_file, single_row

            row_no += 1
            total_row += 1


def init_argument():
    parser = argparse.ArgumentParser()
    parser.add_argument('--max-uid', required=True, type=int)
    parser.add_argument('--max-vid', required=True, type=int)
    parser.add_argument('--enable-hour-power', required=True, type=bool)
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    current_minute = -1
    fake_all_rows()

Start kafka producer

SendMsg
# bin/kafka-topics.sh --create --topic useraction --zookeeper cdh-master --partitions 10 --replication-factor 1
rm -rf out.data
python fake.py | kafka-console-producer --t
opic useraction_xxyu --broker-list cdh-master:9092,cdh-worker-1:9092,cdh-worker-2:9092


script
nohup sh SendMsg.sh > start.log &

Deploy Kylin 

kylin.stream.cube.duration=3600
kylin.stream.build.additional.cuboids=true
kylin.stream.metrics.option=console
kylin.stream.hive.database-for-lambda-cube=lambda_311
kylin.stream.event.timezone=GMT+8
kylin.stream.print-realtime-dict-enabled=true

Load kafka topic into Kylin

TableMeta
{
  "uuid" : "8ff3d826-195f-0b3d-00d7-9df8dadfae10",
  "last_modified" : 1600869884560,
  "version" : "3.0.0.20500",
  "name" : "LAMBDA_FLAT_TABLE",
  "columns" : [ {
    "id" : "0",
    "name" : "USER_DEVIDE_TYPE",
    "datatype" : "varchar(256)",
    "comment" : "user_detail|devide_type"
  }, {
    "id" : "1",
    "name" : "LOCATION_CITY",
    "datatype" : "varchar(256)",
    "comment" : "user_detail|location|city"
  }, {
    "id" : "2",
    "name" : "PLAY_TIMES",
    "datatype" : "integer",
    "comment" : "play_times"
  }, {
    "id" : "3",
    "name" : "PAGEVIEW_ID",
    "datatype" : "varchar(256)",
    "comment" : "pageview_id"
  }, {
    "id" : "4",
    "name" : "DEVICE_BRAND",
    "datatype" : "varchar(256)",
    "comment" : "device_brand"
  }, {
    "id" : "5",
    "name" : "STR_MINUTE_SECOND",
    "datatype" : "varchar(256)",
    "comment" : "str_minute_second"
  }, {
    "id" : "6",
    "name" : "EVENT_TIME",
    "datatype" : "timestamp",
    "comment" : "event_date"
  }, {
    "id" : "7",
    "name" : "PAGE_ID",
    "datatype" : "varchar(256)",
    "comment" : "page_id"
  }, {
    "id" : "8",
    "name" : "MONTH_START",
    "datatype" : "date"
  }, {
    "id" : "9",
    "name" : "DAY_START",
    "datatype" : "date"
  }, {
    "id" : "10",
    "name" : "HOUR_START",
    "datatype" : "timestamp"
  }, {
    "id" : "11",
    "name" : "MINUTE_START",
    "datatype" : "timestamp"
  } ],
  "source_type" : 21,
  "table_type" : null,
  "database" : "APACHE"
}

Create model/cube into kylin

ModeDesc
{
  "uuid": "0352eb6f-1249-952d-aabc-78399493f1e4",
  "last_modified": 1600869976000,
  "version": "3.0.0.20500",
  "name": "LambdaVerifyModel",
  "owner": "ADMIN",
  "is_draft": false,
  "description": "我就看看 Lambda 究竟能不能用",
  "fact_table": "APACHE.LAMBDA_FLAT_TABLE",
  "fact_table_alias": "LAMBDA_FLAT_TABLE",
  "lookups": [],
  "dimensions": [
    {
      "table": "LAMBDA_FLAT_TABLE",
      "columns": [
        "USER_DEVIDE_TYPE",
        "LOCATION_CITY",
        "DEVICE_BRAND",
        "STR_MINUTE_SECOND",
        "EVENT_TIME",
        "MONTH_START",
        "DAY_START",
        "HOUR_START",
        "MINUTE_START",
        "PAGE_ID"
      ]
    }
  ],
  "metrics": [
    "LAMBDA_FLAT_TABLE.PLAY_TIMES",
    "LAMBDA_FLAT_TABLE.PAGEVIEW_ID"
  ],
  "filter_condition": "",
  "partition_desc": {
    "partition_date_column": "LAMBDA_FLAT_TABLE.DAY_START",
    "partition_time_column": null,
    "partition_date_start": 0,
    "partition_date_format": "yyyy-MM-dd",
    "partition_time_format": "HH:mm:ss",
    "partition_type": "APPEND",
    "partition_condition_builder": "org.apache.kylin.metadata.model.PartitionDesc$DefaultPartitionConditionBuilder"
  },
  "capacity": "MEDIUM",
  "projectName": "LambdaPrj"
}

{
  "uuid": "2e9eaf25-31e0-16e1-42fc-32d83c05255d",
  "last_modified": 1600870179000,
  "version": "3.0.0.20500",
  "name": "UserActionBasicAnslysisCube",
  "is_draft": false,
  "model_name": "LambdaVerifyModel",
  "description": "",
  "null_string": null,
  "dimensions": [
    {
      "name": "USER_DEVIDE_TYPE",
      "table": "LAMBDA_FLAT_TABLE",
      "column": "USER_DEVIDE_TYPE",
      "derived": null
    },
    {
      "name": "LOCATION_CITY",
      "table": "LAMBDA_FLAT_TABLE",
      "column": "LOCATION_CITY",
      "derived": null
    },
    {
      "name": "DEVICE_BRAND",
      "table": "LAMBDA_FLAT_TABLE",
      "column": "DEVICE_BRAND",
      "derived": null
    },
    {
      "name": "STR_MINUTE_SECOND",
      "table": "LAMBDA_FLAT_TABLE",
      "column": "STR_MINUTE_SECOND",
      "derived": null
    },
    {
      "name": "PAGE_ID",
      "table": "LAMBDA_FLAT_TABLE",
      "column": "PAGE_ID",
      "derived": null
    },
    {
      "name": "MONTH_START",
      "table": "LAMBDA_FLAT_TABLE",
      "column": "MONTH_START",
      "derived": null
    },
    {
      "name": "DAY_START",
      "table": "LAMBDA_FLAT_TABLE",
      "column": "DAY_START",
      "derived": null
    },
    {
      "name": "HOUR_START",
      "table": "LAMBDA_FLAT_TABLE",
      "column": "HOUR_START",
      "derived": null
    },
    {
      "name": "MINUTE_START",
      "table": "LAMBDA_FLAT_TABLE",
      "column": "MINUTE_START",
      "derived": null
    }
  ],
  "measures": [
    {
      "name": "_COUNT_",
      "function": {
        "expression": "COUNT",
        "parameter": {
          "type": "constant",
          "value": "1"
        },
        "returntype": "bigint"
      }
    },
    {
      "name": "PV_STAT",
      "function": {
        "expression": "COUNT_DISTINCT",
        "parameter": {
          "type": "column",
          "value": "LAMBDA_FLAT_TABLE.PAGEVIEW_ID"
        },
        "returntype": "hllc(16)"
      }
    },
    {
      "name": "PLAY_STAT",
      "function": {
        "expression": "SUM",
        "parameter": {
          "type": "column",
          "value": "LAMBDA_FLAT_TABLE.PLAY_TIMES"
        },
        "returntype": "bigint"
      }
    }
  ],
  "dictionaries": [],
  "rowkey": {
    "rowkey_columns": [
      {
        "column": "LAMBDA_FLAT_TABLE.USER_DEVIDE_TYPE",
        "encoding": "dict",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "LAMBDA_FLAT_TABLE.LOCATION_CITY",
        "encoding": "dict",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "LAMBDA_FLAT_TABLE.DEVICE_BRAND",
        "encoding": "dict",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "LAMBDA_FLAT_TABLE.STR_MINUTE_SECOND",
        "encoding": "dict",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "LAMBDA_FLAT_TABLE.PAGE_ID",
        "encoding": "dict",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "LAMBDA_FLAT_TABLE.MONTH_START",
        "encoding": "date",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "LAMBDA_FLAT_TABLE.DAY_START",
        "encoding": "date",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "LAMBDA_FLAT_TABLE.HOUR_START",
        "encoding": "time",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "LAMBDA_FLAT_TABLE.MINUTE_START",
        "encoding": "time",
        "encoding_version": 1,
        "isShardBy": false
      }
    ]
  },
  "hbase_mapping": {
    "column_family": [
      {
        "name": "F1",
        "columns": [
          {
            "qualifier": "M",
            "measure_refs": [
              "_COUNT_",
              "PLAY_STAT"
            ]
          }
        ]
      },
      {
        "name": "F2",
        "columns": [
          {
            "qualifier": "M",
            "measure_refs": [
              "PV_STAT"
            ]
          }
        ]
      }
    ]
  },
  "aggregation_groups": [
    {
      "includes": [
        "LAMBDA_FLAT_TABLE.USER_DEVIDE_TYPE",
        "LAMBDA_FLAT_TABLE.LOCATION_CITY",
        "LAMBDA_FLAT_TABLE.DEVICE_BRAND",
        "LAMBDA_FLAT_TABLE.STR_MINUTE_SECOND",
        "LAMBDA_FLAT_TABLE.PAGE_ID",
        "LAMBDA_FLAT_TABLE.MONTH_START",
        "LAMBDA_FLAT_TABLE.DAY_START",
        "LAMBDA_FLAT_TABLE.HOUR_START",
        "LAMBDA_FLAT_TABLE.MINUTE_START"
      ],
      "select_rule": {
        "hierarchy_dims": [],
        "mandatory_dims": [
          "LAMBDA_FLAT_TABLE.USER_DEVIDE_TYPE",
          "LAMBDA_FLAT_TABLE.STR_MINUTE_SECOND",
          "LAMBDA_FLAT_TABLE.DEVICE_BRAND"
        ],
        "joint_dims": []
      }
    }
  ],
  "signature": "z2i5rv8LtjgoSByK5a/Y9w==",
  "notify_list": [],
  "status_need_notify": [
    "ERROR",
    "DISCARDED",
    "SUCCEED"
  ],
  "partition_date_start": 0,
  "partition_date_end": 3153600000000,
  "auto_merge_time_ranges": [
    604800000,
    2419200000
  ],
  "volatile_range": 0,
  "retention_range": 0,
  "engine_type": 2,
  "storage_type": 3,
  "override_kylin_properties": {
    "kylin.stream.cube.window": "3600",
    "kylin.stream.cube.duration": "3600",
    "kylin.stream.index.checkpoint.intervals": "300",
    "kylin.cube.algorithm": "INMEM",
    "kylin.stream.segment.retention.policy": "fullBuild",
    "kylin.stream.build.additional.cuboids": "true",
    "kylin.stream.event.timezone": "GMT+8"
  },
  "cuboid_black_list": [],
  "parent_forward": 3,
  "mandatory_dimension_set_list": [],
  "snapshot_table_desc_list": []
}

Enable streaming cube


Load data into lambda table

data
2020-09-22 16:07:35,EMPTY,play,3,Shanghai,100010001,vivo,page_001,223,33.2222,3c84cf9d-b8fb-3dec-8b8c-f510c4b6fd097,2020-09-22 16:07:00,2020-09-22 16:00:00,2020-09-01
2020-09-22 16:07:35,EMPTY,stop,4,London,100010002,apple,page_002,224,24.2342,9311744c-3746-3502-84c9-d06e8b5ea2d6,2020-09-22 16:07:00,2020-09-22 16:00:00,2020-09-01
2020-09-22 17:07:35,EMPTY,play,3,Shanghai,100010001,vivo,page_001,225,33.2222,3c84cf9d-b8fb-3dec-8b8c-f510c4b6fd097,2020-09-22 17:07:00,2020-09-22 17:00:00,2020-09-01
2020-09-22 17:07:35,EMPTY,stop,4,London,100010002,apple,page_002,226,24.2342,9311744c-3746-3502-84c9-d06e8b5ea2d6,2020-09-22 17:07:00,2020-09-22 17:00:00,2020-09-01
2020-09-22 18:07:35,EMPTY,play,3,Shanghai,100010001,vivo,page_001,227,33.2222,3c84cf9d-b8fb-3dec-8b8c-f510c4b6fd097,2020-09-22 18:07:00,2020-09-22 18:00:00,2020-09-01
2020-09-22 18:07:35,EMPTY,stop,4,London,100010002,apple,page_002,228,24.2342,9311744c-3746-3502-84c9-d06e8b5ea2d6,2020-09-22 18:07:00,2020-09-22 18:00:00,2020-09-01
2020-09-22 19:07:35,EMPTY,play,3,Shanghai,100010001,vivo,page_001,229,33.2222,3c84cf9d-b8fb-3dec-8b8c-f510c4b6fd097,2020-09-22 19:07:00,2020-09-22 19:00:00,2020-09-01
2020-09-22 19:07:35,EMPTY,stop,4,London,100010002,apple,page_002,230,24.2342,9311744c-3746-3502-84c9-d06e8b5ea2d6,2020-09-22 19:07:00,2020-09-22 19:00:00,2020-09-01
2020-09-22 20:07:35,EMPTY,play,3,Shanghai,100010001,vivo,page_001,231,33.2222,3c84cf9d-b8fb-3dec-8b8c-f510c4b6fd097,2020-09-22 20:07:00,2020-09-22 20:00:00,2020-09-01
2020-09-22 20:07:35,EMPTY,stop,4,London,100010002,apple,page_002,232,24.2342,9311744c-3746-3502-84c9-d06e8b5ea2d6,2020-09-22 20:07:00,2020-09-22 20:00:00,2020-09-01
2020-09-22 21:07:35,EMPTY,play,3,Shanghai,100010001,vivo,page_001,233,33.2222,3c84cf9d-b8fb-3dec-8b8c-f510c4b6fd097,2020-09-22 21:07:00,2020-09-22 21:00:00,2020-09-01
2020-09-22 22:07:35,EMPTY,stop,4,London,100010002,apple,page_002,234,24.2342,9311744c-3746-3502-84c9-d06e8b5ea2d6,2020-09-22 22:07:00,2020-09-22 22:00:00,2020-09-01
2020-09-22 23:07:35,EMPTY,play,3,Shanghai,100010001,huawei,page_001,235,33.2222,3c84cf9d-b8fb-3dec-8b8c-f510c4b6fd097,2020-09-22 23:07:00,2020-09-22 23:00:00,2020-09-01
2020-09-23 00:07:35,EMPTY,pause,5,Tianjin,100010003,huawei,page_003,235,33.2222,e80160bc-f25a-3566-bf9b-a16e91ef6ee4,2020-09-23 00:07:00,2020-09-23 00:00:00,2020-09-01
2020-09-23 02:07:35,EMPTY,pause,5,Tokyo,100010004,huawei,page_004,235,33.2222,3c84cf9d-b8fb-3dec-8b8c-f510c4b6fd097,2020-09-23 02:07:00,2020-09-23 02:00:00,2020-09-01
2020-09-23 05:07:35,EMPTY,pause,5,Beijing,100010005,huawei,page_005,235,33.2222,e80160bc-f25a-3566-bf9b-a16e91ef6ee4,2020-09-23 05:07:00,2020-09-23 05:00:00,2020-09-01
2020-09-23 07:07:35,EMPTY,pause,5,Wuhan,100010006,huawei,page_006,235,33.2222,3c84cf9d-b8fb-3dec-8b8c-f510c4b6fd097,2020-09-23 07:07:00,2020-09-23 07:00:00,2020-09-01
2020-09-23 10:07:35,EMPTY,pause,5,Tianjin,100010007,huawei,page_007,235,33.2222,e80160bc-f25a-3566-bf9b-a16e91ef6ee4,2020-09-23 10:07:00,2020-09-23 10:00:00,2020-09-01
2020-09-23 12:07:35,EMPTY,pause,5,Tokyo,100010008,huawei,page_008,235,33.2222,3c84cf9d-b8fb-3dec-8b8c-f510c4b6fd097,2020-09-23 12:07:00,2020-09-23 12:00:00,2020-09-01
2020-09-23 15:07:35,EMPTY,pause,5,Beijing,100010009,huawei,page_009,235,33.2222,e80160bc-f25a-3566-bf9b-a16e91ef6ee4,2020-09-23 15:07:00,2020-09-23 15:00:00,2020-09-01



hadoop fs -put partition.csv hdfs:///LACUS/lambda_data/lambda_flat_table/day_start=2020-09-23




Insert Partition
use lacus_0923;
ALTER TABLE lambda_flat_table ADD PARTITION(DAY_START='2020-09-23') location 'hdfs:///LACUS/lambda_data/lambda_flat_table/day_start=2020-09-23';
select * from lambda_flat_table limit 5;

Check data
hive>
    > select hour_start, count(*) as event_num, count(distinct PAGEVIEW_ID) as pv_stat
    > from LAMBDA_FLAT_TABLE
    > where day_start >= '2020-09-23' and day_start < '2020-09-24'
    > group by hour_start
    > order by hour_start;
Query ID = root_20200928185454_2421456a-49d6-4f92-aac6-13099428b05d
Total jobs = 2
Launching Job 1 out of 2
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapreduce.job.reduces=<number>
Starting Job = job_1589169585068_18951, Tracking URL = http://cdh-master:8088/proxy/application_1589169585068_18951/
Kill Command = /opt/cloudera/parcels/CDH-5.7.6-1.cdh5.7.6.p0.6/lib/hadoop/bin/hadoop job  -kill job_1589169585068_18951
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2020-09-28 18:54:06,077 Stage-1 map = 0%,  reduce = 0%
2020-09-28 18:54:12,248 Stage-1 map = 100%,  reduce = 0%, Cumulative CPU 1.38 sec
2020-09-28 18:54:18,411 Stage-1 map = 100%,  reduce = 100%, Cumulative CPU 3.93 sec
MapReduce Total cumulative CPU time: 3 seconds 930 msec
Ended Job = job_1589169585068_18951
Launching Job 2 out of 2
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapreduce.job.reduces=<number>
Starting Job = job_1589169585068_18954, Tracking URL = http://cdh-master:8088/proxy/application_1589169585068_18954/
Kill Command = /opt/cloudera/parcels/CDH-5.7.6-1.cdh5.7.6.p0.6/lib/hadoop/bin/hadoop job  -kill job_1589169585068_18954
Hadoop job information for Stage-2: number of mappers: 1; number of reducers: 1
2020-09-28 18:54:39,111 Stage-2 map = 0%,  reduce = 0%
2020-09-28 18:54:49,388 Stage-2 map = 100%,  reduce = 0%, Cumulative CPU 1.28 sec
2020-09-28 18:54:56,570 Stage-2 map = 100%,  reduce = 100%, Cumulative CPU 3.89 sec
MapReduce Total cumulative CPU time: 3 seconds 890 msec
Ended Job = job_1589169585068_18954
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1  Reduce: 1   Cumulative CPU: 3.93 sec   HDFS Read: 12528 HDFS Write: 441 SUCCESS
Stage-Stage-2: Map: 1  Reduce: 1   Cumulative CPU: 3.89 sec   HDFS Read: 5352 HDFS Write: 360 SUCCESS
Total MapReduce CPU Time Spent: 7 seconds 820 msec
OK
2020-09-22 16:00:00	2	2
2020-09-22 17:00:00	2	2
2020-09-22 18:00:00	2	2
2020-09-22 19:00:00	2	2
2020-09-22 20:00:00	2	2
2020-09-22 21:00:00	1	1
2020-09-22 22:00:00	1	1
2020-09-22 23:00:00	1	1
2020-09-23 00:00:00	1	1
2020-09-23 02:00:00	1	1
2020-09-23 05:00:00	1	1
2020-09-23 07:00:00	1	1
2020-09-23 10:00:00	1	1
2020-09-23 12:00:00	1	1
2020-09-23 15:00:00	1	1
Time taken: 60.126 seconds, Fetched: 15 row(s)

Refresh streaming table

Send request to refresh segment

http://cdh-master:7200/kylin/api/cubes/UserActionBasicAnslysisCube/rebuild

Build API
{  
   "startTime":160081
9200000, 
   "endTime":  1600905600000, 
   "buildType":"BUILD"
}


Build Resopnse
{
    "uuid": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825",
    "last_modified": 1601288320263,
    "version": "3.0.0.20500",
    "name": "BUILD CUBE - UserActionBasicAnslysisCube - 20200923000000_20200924000000 - GMT+08:00 2020-09-28 18:18:40",
    "projectName": "LambdaPrj",
    "type": "BUILD",
    "duration": 0,
    "related_cube": "UserActionBasicAnslysisCube",
    "display_cube_name": "UserActionBasicAnslysisCube",
    "related_segment": "542a8a51-5a5c-d24a-19a2-a37a59fac050",
    "related_segment_name": "20200923000000_20200924000000",
    "exec_start_time": 0,
    "exec_end_time": 0,
    "exec_interrupt_time": 0,
    "mr_waiting": 0,
    "steps": [
        {
            "interruptCmd": null,
            "id": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825-00",
            "name": "Create Intermediate Flat Hive Table",
            "sequence_id": 0,
            "exec_cmd": null,
            "interrupt_cmd": null,
            "exec_start_time": 0,
            "exec_end_time": 0,
            "exec_wait_time": 0,
            "step_status": "PENDING",
            "cmd_type": "SHELL_CMD_HADOOP",
            "info": {},
            "run_async": false
        },
        {
            "interruptCmd": null,
            "id": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825-01",
            "name": "Redistribute Flat Hive Table",
            "sequence_id": 1,
            "exec_cmd": null,
            "interrupt_cmd": null,
            "exec_start_time": 0,
            "exec_end_time": 0,
            "exec_wait_time": 0,
            "step_status": "PENDING",
            "cmd_type": "SHELL_CMD_HADOOP",
            "info": {},
            "run_async": false
        },
        {
            "interruptCmd": null,
            "id": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825-02",
            "name": "Extract Fact Table Distinct Columns",
            "sequence_id": 2,
            "exec_cmd": " -conf /root/xiaoxiang.yu/tmp/apache-kylin-3.1.1-SNAPSHOT-bin-0928/conf/kylin_job_conf.xml -cubename UserActionBasicAnslysisCube -output hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/fact_distinct_columns -segmentid 542a8a51-5a5c-d24a-19a2-a37a59fac050 -statisticsoutput hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/fact_distinct_columns/statistics -statisticssamplingpercent 100 -jobname Kylin_Fact_Distinct_Columns_UserActionBasicAnslysisCube_Step -cubingJobId f1b9f3ab-45f6-8ec8-f701-2306d24bc825",
            "interrupt_cmd": null,
            "exec_start_time": 0,
            "exec_end_time": 0,
            "exec_wait_time": 0,
            "step_status": "PENDING",
            "cmd_type": "SHELL_CMD_HADOOP",
            "info": {},
            "run_async": false
        },
        {
            "interruptCmd": null,
            "id": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825-03",
            "name": "Build Dimension Dictionary",
            "sequence_id": 3,
            "exec_cmd": " -cubename UserActionBasicAnslysisCube -segmentid 542a8a51-5a5c-d24a-19a2-a37a59fac050 -input hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/fact_distinct_columns -dictPath hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/dict -cubingJobId f1b9f3ab-45f6-8ec8-f701-2306d24bc825",
            "interrupt_cmd": null,
            "exec_start_time": 0,
            "exec_end_time": 0,
            "exec_wait_time": 0,
            "step_status": "PENDING",
            "cmd_type": "SHELL_CMD_HADOOP",
            "info": {},
            "run_async": false
        },
        {
            "interruptCmd": null,
            "id": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825-04",
            "name": "Save Cuboid Statistics",
            "sequence_id": 4,
            "exec_cmd": null,
            "interrupt_cmd": null,
            "exec_start_time": 0,
            "exec_end_time": 0,
            "exec_wait_time": 0,
            "step_status": "PENDING",
            "cmd_type": "SHELL_CMD_HADOOP",
            "info": {},
            "run_async": false
        },
        {
            "interruptCmd": null,
            "id": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825-05",
            "name": "Create HTable",
            "sequence_id": 5,
            "exec_cmd": " -cubename UserActionBasicAnslysisCube -segmentid 542a8a51-5a5c-d24a-19a2-a37a59fac050 -partitions hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/rowkey_stats/part-r-00000 -cuboidMode CURRENT -hbaseConfPath hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/hbase-conf.xml",
            "interrupt_cmd": null,
            "exec_start_time": 0,
            "exec_end_time": 0,
            "exec_wait_time": 0,
            "step_status": "PENDING",
            "cmd_type": "SHELL_CMD_HADOOP",
            "info": {},
            "run_async": false
        },
        {
            "interruptCmd": null,
            "id": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825-06",
            "name": "Build Base Cuboid",
            "sequence_id": 6,
            "exec_cmd": " -conf /root/xiaoxiang.yu/tmp/apache-kylin-3.1.1-SNAPSHOT-bin-0928/conf/kylin_job_conf.xml -cubename UserActionBasicAnslysisCube -segmentid 542a8a51-5a5c-d24a-19a2-a37a59fac050 -input FLAT_TABLE -output hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/cuboid/level_base_cuboid -jobname Kylin_Base_Cuboid_Builder_UserActionBasicAnslysisCube -level 0 -cubingJobId f1b9f3ab-45f6-8ec8-f701-2306d24bc825",
            "interrupt_cmd": null,
            "exec_start_time": 0,
            "exec_end_time": 0,
            "exec_wait_time": 0,
            "step_status": "PENDING",
            "cmd_type": "SHELL_CMD_HADOOP",
            "info": {},
            "run_async": false
        },
        {
            "interruptCmd": null,
            "id": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825-07",
            "name": "Build N-Dimension Cuboid : level 1",
            "sequence_id": 7,
            "exec_cmd": " -conf /root/xiaoxiang.yu/tmp/apache-kylin-3.1.1-SNAPSHOT-bin-0928/conf/kylin_job_conf.xml -cubename UserActionBasicAnslysisCube -segmentid 542a8a51-5a5c-d24a-19a2-a37a59fac050 -input hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/cuboid/level_base_cuboid -output hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/cuboid/level_1_cuboid -jobname Kylin_ND-Cuboid_Builder_UserActionBasicAnslysisCube_Step -level 1 -cubingJobId f1b9f3ab-45f6-8ec8-f701-2306d24bc825",
            "interrupt_cmd": null,
            "exec_start_time": 0,
            "exec_end_time": 0,
            "exec_wait_time": 0,
            "step_status": "PENDING",
            "cmd_type": "SHELL_CMD_HADOOP",
            "info": {},
            "run_async": false
        },
        {
            "interruptCmd": null,
            "id": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825-08",
            "name": "Build N-Dimension Cuboid : level 2",
            "sequence_id": 8,
            "exec_cmd": " -conf /root/xiaoxiang.yu/tmp/apache-kylin-3.1.1-SNAPSHOT-bin-0928/conf/kylin_job_conf.xml -cubename UserActionBasicAnslysisCube -segmentid 542a8a51-5a5c-d24a-19a2-a37a59fac050 -input hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/cuboid/level_1_cuboid -output hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/cuboid/level_2_cuboid -jobname Kylin_ND-Cuboid_Builder_UserActionBasicAnslysisCube_Step -level 2 -cubingJobId f1b9f3ab-45f6-8ec8-f701-2306d24bc825",
            "interrupt_cmd": null,
            "exec_start_time": 0,
            "exec_end_time": 0,
            "exec_wait_time": 0,
            "step_status": "PENDING",
            "cmd_type": "SHELL_CMD_HADOOP",
            "info": {},
            "run_async": false
        },
        {
            "interruptCmd": null,
            "id": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825-09",
            "name": "Build N-Dimension Cuboid : level 3",
            "sequence_id": 9,
            "exec_cmd": " -conf /root/xiaoxiang.yu/tmp/apache-kylin-3.1.1-SNAPSHOT-bin-0928/conf/kylin_job_conf.xml -cubename UserActionBasicAnslysisCube -segmentid 542a8a51-5a5c-d24a-19a2-a37a59fac050 -input hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/cuboid/level_2_cuboid -output hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/cuboid/level_3_cuboid -jobname Kylin_ND-Cuboid_Builder_UserActionBasicAnslysisCube_Step -level 3 -cubingJobId f1b9f3ab-45f6-8ec8-f701-2306d24bc825",
            "interrupt_cmd": null,
            "exec_start_time": 0,
            "exec_end_time": 0,
            "exec_wait_time": 0,
            "step_status": "PENDING",
            "cmd_type": "SHELL_CMD_HADOOP",
            "info": {},
            "run_async": false
        },
        {
            "interruptCmd": null,
            "id": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825-10",
            "name": "Build N-Dimension Cuboid : level 4",
            "sequence_id": 10,
            "exec_cmd": " -conf /root/xiaoxiang.yu/tmp/apache-kylin-3.1.1-SNAPSHOT-bin-0928/conf/kylin_job_conf.xml -cubename UserActionBasicAnslysisCube -segmentid 542a8a51-5a5c-d24a-19a2-a37a59fac050 -input hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/cuboid/level_3_cuboid -output hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/cuboid/level_4_cuboid -jobname Kylin_ND-Cuboid_Builder_UserActionBasicAnslysisCube_Step -level 4 -cubingJobId f1b9f3ab-45f6-8ec8-f701-2306d24bc825",
            "interrupt_cmd": null,
            "exec_start_time": 0,
            "exec_end_time": 0,
            "exec_wait_time": 0,
            "step_status": "PENDING",
            "cmd_type": "SHELL_CMD_HADOOP",
            "info": {},
            "run_async": false
        },
        {
            "interruptCmd": null,
            "id": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825-11",
            "name": "Build N-Dimension Cuboid : level 5",
            "sequence_id": 11,
            "exec_cmd": " -conf /root/xiaoxiang.yu/tmp/apache-kylin-3.1.1-SNAPSHOT-bin-0928/conf/kylin_job_conf.xml -cubename UserActionBasicAnslysisCube -segmentid 542a8a51-5a5c-d24a-19a2-a37a59fac050 -input hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/cuboid/level_4_cuboid -output hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/cuboid/level_5_cuboid -jobname Kylin_ND-Cuboid_Builder_UserActionBasicAnslysisCube_Step -level 5 -cubingJobId f1b9f3ab-45f6-8ec8-f701-2306d24bc825",
            "interrupt_cmd": null,
            "exec_start_time": 0,
            "exec_end_time": 0,
            "exec_wait_time": 0,
            "step_status": "PENDING",
            "cmd_type": "SHELL_CMD_HADOOP",
            "info": {},
            "run_async": false
        },
        {
            "interruptCmd": null,
            "id": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825-12",
            "name": "Build Cube In-Mem",
            "sequence_id": 12,
            "exec_cmd": " -conf /root/xiaoxiang.yu/tmp/apache-kylin-3.1.1-SNAPSHOT-bin-0928/conf/kylin_job_conf_inmem.xml -cubename UserActionBasicAnslysisCube -segmentid 542a8a51-5a5c-d24a-19a2-a37a59fac050 -output hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/cuboid/ -jobname Kylin_Cube_Builder_UserActionBasicAnslysisCube -cubingJobId f1b9f3ab-45f6-8ec8-f701-2306d24bc825",
            "interrupt_cmd": null,
            "exec_start_time": 0,
            "exec_end_time": 0,
            "exec_wait_time": 0,
            "step_status": "PENDING",
            "cmd_type": "SHELL_CMD_HADOOP",
            "info": {},
            "run_async": false
        },
        {
            "interruptCmd": null,
            "id": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825-13",
            "name": "Convert Cuboid Data to HFile",
            "sequence_id": 13,
            "exec_cmd": " -conf /root/xiaoxiang.yu/tmp/apache-kylin-3.1.1-SNAPSHOT-bin-0928/conf/kylin_job_conf.xml -cubename UserActionBasicAnslysisCube -partitions hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/rowkey_stats/part-r-00000_hfile -input hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/cuboid/* -output hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/hfile -htablename APACHE:REALTIME_OLAP_ZPIAP5RO7L -jobname Kylin_HFile_Generator_UserActionBasicAnslysisCube_Step",
            "interrupt_cmd": null,
            "exec_start_time": 0,
            "exec_end_time": 0,
            "exec_wait_time": 0,
            "step_status": "PENDING",
            "cmd_type": "SHELL_CMD_HADOOP",
            "info": {},
            "run_async": false
        },
        {
            "interruptCmd": null,
            "id": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825-14",
            "name": "Load HFile to HBase Table",
            "sequence_id": 14,
            "exec_cmd": " -input hdfs://cdh-master:8020/APACHE/REALTIME_OLAP/REALTIME/kylin-f1b9f3ab-45f6-8ec8-f701-2306d24bc825/UserActionBasicAnslysisCube/hfile -htablename APACHE:REALTIME_OLAP_ZPIAP5RO7L -cubename UserActionBasicAnslysisCube",
            "interrupt_cmd": null,
            "exec_start_time": 0,
            "exec_end_time": 0,
            "exec_wait_time": 0,
            "step_status": "PENDING",
            "cmd_type": "SHELL_CMD_HADOOP",
            "info": {},
            "run_async": false
        },
        {
            "interruptCmd": null,
            "id": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825-15",
            "name": "Update Cube Info",
            "sequence_id": 15,
            "exec_cmd": null,
            "interrupt_cmd": null,
            "exec_start_time": 0,
            "exec_end_time": 0,
            "exec_wait_time": 0,
            "step_status": "PENDING",
            "cmd_type": "SHELL_CMD_HADOOP",
            "info": {},
            "run_async": false
        },
        {
            "interruptCmd": null,
            "id": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825-16",
            "name": "Hive Cleanup",
            "sequence_id": 16,
            "exec_cmd": null,
            "interrupt_cmd": null,
            "exec_start_time": 0,
            "exec_end_time": 0,
            "exec_wait_time": 0,
            "step_status": "PENDING",
            "cmd_type": "SHELL_CMD_HADOOP",
            "info": {},
            "run_async": false
        },
        {
            "interruptCmd": null,
            "id": "f1b9f3ab-45f6-8ec8-f701-2306d24bc825-17",
            "name": "Garbage Collection on HDFS",
            "sequence_id": 17,
            "exec_cmd": null,
            "interrupt_cmd": null,
            "exec_start_time": 0,
            "exec_end_time": 0,
            "exec_wait_time": 0,
            "step_status": "PENDING",
            "cmd_type": "SHELL_CMD_HADOOP",
            "info": {},
            "run_async": false
        }
    ],
    "submitter": "ADMIN",
    "job_status": "PENDING",
    "build_instance": "unknown",
    "progress": 0
}


Verify query result


  • No labels