...
Code Block |
---|
create table t (a integer); -- create a scheduled query; every 10 minute insert a new row create scheduled query sc1 cron ‘0'0 */10 * * * ? *’' as insert into t values (1); -- depending on hive.scheduled.queries.create.as.enabled the query might get create in disabled mode -- it can be enabled using: alter scheduled query sc1 enabled; -- inspect scheduled queries using the information_schema select * from information_schema.scheduled_queries s where schedule_name='sc1'; +-----------------------+------------------+------------+----------------------+-------------------+---------+-----------+----------------------+ | s.scheduled_query_id | s.schedule_name | s.enabled | s.cluster_namespace | s.schedule | s.user | s.query | s.next_execution | +-----------------------+------------------+------------+----------------------+-------------------+---------+-----------+----------------------+ | 1 | sc1 | true | hive | 0 */10 * * * ? * | dev | select 1 | 2020-02-03 15:10:00 | +-----------------------+------------------+------------+----------------------+-------------------+---------+-----------+----------------------+ -- wait 10 minutes select * from information_schema.scheduled_executions s where schedule_name='sc1' order by scheduled_execution_id desc limit 1; +---------------------------+------------------+----------------------------------------------------+-----------+----------------------+----------------------+------------+------------------+---------------------+ | s.scheduled_execution_id | s.schedule_name | s.executor_query_id | s.state | s.start_time | s.end_time | s.elapsed | s.error_message | s.last_update_time | +---------------------------+------------------+----------------------------------------------------+-----------+----------------------+----------------------+------------+------------------+---------------------+ | 496 | sc1 | dev_20200203152025_bdf3deac-0ca6-407f-b122-c637e50f99c8 | FINISHED | 2020-02-03 15:20:23 | 2020-02-03 15:20:31 | 8 | NULL | NULL | +---------------------------+------------------+----------------------------------------------------+-----------+----------------------+----------------------+------------+------------------+---------------------+ |
...
Code Block |
---|
-- some settings...they might be there already set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.strict.checks.cartesian.product=false; set hive.stats.fetch.column.stats=true; set hive.materializedview.rewriting=true; -- create some tables CREATE TABLE emps ( empid INT, deptno INT, name VARCHAR(256), salary FLOAT, hire_date TIMESTAMP) STORED AS ORC TBLPROPERTIES ('transactional'='true'); CREATE TABLE depts ( deptno INT, deptname VARCHAR(256), locationid INT) STORED AS ORC TBLPROPERTIES ('transactional'='true'); -- load data insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250), (1330, 10, 'Bill', 10000, '2020-01-02'); insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20); insert into emps values (1330, 10, 'Bill', 10000, '2020-01-02'); -- create mv CREATE MATERIALIZED VIEW mv1 AS SELECT empid, deptname, hire_date FROM emps JOIN depts ON (emps.deptno = depts.deptno) WHERE hire_date >= '2016-01-01 00:00:00'; EXPLAIN SELECT empid, deptname FROM emps JOIN depts ON (emps.deptno = depts.deptno) WHERE hire_date >= '2018-01-01'; -- create a schedule to rebuild mv create scheduled query d cron '0 */10 * * * ? *' defined as alter materialized view mv1 rebuild; -- from this expalin it will be seen that the mv1 is being used EXPLAIN SELECT empid, deptname FROM emps JOIN depts ON (emps.deptno = depts.deptno) WHERE hire_date >= '2018-01-01'; -- insert a new record insert into emps values (1330, 10, 'Bill', 10000, '2020-01-02'); -- the source tables are scanned EXPLAIN SELECT empid, deptname FROM emps JOIN depts ON (emps.deptno = depts.deptno) WHERE hire_date >= '2018-01-01'; -- wait 10 minutes -- run it again...the view should be rebuilt EXPLAIN SELECT empid, deptname FROM emps JOIN depts ON (emps.deptno = depts.deptno) WHERE hire_date >= '2018-01-01'; |
create table t (a integer);
create scheduled query sc1 cron ‘0 */10 * * * ? *’ as insert into t values (1);
Example 1 - general usage of schedules
create table t (a integer);
-- create scheduled queries
create scheduled query sc1 cron ‘0 */10 * * * ? *’ as insert into t values (1);
-- depending on hive.scheduled.queries.create.as.enabled the query might get create in disabled mode
-- it can be enabled using:
alter scheduled query sc1 enabled;
-- inspect scheduled queries using the information_schema
...