...
ALTER SCHEDULED QUERY <scheduled_query_name>
(<scheduleSpec>|<executedAsSpec>|<enableSpecification>|<definedAsSpec>|<executeSpec>);
Drop syntax
DROP SCHEDULED QUERY <scheduled_query_name>;
...
Note |
---|
For CREATE SCHEDULED QUERY statements the default behaviour is set by the configuration key hive.scheduled.queries.create.as.enabled |
Note |
---|
In case there are in-flight scheduled executions at the time when the corresponding schedule is disabled - the already running executions will still finish. But no more executions will be triggered. |
AnchordefinedAsSpec definedAsSpec
Defined AS syntax
definedAsSpec | |
definedAsSpec |
...
The “query” is a single statement expression to be scheduled for execution.
Anchor | ||||
---|---|---|---|---|
|
EXECUTE
Changes the schedules next execution time to be now. Could be useful during debugging/development.
...
- hive.scheduled.queries.executor.enabled (default: true)
Controls whether HS2 will run scheduled query executor. - hive.scheduled.queries.namespace (default: "hive")
Sets the scheduled query namespace to be used. New scheduled queries are created in this namespace; and execution is also bound to the namespace - hive.scheduled.queries.executor.idle.sleep.time (default: 1 minute)
Time to sleep between querying for the presence of a scheduled query. - hive.scheduled.queries.executor.progress.report.interval (default: 1 minute)
While scheduled queries are in flight; a background update happens periodically to report the actual state of the query. - hive.scheduled.queries.create.as.enabled (default: true)
This option sets the default behaviour of newly created scheduled queries. - hive.security.authorization.scheduled.queries.supported (default: false)
Enable this if the configured authorizer is able to handle scheduled query related calls.
Examples
Example 1
...
– basic example of using schedules
Code Block |
---|
create table t (a integer);
-- create a scheduled query; every 10 minute insert a new row
create scheduled query sc1 cron '0 */10 * * * ? *' as insert into t values (1);
-- depending on hive.scheduled.queries.create.as.enabled the query might get create in disabled mode
-- it can be enabled using:
alter scheduled query sc1 enabled;
-- inspect scheduled queries using the information_schema
select * from information_schema.scheduled_queries s where schedule_name='sc1';
+-----------------------+------------------+------------+----------------------+-------------------+---------+-----------+----------------------+
| s.scheduled_query_id | s.schedule_name | s.enabled | s.cluster_namespace | s.schedule | s.user | s.query | s.next_execution |
+-----------------------+------------------+------------+----------------------+-------------------+---------+-----------+----------------------+
| 1 | sc1 | true | hive | 0 */10 * * * ? * | dev | select 1 | 2020-02-03 15:10:00 |
+-----------------------+------------------+------------+----------------------+-------------------+---------+-----------+----------------------+
-- wait 10 minutes or execute by issuing:
alter scheduled query sc1 execute;
select * from information_schema.scheduled_executions s where schedule_name='sc1' order by scheduled_execution_id desc limit 1;
+---------------------------+------------------+----------------------------------------------------+-----------+----------------------+----------------------+------------+------------------+---------------------+
| s.scheduled_execution_id | s.schedule_name | s.executor_query_id | s.state | s.start_time | s.end_time | s.elapsed | s.error_message | s.last_update_time |
+---------------------------+------------------+----------------------------------------------------+-----------+----------------------+----------------------+------------+------------------+---------------------+
| 496 | sc1 | dev_20200203152025_bdf3deac-0ca6-407f-b122-c637e50f99c8 | FINISHED | 2020-02-03 15:20:23 | 2020-02-03 15:20:31 | 8 | NULL | NULL |
+---------------------------+------------------+----------------------------------------------------+-----------+----------------------+----------------------+------------+------------------+---------------------+
|
Example 2
...
– analyze external table periodically
Suppose you have an external table - the contents of it is slowly changing...which will eventually lead that Hive will utilize outdated statistics during planning time
Code Block |
---|
-- create external table create external table ext (a integer); -- see where the table lives: desc formatted t; [...] | Location: | file:/data/hive/warehouse/ext | NULL | [...] -- in a terminal; load some data into the table directory: seq 1 10 > /data/hive/warehouse/ext/f1 -- back in hive you will see that select count(1) from ext; 10 -- meanwhile basic stats show that the table has "0" rows desc formatted ext; [...] | | numRows | 0 | [...] create scheduled query ext_analyze cron '0 */1 * * * ? *' as analyze table ext compute statistics for columns; -- wait some time... or execute by issuing: alter scheduled query t_analyze execute; select * from information_schema.scheduled_executions s where schedule_name='ex_analyze' order by scheduled_execution_id desc limit 3; +---------------------------+------------------+----------------------------------------------------+------------+----------------------+----------------------+------------+------------------+----------------------+ | s.scheduled_execution_id | s.schedule_name | s.executor_query_id | s.state | s.start_time | s.end_time | s.elapsed | s.error_message | s.last_update_time | +---------------------------+------------------+----------------------------------------------------+------------+----------------------+----------------------+------------+------------------+----------------------+ | 498 | ext_analyze | dev_20200203152640_a59bc198-3ed3-4ef2-8f63-573607c9914e | FINISHED | 2020-02-03 15:26:38 | 2020-02-03 15:28:01 | 83 | NULL | NULL | +---------------------------+------------------+----------------------------------------------------+------------+----------------------+----------------------+------------+------------------+----------------------+ -- and the numrows have been updated desc formatted ext; [...] | | numRows | 10 | [...] -- we don't want this running every minute anymore... alter scheduled query ext_analyze disable; |
Example 3
...
– materialized view rebuild
...
Code Block |
---|
-- some settings...they might be there already set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.strict.checks.cartesian.product=false; set hive.stats.fetch.column.stats=true; set hive.materializedview.rewriting=true; -- create some tables CREATE TABLE emps ( empid INT, deptno INT, name VARCHAR(256), salary FLOAT, hire_date TIMESTAMP) STORED AS ORC TBLPROPERTIES ('transactional'='true'); CREATE TABLE depts ( deptno INT, deptname VARCHAR(256), locationid INT) STORED AS ORC TBLPROPERTIES ('transactional'='true'); -- load data insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250), (1330, 10, 'Bill', 10000, '2020-01-02'); insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20); insert into emps values (1330, 10, 'Bill', 10000, '2020-01-02'); -- create mv CREATE MATERIALIZED VIEW mv1 AS SELECT empid, deptname, hire_date FROM emps JOIN depts ON (emps.deptno = depts.deptno) WHERE hire_date >= '2016-01-01 00:00:00'; EXPLAIN SELECT empid, deptname FROM emps JOIN depts ON (emps.deptno = depts.deptno) WHERE hire_date >= '2018-01-01'; -- create a schedule to rebuild mv create scheduled query dmv_rebuild cron '0 */10 * * * ? *' defined as alter materialized view mv1 rebuild; -- from this expalin it will be seen that the mv1 is being used EXPLAIN SELECT empid, deptname FROM emps JOIN depts ON (emps.deptno = depts.deptno) WHERE hire_date >= '2018-01-01'; -- insert a new record insert into emps values (1330, 10, 'Bill', 10000, '2020-01-02'); -- the source tables are scanned EXPLAIN SELECT empid, deptname FROM emps JOIN depts ON (emps.deptno = depts.deptno) WHERE hire_date >= '2018-01-01'; -- wait 10 minutes or execute alter scheduled query mv_rebuild execute; -- run it again...the view should be rebuilt EXPLAIN SELECT empid, deptname FROM emps JOIN depts ON (emps.deptno = depts.deptno) WHERE hire_date >= '2018-01-01'; |
Example 4 – Ingestion
Code Block |
---|
drop table if exists t;
drop table if exists s;
-- suppose that this table is an external table or something
-- which supports the pushdown of filter condition on the id column
create table s(id integer, cnt integer);
-- create an internal table and an offset table
create table t(id integer, cnt integer);
create table t_offset(offset integer);
insert into t_offset values(0);
-- pretend that data is added to s
insert into s values(1,1);
-- run an ingestion...
from (select id==offset as first,* from s
join t_offset on id>=offset) s1
insert into t select id,cnt where first = false
insert overwrite table t_offset select max(s1.id);
-- configure to run ingestion every 10 minutes
create scheduled query ingest every 10 minutes defined as
from (select id==offset as first,* from s
join t_offset on id>=offset) s1
insert into t select id,cnt where first = false
insert overwrite table t_offset select max(s1.id);
-- add some new values
insert into s values(2,2),(3,3);
-- pretend that a timeout have happened
alter scheduled query ingest execute;
|