THIS IS A TEST INSTANCE. ALL YOUR CHANGES WILL BE LOST!!!!

Apache Kylin : Analytical Data Warehouse for Big Data

Page tree

Welcome to the Kylin Wiki.


Deploy Kylin on EMR 5.31

  • Check Hadoop version and download Kylin and Spark
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# hadoop version
Hadoop 2.10.0-amzn-0
Subversion git@aws157git.com:/pkg/Aws157BigTop -r d1e860a34cc1aea3d600c57c5c0270ea41579e8c
Compiled by ec2-user on 2020-09-19T02:05Z
Compiled with protoc 2.5.0
From source with checksum 61f0bc74ab37bcbfbc09b3846ee32b
This command was run using /usr/lib/hadoop/hadoop-common-2.10.0-amzn-0.jar
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# hive --version
Hive 2.3.7-amzn-1
Git git://ip-10-0-0-57/workspace/workspace/bigtop.release-rpm-5.31.0/build/hive/rpm/BUILD/apache-hive-2.3.7-amzn-1-src -r d1e860a34cc1aea3d600c57c5c0270ea41579e8c
Compiled by ec2-user on Sat Sep 19 02:48:49 UTC 2020
From source with checksum b7d9cc83f78a0b3e0f2b22c78e54aae1


[root@ip-172-31-1-253 hadoop]# aws s3 cp s3://XXX/xxyu_upload/apache-kylin-4.0.0-SNAPSHOT-bin.tar-b08c1be22eb51796fb58c3694e86e60f948337f6.gz .
download: s3://XXX/xxyu_upload/apache-kylin-4.0.0-SNAPSHOT-bin.tar-b08c1be22eb51796fb58c3694e86e60f948337f6.gz to ./apache-kylin-4.0.0-SNAPSHOT-bin.tar-b08c1be22eb51796fb58c3694e86e60f948337f6.gz
[root@ip-172-31-1-253 hadoop]# tar zxf apache-kylin-4.0.0-SNAPSHOT-bin.tar-b08c1be22eb51796fb58c3694e86e60f948337f6.gz
[root@ip-172-31-1-253 hadoop]# cd apache-kylin-4.0.0-SNAPSHOT-bin/
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# aws s3 cp s3://XXX/xxyu_upload/spark-2.4.6-bin-hadoop2.7.tgz .
download: s3://XXX/xxyu_upload/spark-2.4.6-bin-hadoop2.7.tgz to ./spark-2.4.6-bin-hadoop2.7.tgz
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# tar zxf spark-2.4.6-bin-hadoop2.7.tgz
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# mv spark-2.4.6-bin-hadoop2.7 spark
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# cat commit_SHA1
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
b08c1be22eb51796fb58c3694e86e60f948337f6
  • Prepare kylin.properties
kylin.metadata.url=kylin_default_instance@jdbc,url=jdbc:mysql://ip-172-31-1-253.cn-northwest-1.compute.internal:3306/kylin,driverClassName=org.mariadb.jdbc.Driver,username=xxyu,password=newpassword
kylin.spark-conf.auto.prior=false
kylin.engine.spark-conf.spark.executor.memory=5g
kylin.engine.spark-conf.spark.executor.cores=2
kylin.engine.spark-conf.spark.executor.instances=3
kylin.env.zookeeper-connect-string=ip-172-31-1-253.cn-northwest-1.compute.internal
  • Prepare Metastore for testing
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# mysql
Welcome to the MariaDB monitor.  Commands end with ; or \g.
Your MariaDB connection id is 70
Server version: 5.5.68-MariaDB MariaDB Server

Copyright (c) 2000, 2018, Oracle, MariaDB Corporation Ab and others.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

MariaDB [(none)]> CREATE USER 'xxyu'@'ip-172-31-1-253.cn-northwest-1.compute.internal' IDENTIFIED BY 'newpassword' ;
Query OK, 0 rows affected (0.00 sec)

MariaDB [(none)]> GRANT ALL PRIVILEGES ON *.* TO 'xxyu'@'ip-172-31-1-253.cn-northwest-1.compute.internal' WITH GRANT OPTION ;
Query OK, 0 rows affected (0.00 sec)

MariaDB [(none)]> FLUSH PRIVILEGES;
Query OK, 0 rows affected (0.00 sec)

MariaDB [(none)]> Bye

[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# mysql -u xxyu -h ip-172-31-1-253.cn-northwest-1.compute.internal  -p
Enter password:
Welcome to the MariaDB monitor.  Commands end with ; or \g.
Your MariaDB connection id is 73
Server version: 5.5.68-MariaDB MariaDB Server

Copyright (c) 2000, 2018, Oracle, MariaDB Corporation Ab and others.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

MariaDB [(none)]> create database kylin;
Query OK, 1 row affected (0.00 sec)

MariaDB [(none)]> show databases;
+--------------------+
| Database           |
+--------------------+
| information_schema |
| hive               |
| hue                |
| kylin              |
| mysql              |
| performance_schema |
+--------------------+
6 rows in set (0.00 sec)

MariaDB [(none)]> Bye
  • Replace jars under $KYLIN_HOME/spark/jars
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# find /usr/lib -name "*mariadb*"
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# mkdir ext
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# cp /usr/lib/hive/lib/mariadb-connector-java.jar ext
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# rm -rf spark/jars/hadoop-*.jar
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# cp /usr/lib/spark/jars/hadoop-*.jar spark/jars/
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# cp /usr/lib/spark/jars/emr-spark-goodies.jar  spark/jars/
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# cp /usr/lib/spark/jars/htrace-core4-4.1.0-incubating.jar spark/jars
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# cp /usr/lib/hadoop-lzo/lib/hadoop-lzo-0.4.19.jar spark/jars
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# cp /usr/lib/hadoop/lib/woodstox-core-5.0.3.jar spark/jars/
  • Start Kylin 
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# sh bin/kylin.sh start
Retrieving hadoop conf dir...
KYLIN_HOME is set to /home/hadoop/apache-kylin-4.0.0-SNAPSHOT-bin
mkdir: Permission denied: user=root, access=WRITE, inode="/":hdfs:hadoop:drwxr-xr-x
Failed to create /kylin. Please make sure the user has right to access /kylin
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# sh bin/kylin.sh start
Retrieving hadoop conf dir...
KYLIN_HOME is set to /home/hadoop/apache-kylin-4.0.0-SNAPSHOT-bin
Retrieving hive dependency...
Retrieving hadoop conf dir...
Retrieving Spark dependency...
Start replacing hadoop jars under /home/hadoop/apache-kylin-4.0.0-SNAPSHOT-bin/spark/jars.
find: ‘/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/../hadoop/’: No such file or directory
2.10.0-amzn-0.jar
Find platform specific jars: , will replace with these jars under /home/hadoop/apache-kylin-4.0.0-SNAPSHOT-bin/spark/jars.
Please confirm that the corresponding hadoop jars have been replaced. The automatic replacement program cannot be executed correctly.
Done hadoop jars replacement under /home/hadoop/apache-kylin-4.0.0-SNAPSHOT-bin/spark/jars.
Start to check whether we need to migrate acl tables
Not HBase metadata. Skip check.

A new Kylin instance is started by root. To stop it, run 'kylin.sh stop'
Check the log at /home/hadoop/apache-kylin-4.0.0-SNAPSHOT-bin/logs/kylin.log
Web UI is at http://ip-172-31-1-253.cn-northwest-1.compute.internal:7070/kylin
  • Modify $KYLIN_HOME/hadoop_conf/hive-site.xml (after Kylin has been started)
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# ll hadoop_conf/
总用量 0
lrwxrwxrwx 1 root root 30 1月   6 11:46 core-site.xml -> /etc/hadoop/conf/core-site.xml
lrwxrwxrwx 1 root root 30 1月   6 11:46 hadoop-env.sh -> /etc/hadoop/conf/hadoop-env.sh
lrwxrwxrwx 1 root root 30 1月   6 11:46 hdfs-site.xml -> /etc/hadoop/conf/hdfs-site.xml
lrwxrwxrwx 1 root root 28 1月   6 11:46 hive-site.xml -> /etc/hive/conf/hive-site.xml
lrwxrwxrwx 1 root root 32 1月   6 11:46 mapred-site.xml -> /etc/hadoop/conf/mapred-site.xml
lrwxrwxrwx 1 root root 31 1月   6 11:46 ssl-client.xml -> /etc/hadoop/conf/ssl-client.xml
lrwxrwxrwx 1 root root 30 1月   6 11:46 yarn-site.xml -> /etc/hadoop/conf/yarn-site.xml
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# rm hadoop_conf/hive-site.xml
rm:是否删除符号链接 "hadoop_conf/hive-site.xml"?y
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# cp /etc/hive/conf/hive-site.xml hadoop_conf/
[root@ip-172-31-1-253 apache-kylin-4.0.0-SNAPSHOT-bin]# vim hadoop_conf/hive-site.xml



  • No labels