forked from galsalomon66/s3select
-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathrun_trino_on_ceph.bash
86 lines (70 loc) · 3.45 KB
/
run_trino_on_ceph.bash
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/bin/bash
root_dir()
{
  # Change the current directory to the top level of the enclosing git work tree.
  # Returns: 0 on success; non-zero, with the current directory left untouched,
  #          when git cannot report a top level or the directory cannot be entered.
  local top_level
  top_level=$(git rev-parse --show-toplevel) || return
  # An empty answer must not degrade into a bare `cd`, which would go to $HOME.
  [ -n "${top_level}" ] || return 1
  # Quoted so that a checkout path containing spaces stays a single argument.
  cd -- "${top_level}"
}
modify_end_point_on_hive_properties()
{
  # Disabled helper: the bare return below is the first statement, so control
  # goes straight back to the caller and nothing after it is ever executed.
  return

  # ---- unreachable from here on --------------------------------------------
  # TODO: check that ./trino/catalog/hive.properties exists
  # Takes the S3 endpoint URL as $1, exports it as S3_ENDPOINT and rewrites the
  # endpoint line of hive.properties and hms_trino.yaml via copies in /tmp.
  if [ $# -lt 1 ]; then
    echo type s3-endpoint-url && return
  fi

  root_dir
  export S3_ENDPOINT=$1

  cat container/trino/trino/catalog/hive.properties \
    | awk -v x=${S3_ENDPOINT:-NO_SET} '{if(/hive.s3.endpoint/){print "hive.s3.endpoint="x"\n";} else {print $0;}}' \
    > /tmp/hive.properties
  cp /tmp/hive.properties container/trino/trino/catalog/hive.properties

  cat ./container/trino/hms_trino.yaml \
    | awk -v x=${S3_ENDPOINT:-NOT_SET} '{if(/[ *]- S3_ENDPOINT/){print "\t- S3_ENDPOINT="x"\n";} else {print $0;}}' \
    > /tmp/hms_trino.yaml
  cp /tmp/hms_trino.yaml ./container/trino/hms_trino.yaml

  cd -
}
trino_exec_command()
{
  ## run SQL statement on trino
  # Arguments: the SQL statement; several words are joined with single spaces.
  # Runs, timed, inside the "trino" container against catalog hive / schema cephs3.
  #
  # The statement is spliced into the command string handed to /bin/bash -c, so
  # that shell still interprets double quotes, $ and backticks contained in it.
  #
  # "$*" rather than "$@": the whole command has to stay ONE -c argument. With
  # "$@" every argument after the first was split off into a separate word and
  # never reached --execute.
  local IFS=' '
  sudo docker exec -it trino /bin/bash -c "time trino --catalog hive --schema cephs3 --execute \"$*\""
}
boot_trino_hms()
{
  # Write the S3 settings from the environment into the Trino / HMS
  # configuration files and start the containers with docker compose.
  # Globals:  S3_ENDPOINT, S3_ACCESS_KEY, S3_SECRET_KEY (read; all required)
  # Files:    ./container/trino/hms_trino.yaml and
  #           ./container/trino/trino/catalog/hive.properties (rewritten in place,
  #           paths relative to the directory root_dir switches to)
  # Outputs:  `cd -` prints the directory it returns to
  # Returns:  non-zero when a variable is missing or any step fails, otherwise
  #           the status of `docker compose up`
  #
  # Validation comes before root_dir so an early return cannot leave the caller
  # stranded in another directory.
  if [ -z "${S3_ENDPOINT:-}" ]; then
    echo "missing end-variable S3_ENDPOINT (URL)"
    return 1
  fi
  if [ -z "${S3_ACCESS_KEY:-}" ]; then
    echo missing end-variable S3_ACCESS_KEY
    return 1
  fi
  if [ -z "${S3_SECRET_KEY:-}" ]; then
    echo missing end-variable S3_SECRET_KEY
    return 1
  fi

  root_dir || return

  local compose_file=./container/trino/hms_trino.yaml
  local hive_props=./container/trino/trino/catalog/hive.properties
  local scratch rc=0

  # Private scratch directory: the rendered files contain the secret key, so
  # they must not sit under predictable, shared names in /tmp.
  scratch=$(mktemp -d) || { cd -; return 1; }

  # The values are handed to awk through its environment (ENVIRON) instead of
  # -v: -v would process backslash escapes inside them and would put the secret
  # key on the awk command line.
  # Both files are rendered completely before either original is overwritten,
  # so a failed render never clobbers the existing configuration.
  {
    TRINO_BOOT_EP="${S3_ENDPOINT}" TRINO_BOOT_AK="${S3_ACCESS_KEY}" TRINO_BOOT_SK="${S3_SECRET_KEY}" awk '
      # Leading blanks of the matched line: reused so the rewritten list item
      # keeps the indentation the YAML file already has.
      function lead(s) { match(s, /^[ \t]*/); return substr(s, 1, RLENGTH) }
      /- S3_ENDPOINT/   { print lead($0) "- S3_ENDPOINT="   ENVIRON["TRINO_BOOT_EP"]; next }
      /- S3_ACCESS_KEY/ { print lead($0) "- S3_ACCESS_KEY=" ENVIRON["TRINO_BOOT_AK"]; next }
      /- S3_SECRET_KEY/ { print lead($0) "- S3_SECRET_KEY=" ENVIRON["TRINO_BOOT_SK"]; next }
      { print }
    ' "${compose_file}" > "${scratch}/hms_trino.yaml" &&
    TRINO_BOOT_EP="${S3_ENDPOINT}" TRINO_BOOT_AK="${S3_ACCESS_KEY}" TRINO_BOOT_SK="${S3_SECRET_KEY}" awk '
      # No extra newline after the endpoint line: it used to add one more blank
      # line to the file on every run.
      /hive.s3.endpoint/       { print "hive.s3.endpoint="       ENVIRON["TRINO_BOOT_EP"]; next }
      /hive.s3.aws-access-key/ { print "hive.s3.aws-access-key=" ENVIRON["TRINO_BOOT_AK"]; next }
      /hive.s3.aws-secret-key/ { print "hive.s3.aws-secret-key=" ENVIRON["TRINO_BOOT_SK"]; next }
      { print }
    ' "${hive_props}" > "${scratch}/hive.properties" &&
    cp "${scratch}/hms_trino.yaml" "${compose_file}" &&
    cp "${scratch}/hive.properties" "${hive_props}" &&
    sudo docker compose -f "${compose_file}" up -d
  } || rc=$?

  rm -rf -- "${scratch}"
  cd -
  return "${rc}"
}
shutdown_trino_hms()
{
  # Bring down the containers described by hms_trino.yaml.
  # Switches to the directory selected by root_dir, runs `docker compose down`
  # through sudo, then returns to the previous directory; `cd -` prints that
  # directory and supplies the function's exit status.
  local compose_file=./container/trino/hms_trino.yaml

  root_dir
  sudo docker compose -f "${compose_file}" down
  cd -
}
trino_create_table()
{
  # Create the table hive.cephs3.<name> with ten varchar columns (c1..c10),
  # stored as comma-separated TEXTFILE at s3a://hive/warehouse/cephs3/<name>/.
  # The statement is executed by the trino CLI inside the "trino" container.
  # Arguments: $1 - table name
  # Returns:   1, with a usage line on stderr, when the name is missing;
  #            otherwise the status of `sudo docker exec`
  #
  # Both variables are local so that calling the function does not leave
  # table_name / create_table_comm behind in the caller's shell.
  local table_name=${1:-}
  local create_table_comm

  # Without a name the statement would read "create table hive.cephs3.(c1 ...",
  # which is malformed, so refuse before contacting the container.
  if [ -z "${table_name}" ]; then
    echo "usage: trino_create_table <table-name>" >&2
    return 1
  fi

  create_table_comm="create table hive.cephs3.${table_name}(c1 varchar,c2 varchar,c3 varchar,c4 varchar, c5 varchar,c6 varchar,c7 varchar,c8 varchar,c9 varchar,c10 varchar)"
  create_table_comm+=$'\n'"WITH ( external_location = 's3a://hive/warehouse/cephs3/${table_name}/',format = 'TEXTFILE',textfile_field_separator = ',');"

  sudo docker exec -it trino /bin/bash -c "trino --catalog hive --schema cephs3 --execute \"${create_table_comm}\""
}
tpcds_cli()
{
  ## a CLI example for generating TPCDS data
  # Starts the galsl/hadoop:tpcds image interactively through sudo and runs
  # /root/run_tpcds_with_scale in it. All values below are fixed in this example.
  local -a container_env=(
    --env S3_ENDPOINT=172.17.0.1:8000
    --env S3_ACCESS_KEY=b2345678901234567890
    --env S3_SECRET_KEY=b234567890123456789012345678901234567890
    --env BUCKET_NAME=hive
    --env SCALE=2
  )

  sudo docker run "${container_env[@]}" -it galsl/hadoop:tpcds bash -c '/root/run_tpcds_with_scale'
}
update_table_external_location()
{
  # Print TPCDS/ddl/create_tpcds_tables.sql (relative to the directory root_dir
  # switches to) with the first "tpcds2/4" on each line replaced by
  # "${BUCKET_NAME}/SCALE_${SCALE}".
  # Globals:  BUCKET_NAME, SCALE (read; both required)
  # Outputs:  the rewritten DDL on stdout; diagnostics on stderr
  # Returns:  1 when a variable is missing, otherwise the status of root_dir / sed
  #
  # stdout carries the DDL, so the diagnostics go to stderr to keep them out of
  # whatever consumes that stream.
  if [ -z "${BUCKET_NAME:-}" ]; then
    echo need to define BUCKET_NAME >&2
    return 1
  fi
  if [ -z "${SCALE:-}" ]; then
    echo need to define SCALE >&2
    return 1
  fi

  # Subshell: the cd performed by root_dir stays confined to it, so the caller's
  # working directory is left alone and no `cd -` output mixes into the DDL.
  (
    root_dir || exit
    # "|" as the delimiter so a "/" inside BUCKET_NAME cannot end the expression.
    sed "s|tpcds2/4|${BUCKET_NAME}/SCALE_${SCALE}|" TPCDS/ddl/create_tpcds_tables.sql
  )
}