Skip to content

Commit d752632

Browse files
authored
Merge pull request #1 from dingxin-tech/tpch_version
use tpch version as default version
2 parents 655d65e + f044e76 commit d752632

File tree

10 files changed

+350
-22
lines changed

10 files changed

+350
-22
lines changed

Dockerfile

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
FROM eclipse-temurin:17-jdk-alpine
22
VOLUME /tmp
33
COPY target/*.jar app.jar
4+
COPY target/classes/tpch.db /tpch-tiny.db
45
ENTRYPOINT ["java", "--add-opens=java.base/java.nio=ALL-UNNAMED", "-jar", "/app.jar"]
56
EXPOSE 8080

how_to_gen_tpch_data.md

+166
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
1. Download the TPC-H tools
2+
from the [official website](https://www.tpc.org/TPC_Documents_Current_Versions/download_programs/tools-download-request5.asp?bm_type=TPC-H&bm_vers=3.0.1&mode=CURRENT-ONLY).
3+
2. Unzip the file and change into the `dbgen` directory.
4+
3. Fill in `makefile.suite` and rename it to `Makefile`, then run `make` to build the executable. (Note that if you
5+
work on
6+
macOS, you need to change `#include <malloc.h>` to `#include <stdlib.h>` in the files `varsub.c` and `bm_utils.c`.)
7+
4. Run `dbgen`, for example `./dbgen -vf -s 1`; you will then find the generated `.tbl` files in the current directory.
8+
5. Process the generated `.tbl` files into CSV format so that they can be loaded into SQLite. You can use the following table definitions and script.
9+
10+
```sql
11+
CREATE TABLE nation (
12+
nationkey INTEGER PRIMARY KEY,
13+
name TEXT NOT NULL,
14+
regionkey INTEGER NOT NULL,
15+
comment TEXT
16+
);
17+
18+
CREATE TABLE region (
19+
regionkey INTEGER PRIMARY KEY,
20+
name TEXT NOT NULL,
21+
comment TEXT
22+
);
23+
24+
CREATE TABLE part (
25+
partkey INTEGER PRIMARY KEY,
26+
name TEXT NOT NULL,
27+
mfgr TEXT NOT NULL,
28+
brand TEXT NOT NULL,
29+
type TEXT NOT NULL,
30+
size INTEGER NOT NULL,
31+
container TEXT NOT NULL,
32+
retailprice REAL NOT NULL,
33+
comment TEXT NOT NULL
34+
);
35+
36+
CREATE TABLE supplier (
37+
suppkey INTEGER PRIMARY KEY,
38+
name TEXT NOT NULL,
39+
address TEXT NOT NULL,
40+
nationkey INTEGER NOT NULL,
41+
phone TEXT NOT NULL,
42+
acctbal REAL NOT NULL,
43+
comment TEXT NOT NULL
44+
);
45+
46+
CREATE TABLE partsupp (
47+
partkey INTEGER NOT NULL,
48+
suppkey INTEGER NOT NULL,
49+
availqty INTEGER NOT NULL,
50+
supplycost REAL NOT NULL,
51+
comment TEXT NOT NULL,
52+
PRIMARY KEY (partkey, suppkey)
53+
);
54+
55+
CREATE TABLE customer (
56+
custkey INTEGER PRIMARY KEY,
57+
name TEXT NOT NULL,
58+
address TEXT NOT NULL,
59+
nationkey INTEGER NOT NULL,
60+
phone TEXT NOT NULL,
61+
acctbal REAL NOT NULL,
62+
mktsegment TEXT NOT NULL,
63+
comment TEXT NOT NULL
64+
);
65+
66+
CREATE TABLE orders (
67+
orderkey INTEGER PRIMARY KEY,
68+
custkey INTEGER NOT NULL,
69+
orderstatus TEXT NOT NULL,
70+
totalprice REAL NOT NULL,
71+
orderdate TEXT NOT NULL,
72+
orderpriority TEXT NOT NULL,
73+
clerk TEXT NOT NULL,
74+
shippriority INTEGER NOT NULL,
75+
comment TEXT NOT NULL
76+
);
77+
78+
CREATE TABLE lineitem (
79+
orderkey INTEGER NOT NULL,
80+
partkey INTEGER NOT NULL,
81+
suppkey INTEGER NOT NULL,
82+
linenumber INTEGER NOT NULL,
83+
quantity REAL NOT NULL,
84+
extendedprice REAL NOT NULL,
85+
discount REAL NOT NULL,
86+
tax REAL NOT NULL,
87+
returnflag TEXT NOT NULL,
88+
linestatus TEXT NOT NULL,
89+
shipdate TEXT NOT NULL,
90+
commitdate TEXT NOT NULL,
91+
receiptdate TEXT NOT NULL,
92+
shipinstruct TEXT NOT NULL,
93+
shipmode TEXT NOT NULL,
94+
comment TEXT NOT NULL,
95+
PRIMARY KEY (orderkey, linenumber)
96+
);
97+
```
98+
99+
100+
```bash
101+
#!/bin/bash
# Prepend a pipe-separated header line to each TPC-H .tbl file and strip the
# trailing '|' from every data line, rewriting the files in place.
#
# The script is safe to re-run: a file whose first line already equals the
# expected header is left untouched, and a missing file is skipped instead of
# being created with nothing but a header in it.
set -euo pipefail

# Directory that contains the .tbl files.
folder_path="."

# Remove a leftover temporary file if the script aborts half-way through.
tmp_file=""
trap 'if [ -n "$tmp_file" ]; then rm -f "$tmp_file"; fi' EXIT

# Print the hard-coded header for the table named "$1";
# print an empty line when the table is unknown.
get_header() {
  case "$1" in
    "customer")
      echo "custkey|name|address|nationkey|phone|acctbal|mktsegment|comment"
      ;;
    "lineitem")
      echo "orderkey|partkey|suppkey|linenumber|quantity|extendedprice|discount|tax|returnflag|linestatus|shipdate|commitdate|receiptdate|shipinstruct|shipmode|comment"
      ;;
    "nation")
      echo "nationkey|name|regionkey|comment"
      ;;
    "orders")
      echo "orderkey|custkey|orderstatus|totalprice|orderdate|orderpriority|clerk|shippriority|comment"
      ;;
    "part")
      echo "partkey|name|mfgr|brand|type|size|container|retailprice|comment"
      ;;
    "partsupp")
      echo "partkey|suppkey|availqty|supplycost|comment"
      ;;
    "region")
      echo "regionkey|name|comment"
      ;;
    "supplier")
      echo "suppkey|name|address|nationkey|phone|acctbal|comment"
      ;;
    *)
      echo ""
      ;;
  esac
}

for table_name in customer lineitem nation orders part partsupp region supplier; do
  file="${folder_path}/${table_name}.tbl"
  header=$(get_header "$table_name")

  if [ -z "$header" ]; then
    echo "no header defined for $table_name, skip $file" >&2
    continue
  fi

  if [ ! -f "$file" ]; then
    echo "cannot find $file, skip" >&2
    continue
  fi

  # Do not add a second header when the script is run again.
  if [ "$(head -n 1 "$file")" = "$header" ]; then
    echo "$file already starts with its header, skip"
    continue
  fi

  # Build the new content next to the original, then replace it in one step.
  tmp_file=$(mktemp "${file}.tmp.XXXXXX")
  {
    echo "$header"
    sed 's/|$//' "$file"
  } > "$tmp_file"
  mv "$tmp_file" "$file"
  tmp_file=""
done
150+
151+
```
152+
153+
6. Load the data into the tables. You can use the following script.
154+
155+
```sql
156+
.mode csv
157+
.separator |
158+
.import customer.tbl customer
159+
.import lineitem.tbl lineitem
160+
.import nation.tbl nation
161+
.import orders.tbl orders
162+
.import part.tbl part
163+
.import partsupp.tbl partsupp
164+
.import region.tbl region
165+
.import supplier.tbl supplier
166+
```

pom.xml

+2-2
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
</parent>
2929
<groupId>com.aliyun.odps</groupId>
3030
<artifactId>maxcompute-emulator</artifactId>
31-
<version>0.0.5</version>
31+
<version>0.0.6-tpch</version>
3232
<name>maxcompute-emulator</name>
3333
<description>mock version to access MaxCompute</description>
3434
<properties>
@@ -115,7 +115,7 @@
115115
<dependency>
116116
<groupId>tech.dingxin</groupId>
117117
<artifactId>arrow-utils</artifactId>
118-
<version>0.0.1</version>
118+
<version>0.0.2</version>
119119
</dependency>
120120
<dependency>
121121
<groupId>org.apache.arrow</groupId>

src/main/java/com/aliyun/odps/service/StorageService.java

+13-10
Original file line numberDiff line numberDiff line change
@@ -112,20 +112,23 @@ private TableData read(TableId tableId) throws Exception {
112112
sql.delete(sql.length() - 4, sql.length());
113113
sql.append(";");
114114
} else {
115-
sql = new StringBuilder("select * from " + tableId.getTableName().toUpperCase() + " where ");
115+
sql = new StringBuilder("select * from " + tableId.getTableName().toUpperCase());
116116
List<String> partitionNames = tableId.getPartitionNames();
117-
for (String partitionName : partitionNames) {
118-
sql.append("(");
119-
PartitionSpec partitionSpec = new PartitionSpec(partitionName);
120-
Set<String> keys = partitionSpec.keys();
121-
for (String key : keys) {
122-
sql.append(key).append(" = '").append(partitionSpec.get(key)).append("' and ");
117+
if (!partitionNames.isEmpty()) {
118+
sql.append(" where ");
119+
for (String partitionName : partitionNames) {
120+
sql.append("(");
121+
PartitionSpec partitionSpec = new PartitionSpec(partitionName);
122+
Set<String> keys = partitionSpec.keys();
123+
for (String key : keys) {
124+
sql.append(key).append(" = '").append(partitionSpec.get(key)).append("' and ");
125+
}
126+
sql.delete(sql.length() - 4, sql.length());
127+
128+
sql.append(") or ");
123129
}
124130
sql.delete(sql.length() - 4, sql.length());
125-
126-
sql.append(") or ");
127131
}
128-
sql.delete(sql.length() - 4, sql.length());
129132
sql.append(";");
130133
}
131134
try (

src/main/java/com/aliyun/odps/service/TableService.java

+8-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,14 @@ public boolean tableExist(String tableName) {
8484
pstmt.setString(1, tableName.toUpperCase());
8585

8686
try (ResultSet resultSet = pstmt.executeQuery()) {
87-
return resultSet.next();
87+
if (resultSet.next()) {
88+
return true;
89+
} else {
90+
pstmt.setString(1, tableName);
91+
try (ResultSet resultSet2 = pstmt.executeQuery()) {
92+
return resultSet2.next();
93+
}
94+
}
8895
}
8996
}
9097
} catch (SQLException e) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package com.aliyun.odps.utils;
19+
20+
import com.aliyun.odps.entity.SqlLiteColumn;
21+
import com.aliyun.odps.entity.SqlLiteSchema;
22+
23+
import java.sql.Connection;
24+
import java.sql.DatabaseMetaData;
25+
import java.sql.DriverManager;
26+
import java.sql.ResultSet;
27+
import java.sql.SQLException;
28+
import java.sql.Statement;
29+
import java.util.ArrayList;
30+
import java.util.HashMap;
31+
import java.util.List;
32+
import java.util.Map;
33+
34+
/**
35+
* @author dingxin ([email protected])
36+
*/
37+
public class AnalyzeDbRunner {
38+
39+
public static void main(String[] args) {
40+
String url = "jdbc:sqlite:tpch.db";
41+
CommonUtils.initEmulator();
42+
try (Connection conn = DriverManager.getConnection(url)) {
43+
if (conn != null) {
44+
Map<String, SqlLiteSchema> schemas = getDatabaseSchema(conn);
45+
schemas.forEach((tableName, schema) -> {
46+
try {
47+
deleteRowIfFirstColumnEqualsColumnName(conn, tableName);
48+
SqlRunner.executeSql("INSERT INTO schemas VALUES ('" + tableName + "', '" + schema.toJson() +
49+
"');");
50+
} catch (SQLException e) {
51+
throw new RuntimeException(e);
52+
}
53+
});
54+
}
55+
} catch (SQLException e) {
56+
e.printStackTrace();
57+
}
58+
}
59+
60+
public static Map<String, SqlLiteSchema> getDatabaseSchema(Connection conn) throws SQLException {
61+
DatabaseMetaData meta = conn.getMetaData();
62+
ResultSet rsTables = meta.getTables(null, null, "%", new String[] {"TABLE"});
63+
Map<String, SqlLiteSchema> schemas = new HashMap<>();
64+
65+
while (rsTables.next()) {
66+
String tableName = rsTables.getString("TABLE_NAME");
67+
ResultSet rsColumns = meta.getColumns(null, null, tableName, "%");
68+
SqlLiteSchema schema = new SqlLiteSchema();
69+
List<SqlLiteColumn> columns = new ArrayList<>();
70+
71+
while (rsColumns.next()) {
72+
String columnName = rsColumns.getString("COLUMN_NAME");
73+
String columnType = rsColumns.getString("TYPE_NAME");
74+
if (columnType.equals("TEXT")) {
75+
columnType = "STRING";
76+
}
77+
boolean notNull = rsColumns.getInt("NULLABLE") == DatabaseMetaData.columnNoNulls;
78+
String defaultValue = rsColumns.getString("COLUMN_DEF");
79+
boolean primaryKey = isPrimaryKey(meta, tableName, columnName);
80+
81+
SqlLiteColumn column =
82+
new SqlLiteColumn(columnName, columnType, notNull, defaultValue, primaryKey, false);
83+
columns.add(column);
84+
}
85+
86+
schema.setColumns(columns);
87+
schema.setPartitionColumns(new ArrayList<>());
88+
schemas.put(tableName, schema);
89+
rsColumns.close();
90+
}
91+
92+
rsTables.close();
93+
return schemas;
94+
}
95+
96+
private static boolean isPrimaryKey(DatabaseMetaData meta, String tableName, String columnName)
97+
throws SQLException {
98+
ResultSet rsPrimaryKeys = meta.getPrimaryKeys(null, null, tableName);
99+
while (rsPrimaryKeys.next()) {
100+
String pkColumnName = rsPrimaryKeys.getString("COLUMN_NAME");
101+
if (columnName.equals(pkColumnName)) {
102+
rsPrimaryKeys.close();
103+
return true;
104+
}
105+
}
106+
rsPrimaryKeys.close();
107+
return false;
108+
}
109+
110+
private static void deleteRowIfFirstColumnEqualsColumnName(Connection conn, String tableName) throws SQLException {
111+
Statement stmt = conn.createStatement();
112+
ResultSet rs = stmt.executeQuery("PRAGMA table_info(" + tableName + ")");
113+
114+
if (rs.next()) {
115+
String firstColumnName = rs.getString("name");
116+
String sql = "DELETE FROM " + tableName + " WHERE " + firstColumnName + " = '" + firstColumnName + "'";
117+
stmt.executeUpdate(sql);
118+
System.out.println("Deleted rows from table " + tableName + " where " + firstColumnName + " equals " +
119+
firstColumnName);
120+
}
121+
122+
stmt.close();
123+
rs.close();
124+
}
125+
}

src/main/java/com/aliyun/odps/utils/CommonUtils.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ public static String generateUUID() {
4040
return java.util.UUID.randomUUID().toString().replace("-", "");
4141
}
4242

43-
private static final String URL = "jdbc:sqlite:/tmp/maxcompute-emulator.db";
43+
private static final String URL = "jdbc:sqlite:tpch-tiny.db";
4444

4545
public static void initEmulator() {
4646
try {

0 commit comments

Comments
 (0)