2 changes: 2 additions & 0 deletions .gitignore
@@ -14,3 +14,5 @@ buildNumber.properties
hive-exec*.jar
tez-local-cache*
tez-conf.pb
sandbox-flavor
/connector/.flattened-pom.xml
3 changes: 3 additions & 0 deletions README.md
@@ -780,6 +780,9 @@ The following environment variables must be set and **exported** first.
* `GOOGLE_CLOUD_PROJECT` - The Google Cloud Platform project used to test the connector
* `TEST_BUCKET` - The GCS bucket used to test writing to BigQuery during the integration tests
* `ACCEPTANCE_TEST_BUCKET` - The GCS bucket used to test writing to BigQuery during the acceptance tests
* `SERVICE_ACCOUNT` - The service account to use for setting up Dataproc
* `SUBNET` - The VPC network subnet to use for setting up Dataproc
* `BIGLAKE_REGION` - The region to use for setting up Dataproc
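For illustration, a minimal Java helper (hypothetical; not part of this PR or the README) that an acceptance test could use to fail fast when one of these variables is missing:

    static String requireEnv(String name) {
      // Fail fast with a clear message instead of an opaque failure later on
      String value = System.getenv(name);
      if (value == null || value.isEmpty()) {
        throw new IllegalStateException("Environment variable must be set and exported: " + name);
      }
      return value;
    }

    // Example: String serviceAccount = requireEnv("SERVICE_ACCOUNT");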

To run the acceptance tests:

140 changes: 41 additions & 99 deletions connector/pom.xml
@@ -61,15 +61,6 @@
</exclusions>
</dependency>

<!--
<dependency>
<groupId>com.google.cloud.hive</groupId>
<artifactId>shaded-dependencies</artifactId>
<version>${project.parent.version}</version>
<classifier>shaded</classifier>
</dependency>
-->

<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
@@ -78,59 +69,10 @@
</dependency>

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-common</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.pentaho</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
<version>${hive.version}</version>
<scope>provided</scope>
</dependency>

<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-service</artifactId>
<version>${hive.version}</version>
<groupId>${project.parent.groupId}</groupId>
<artifactId>shaded-dependencies</artifactId>
<version>${project.parent.version}</version>
<classifier>shaded</classifier>
<scope>provided</scope>
</dependency>

Expand All @@ -152,7 +94,7 @@
<!-- ******* Testing dependencies ******* -->

<dependency>
<groupId>com.google.cloud.hive</groupId>
<groupId>${project.parent.groupId}</groupId>
<artifactId>shaded-test-dependencies</artifactId>
<version>${project.parent.version}</version>
<classifier>shaded</classifier>
@@ -181,54 +123,54 @@
</dependency>

<dependency>
<artifactId>tez-dag</artifactId>
<groupId>org.apache.tez</groupId>
<version>${tez.version}</version>
<groupId>io.github.hiverunner</groupId>
<artifactId>hiverunner</artifactId>
<version>${hiverunner.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<groupId>org.apache.tez</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<artifactId>tez-common</artifactId>
<groupId>org.apache.tez</groupId>
<version>${tez.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<artifactId>hive-serde</artifactId>
<groupId>org.apache.hive</groupId>
</exclusion>
<exclusion>
<artifactId>hive-upgrade-acid</artifactId>
<groupId>org.apache.hive</groupId>
</exclusion>
<exclusion>
<artifactId>hive-webhcat-java-client</artifactId>
<groupId>org.apache.hive.hcatalog</groupId>
</exclusion>
<exclusion>
<artifactId>hive-jdbc</artifactId>
<groupId>org.apache.hive</groupId>
</exclusion>
<exclusion>
<artifactId>hive-service</artifactId>
<groupId>org.apache.hive</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-mapreduce-client-common</artifactId>
<groupId>org.apache.hadoop</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<artifactId>tez-mapreduce</artifactId>
<groupId>org.apache.tez</groupId>
<version>${tez.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<artifactId>hadoop-annotations</artifactId>
<groupId>org.apache.hadoop</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>


<dependency>
<groupId>io.github.hiverunner</groupId>
<artifactId>hiverunner</artifactId>
<version>${hiverunner.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.apache.tez</groupId>
<artifactId>*</artifactId>
<artifactId>hadoop-yarn-client</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>jackson-annotations</artifactId>
<groupId>com.fasterxml.jackson.core</groupId>
</exclusion>
</exclusions>
</dependency>
@@ -47,7 +47,6 @@
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
@@ -222,7 +221,7 @@ public void preCreateTable(Table table) throws MetaException {
new HiveBigQueryConnectorModule(conf, table.getParameters()));
BigQueryClient bqClient = injector.getInstance(BigQueryClient.class);
HiveBigQueryConfig opts = injector.getInstance(HiveBigQueryConfig.class);
if (MetaStoreUtils.isExternalTable(table)) {
if (HiveUtils.isExternalTable(table)) {
if (bqClient.tableExists(tableId)) {
Map<String, String> basicStats = BigQueryUtils.getBasicStatistics(bqClient, tableId);
basicStats.put(StatsSetupConst.COLUMN_STATS_ACCURATE, "{\"BASIC_STATS\":\"true\"}");
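This file swaps `MetaStoreUtils.isExternalTable()` for a connector-local `HiveUtils.isExternalTable()`, presumably so the same code path works on Hive versions where the `MetaStoreUtils` variant is unavailable. The helper's body is not part of this diff; a plausible sketch, assuming it mirrors the upstream check (`Table` from `org.apache.hadoop.hive.metastore.api`, `TableType` from `org.apache.hadoop.hive.metastore`):

    public static boolean isExternalTable(Table table) {
      // Matches either the EXTERNAL_TABLE table type or the legacy "EXTERNAL" parameter
      if (TableType.EXTERNAL_TABLE.toString().equalsIgnoreCase(table.getTableType())) {
        return true;
      }
      return table.getParameters() != null
          && "TRUE".equalsIgnoreCase(table.getParameters().get("EXTERNAL"));
    }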
@@ -407,6 +406,13 @@ public void preInsertTable(Table table, boolean overwrite) throws MetaException
*/
@Override
public void commitInsertTable(Table table, boolean overwrite) throws MetaException {
String engine = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).toLowerCase();
if (engine.equals("mr")) {
// In Hive 3, `commitInsertTable()` is only called for Tez, never for MR. In Hive 2,
// however, it is also called for MR, so we exit early here: for MR jobs the
// BigQueryOutputCommitter has already been invoked automatically.
return;
}
try {
JobDetails jobDetails = JobDetails.readJobDetailsFile(conf, HiveUtils.getDbTableName(table));
BigQueryOutputCommitter.commit(conf, jobDetails);
@@ -427,7 +433,7 @@ public void rollbackInsertTable(Table table, boolean overwrite) throws MetaException

@Override
public void commitDropTable(Table table, boolean deleteData) throws MetaException {
if (!MetaStoreUtils.isExternalTable(table) && deleteData) {
if (!HiveUtils.isExternalTable(table) && deleteData) {
// This is a managed table, so let's delete the table in BigQuery
Injector injector =
Guice.createInjector(
@@ -185,6 +185,17 @@ public class HiveBigQueryConfig
// empty
}

public static Map<String, String> hadoopConfigAsMap(Configuration conf) {
  // Copy the live Hadoop configuration into a plain map. Values are re-read
  // through conf.get() so that variable expansion is applied.
  Map<String, String> configMap = new HashMap<>();
  Iterator<Map.Entry<String, String>> iterator = conf.iterator();
  while (iterator.hasNext()) {
    String name = iterator.next().getKey();
    configMap.put(name, conf.get(name));
  }
  return configMap;
}
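A usage sketch for the new helper (the key and value below are hypothetical). It stands in for `Configuration#getPropsWithPrefix("")`, which the change further down stops using, presumably because that method is missing from some Hadoop releases the connector supports:

    Configuration conf = new Configuration();
    conf.set("materializationDataset", "scratch_dataset"); // hypothetical key/value
    Map<String, String> configMap = HiveBigQueryConfig.hadoopConfigAsMap(conf);
    // The map also contains every Hadoop default, e.g. "hadoop.tmp.dir"
    assert "scratch_dataset".equals(configMap.get("materializationDataset"));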

private static Optional<String> getAnyOption(
String key, Configuration conf, Map<String, String> tableParameters) {
// TODO: the conf value currently takes precedence over the table value; confirm this is intended
@@ -258,7 +269,7 @@ public static HiveBigQueryConfig from(Configuration conf, Map<String, String> tableParameters)
Boolean.parseBoolean(getAnyOption(VIEWS_ENABLED_KEY, conf, tableParameters).or("false"));
MaterializationConfiguration materializationConfiguration =
MaterializationConfiguration.from(
ImmutableMap.copyOf(conf.getPropsWithPrefix("")), new HashMap<>());
ImmutableMap.copyOf(hadoopConfigAsMap(conf)), new HashMap<>());
opts.materializationProject = materializationConfiguration.getMaterializationProject();
opts.materializationDataset = materializationConfiguration.getMaterializationDataset();
opts.materializationExpirationTimeInMinutes =
@@ -114,7 +114,6 @@ public static Object serialize(
TimestampTZ timestampTZ = DateTimeUtils.getHiveTimestampTZFromUTC((long) avroObject);
return new TimestampLocalTZWritable(timestampTZ);
}

if (objectInspector instanceof ByteObjectInspector) { // Tiny Int
return new ByteWritable(((Long) avroObject).byteValue());
}
@@ -145,7 +144,7 @@

if (objectInspector instanceof HiveDecimalObjectInspector) {
byte[] bytes = ((ByteBuffer) avroObject).array();
int scale = (int) actualSchema.getObjectProp("scale");
int scale = AvroUtils.getPropAsInt(actualSchema, "scale");
BigDecimal bigDecimal = new BigDecimal(new BigInteger(bytes), scale);
HiveDecimal hiveDecimal = HiveDecimal.create(bigDecimal);
return new HiveDecimalWritable(hiveDecimal);
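For reference, a standalone sketch of the decimal path above, with illustrative bytes and schema; `AvroUtils.getPropAsInt()` is introduced elsewhere in this PR (implementation not shown) and is assumed to normalize the "scale" property whether the Avro runtime exposes it as an `Integer` or a `JsonNode`:

    Schema decimalSchema =
        new Schema.Parser()
            .parse("{\"type\": \"bytes\", \"logicalType\": \"decimal\", \"precision\": 38, \"scale\": 9}");
    int scale = AvroUtils.getPropAsInt(decimalSchema, "scale"); // 9
    // 0x01E240 is 123456 as big-endian unscaled bytes; with scale 9 -> 0.000123456
    BigDecimal value = new BigDecimal(new BigInteger(new byte[] {0x01, (byte) 0xE2, 0x40}), scale);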
@@ -30,7 +30,6 @@
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.TaskAttemptID;
@@ -74,7 +73,7 @@ public static boolean hasGcsWriteAccess(
public static Path getQueryWorkDir(Configuration conf) {
String parentPath = conf.get(HiveBigQueryConfig.WORK_DIR_PARENT_PATH_KEY);
if (parentPath == null) {
parentPath = conf.get(CommonConfigurationKeys.HADOOP_TMP_DIR);
parentPath = conf.get("hadoop.tmp.dir");
}
return getQuerySubDir(conf, parentPath);
}
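The string literal "hadoop.tmp.dir" replaces the `CommonConfigurationKeys.HADOOP_TMP_DIR` constant, presumably because the constant is not defined in every Hadoop version the connector targets. A short sketch of overriding the fallback (bucket path hypothetical):

    Configuration conf = new Configuration();
    // Route query work files under a GCS path instead of the hadoop.tmp.dir
    // fallback, which defaults to /tmp/hadoop-${user.name}
    conf.set(HiveBigQueryConfig.WORK_DIR_PARENT_PATH_KEY, "gs://my-bucket/hive-bq-work");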