Integrating Apache Hive with Apache Spark and BI (PDF version)

Hive Warehouse Connector Interfaces

The HiveWarehouseSession, CreateTableBuilder, and MergeBuilder interfaces define the available HWC operations.

package com.hortonworks.hwc;
        
/**
 * Entry point for Hive Warehouse Connector (HWC) operations: running Hive
 * queries and updates, referencing tables as DataFrames, and accessing the
 * helper builders for table creation and merge statements.
 */
public interface HiveWarehouseSession {

    /** Executes a Hive SELECT query and returns a DataFrame (recommended). */
    Dataset<Row> sql(String sql);

    /**
     * Executes a Hive SELECT query and returns a DataFrame in JDBC client mode,
     * or executes a Hive catalog-browsing operation and returns a DataFrame.
     */
    Dataset<Row> execute(String sql);

    /**
     * Executes a Hive SELECT query and returns a DataFrame in LLAP mode
     * (not available in this release).
     */
    Dataset<Row> executeQuery(String sql);

    /** Executes a Hive update statement. */
    boolean executeUpdate(String sql);

    /**
     * References a Hive table as a DataFrame.
     * NOTE(review): the parameter is named {@code sql} but presumably carries a
     * table name; confirm against the implementation.
     */
    Dataset<Row> table(String sql);

    /** Returns the SparkSession attached to this HiveWarehouseSession. */
    SparkSession session();

    /** Sets the current database for unqualified Hive table references. */
    void setDatabase(String name);

    // ---------------------------------------------------------------------
    // Helpers: wrapper functions over execute or executeUpdate
    // ---------------------------------------------------------------------

    /** Helper for show databases. */
    Dataset<Row> showDatabases();

    /** Helper for show tables. */
    Dataset<Row> showTables();

    /** Helper for describeTable. */
    Dataset<Row> describeTable(String table);

    /** Helper for create database. */
    void createDatabase(String database, boolean ifNotExists);

    /** Helper for create table stored as ORC. */
    CreateTableBuilder createTable(String tableName);

    /** Helper for drop database. */
    void dropDatabase(String database, boolean ifExists, boolean cascade);

    /** Helper for drop table. */
    void dropTable(String table, boolean ifExists, boolean purge);

    /** Helper for merge query. */
    MergeBuilder mergeBuilder();

    /** Closes the HWC session. The session cannot be reused after being closed. */
    void close();

    /**
     * Closes the transaction started by the direct reader. The transaction is
     * not committed if the user uses RDD APIs.
     */
    void commitTxn();
}
package com.hortonworks.hwc;
        
/**
 * Fluent builder for creating an ORC table in Hive. Configure the table with
 * the chained methods, then call {@link #create()}.
 */
public interface CreateTableBuilder {

    /** Silently skips table creation if the table name already exists. */
    CreateTableBuilder ifNotExists();

    /**
     * Adds a column with the specified name and Hive type.
     * Call more than once to add multiple columns.
     */
    CreateTableBuilder column(String name, String type);

    /**
     * Specifies a column as a table partition.
     * Call more than once to specify multiple partitions.
     */
    CreateTableBuilder partition(String name, String type);

    /**
     * Adds a table property.
     * Call more than once to add multiple properties.
     */
    CreateTableBuilder prop(String key, String value);

    /** Makes the table bucketed, with the given number of buckets and bucket columns. */
    CreateTableBuilder clusterBy(long numBuckets, String... columns);

    /** Creates the ORC table in Hive from this builder instance. */
    void create();
}
package com.hortonworks.hwc;
        
/**
 * Fluent builder for a Hive merge query. Configure the statement with the
 * chained methods, then call {@link #merge()} to execute it.
 */
public interface MergeBuilder {

    /** Specifies the target table to merge into, together with its alias. */
    MergeBuilder mergeInto(String targetTable, String alias);

    /**
     * Specifies the source table or expression, such as
     * {@code (select * from some_table)}, together with its alias.
     * Enclose the expression in parentheses if one is specified.
     */
    MergeBuilder using(String sourceTableOrExpr, String alias);

    /** Specifies the condition expression for merging. */
    MergeBuilder on(String expr);

    /** Specifies fields to update for rows affected by the merge condition and matchExpr. */
    MergeBuilder whenMatchedThenUpdate(String matchExpr, String... nameValuePairs);

    /** Deletes rows affected by the merge condition and matchExpr. */
    MergeBuilder whenMatchedThenDelete(String matchExpr);

    /** Inserts rows into the target table affected by the merge condition and matchExpr. */
    MergeBuilder whenNotMatchedInsert(String matchExpr, String... values);

    /** Executes the merge operation. */
    void merge();
}