Hive demo data
To test drive Iceberg from Hive, you need to set up the Hive demo data, which is stored in the airline_ontime_iceberg database.
Iceberg Database creation and setup
The Airlines demo data for Iceberg is stored in the airline_ontime_iceberg database. The following queries created and set up this database.
-- Create the demo database (no-op when it already exists) and switch to it.
CREATE DATABASE IF NOT EXISTS airline_ontime_iceberg;
USE airline_ontime_iceberg;
-- Session settings for the setup: vectorized execution and automatic
-- column-statistics gathering are both switched off.
SET hive.vectorized.execution.enabled=false;
SET hive.stats.column.autogather=false;
Hive external table creation
The following Hive external tables were created in the airline_ontime_iceberg database:
- airports
- airlines
- planes
- flights
-- Airport lookup table: one row per airport, identified by its IATA code.
-- state is a text identifier like city and country, so it is declared STRING
-- (it was DOUBLE, which cannot hold state codes); only lat/lon are numeric.
CREATE EXTERNAL TABLE IF NOT EXISTS airports (
    iata    STRING,
    airport STRING,
    city    STRING,
    state   STRING,
    country STRING,
    lat     DOUBLE,
    lon     DOUBLE
)
STORED AS ORC;
-- Airline lookup table: carrier code and its description.
CREATE EXTERNAL TABLE IF NOT EXISTS airlines (
    code        STRING,
    description STRING
)
STORED AS ORC;
-- Aircraft lookup table: one row per tail number.
-- issue_date is kept as STRING, exactly as in the source data definition.
CREATE EXTERNAL TABLE IF NOT EXISTS planes (
    tailnum       STRING,
    owner_type    STRING,
    manufacturer  STRING,
    issue_date    STRING,
    model         STRING,
    status        STRING,
    aircraft_type STRING,
    engine_type   STRING,
    year          INT
)
STORED AS ORC;
-- Flight fact table, partitioned by year (the partition column is declared
-- in PARTITIONED BY and therefore does not appear in the column list).
-- NOTE(review): diverted is STRING while the similar flag cancelled is INT;
-- confirm against the ORC data before changing either type.
CREATE EXTERNAL TABLE IF NOT EXISTS flights (
    month             INT,
    dayofmonth        INT,
    dayofweek         INT,
    deptime           INT,
    crsdeptime        INT,
    arrtime           INT,
    crsarrtime        INT,
    uniquecarrier     STRING,
    flightnum         INT,
    tailnum           STRING,
    actualelapsedtime INT,
    crselapsedtime    INT,
    airtime           INT,
    arrdelay          INT,
    depdelay          INT,
    origin            STRING,
    dest              STRING,
    distance          INT,
    taxiin            INT,
    taxiout           INT,
    cancelled         INT,
    cancellationcode  STRING,
    diverted          STRING,
    carrierdelay      INT,
    weatherdelay      INT,
    nasdelay          INT,
    securitydelay     INT,
    lateaircraftdelay INT
)
PARTITIONED BY (year INT)
STORED AS ORC;
Load data into the newly created tables
-- Load the three lookup tables. ${datapath} is a substitution variable that
-- must point at the directory holding the airline_ontime_iceberg.db files.
LOAD DATA INPATH '${datapath}/airline_ontime_iceberg.db/airports'
    INTO TABLE airports;
LOAD DATA INPATH '${datapath}/airline_ontime_iceberg.db/airlines'
    INTO TABLE airlines;
LOAD DATA INPATH '${datapath}/airline_ontime_iceberg.db/planes'
    INTO TABLE planes;
-- Load flights one partition at a time, one statement per year (1995-2008).
LOAD DATA INPATH '${datapath}/airline_ontime_iceberg.db/flights/year=1995'
    INTO TABLE flights PARTITION (year=1995);
LOAD DATA INPATH '${datapath}/airline_ontime_iceberg.db/flights/year=1996'
    INTO TABLE flights PARTITION (year=1996);
LOAD DATA INPATH '${datapath}/airline_ontime_iceberg.db/flights/year=1997'
    INTO TABLE flights PARTITION (year=1997);
LOAD DATA INPATH '${datapath}/airline_ontime_iceberg.db/flights/year=1998'
    INTO TABLE flights PARTITION (year=1998);
LOAD DATA INPATH '${datapath}/airline_ontime_iceberg.db/flights/year=1999'
    INTO TABLE flights PARTITION (year=1999);
LOAD DATA INPATH '${datapath}/airline_ontime_iceberg.db/flights/year=2000'
    INTO TABLE flights PARTITION (year=2000);
LOAD DATA INPATH '${datapath}/airline_ontime_iceberg.db/flights/year=2001'
    INTO TABLE flights PARTITION (year=2001);
LOAD DATA INPATH '${datapath}/airline_ontime_iceberg.db/flights/year=2002'
    INTO TABLE flights PARTITION (year=2002);
LOAD DATA INPATH '${datapath}/airline_ontime_iceberg.db/flights/year=2003'
    INTO TABLE flights PARTITION (year=2003);
LOAD DATA INPATH '${datapath}/airline_ontime_iceberg.db/flights/year=2004'
    INTO TABLE flights PARTITION (year=2004);
LOAD DATA INPATH '${datapath}/airline_ontime_iceberg.db/flights/year=2005'
    INTO TABLE flights PARTITION (year=2005);
LOAD DATA INPATH '${datapath}/airline_ontime_iceberg.db/flights/year=2006'
    INTO TABLE flights PARTITION (year=2006);
LOAD DATA INPATH '${datapath}/airline_ontime_iceberg.db/flights/year=2007'
    INTO TABLE flights PARTITION (year=2007);
LOAD DATA INPATH '${datapath}/airline_ontime_iceberg.db/flights/year=2008'
    INTO TABLE flights PARTITION (year=2008);
Convert these existing Hive external tables to Iceberg tables
-- Primary keys on the three lookup tables. They are declared
-- DISABLE NOVALIDATE, i.e. not enforced and not checked against existing rows.
ALTER TABLE planes
    ADD CONSTRAINT planes_pk PRIMARY KEY (tailnum) DISABLE NOVALIDATE;
ALTER TABLE airlines
    ADD CONSTRAINT airlines_pk PRIMARY KEY (code) DISABLE NOVALIDATE;
ALTER TABLE airports
    ADD CONSTRAINT airports_pk PRIMARY KEY (iata) DISABLE NOVALIDATE;
-- Foreign keys from flights to each lookup table; every primary key they
-- reference is created above. origin and dest both reference airports(iata).
ALTER TABLE flights
    ADD CONSTRAINT planes_fk FOREIGN KEY (tailnum)
    REFERENCES planes(tailnum) DISABLE NOVALIDATE RELY;
ALTER TABLE flights
    ADD CONSTRAINT airlines_fk FOREIGN KEY (uniquecarrier)
    REFERENCES airlines(code) DISABLE NOVALIDATE RELY;
ALTER TABLE flights
    ADD CONSTRAINT airports_orig_fk FOREIGN KEY (origin)
    REFERENCES airports(iata) DISABLE NOVALIDATE RELY;
ALTER TABLE flights
    ADD CONSTRAINT airports_dest_fk FOREIGN KEY (dest)
    REFERENCES airports(iata) DISABLE NOVALIDATE RELY;
-- Convert each Hive external table to an Iceberg table by setting its
-- storage_handler table property to the Hive Iceberg storage handler.
ALTER TABLE airports SET TBLPROPERTIES (
    'storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
);
ALTER TABLE airlines SET TBLPROPERTIES (
    'storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
);
ALTER TABLE planes SET TBLPROPERTIES (
    'storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
);
ALTER TABLE flights SET TBLPROPERTIES (
    'storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
);