Skip to content

Commit

Permalink
[AIRFLOW-2412] Fix HiveCliHook.load_file to address HIVE-10541
Browse files Browse the repository at this point in the history
HiveCliHook.load_file doesn't actually execute the
LOAD DATA statement via the beeline bundled with
Hive versions earlier than 2.0, due to HIVE-10541.
This PR provides a workaround for this problem.

Closes apache#3327 from sekikn/AIRFLOW-2412

(cherry picked from commit baf15e1)
Signed-off-by: Fokko Driesprong <[email protected]>
  • Loading branch information
sekikn authored and Fokko Driesprong committed May 8, 2018
1 parent c50093d commit a1fdd81
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 1 deletion.
5 changes: 5 additions & 0 deletions airflow/hooks/hive_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,11 @@ def load_file(
pvals = ", ".join(
["{0}='{1}'".format(k, v) for k, v in partition.items()])
hql += "PARTITION ({pvals});"

# As a workaround for HIVE-10541, add a newline character
# at the end of hql (AIRFLOW-2412).
hql += '\n'

hql = hql.format(**locals())
self.log.info(hql)
self.run_cli(hql)
Expand Down
26 changes: 25 additions & 1 deletion tests/hooks/test_hive_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@

import datetime
import random

import mock
import unittest

from hmsclient import HMSClient

from airflow.exceptions import AirflowException
from airflow.hooks.hive_hooks import HiveMetastoreHook
from airflow.hooks.hive_hooks import HiveCliHook, HiveMetastoreHook
from airflow import DAG, configuration, operators
from airflow.utils import timezone

Expand Down Expand Up @@ -82,6 +84,28 @@ def tearDown(self):
metastore.drop_table(self.database, self.table, deleteData=True)


class TestHiveCliHook(unittest.TestCase):
    """Tests covering HiveCliHook.run_cli and HiveCliHook.load_file."""

    def test_run_cli(self):
        """Send a simple statement through run_cli on a default hook."""
        HiveCliHook().run_cli("SHOW DATABASES")

    @mock.patch('airflow.hooks.hive_hooks.HiveCliHook.run_cli')
    def test_load_file(self, mock_run_cli):
        """Check the statement load_file hands to the (patched) run_cli.

        The expected statement ends with a newline character, which is
        what this test pins down.
        """
        filepath = "/path/to/input/file"
        table = "output_table"

        HiveCliHook().load_file(filepath=filepath, table=table, create=False)

        # Build the expected HQL from a template; note the trailing "\n".
        template = (
            "LOAD DATA LOCAL INPATH '{filepath}' "
            "OVERWRITE INTO TABLE {table} \n"
        )
        expected_hql = template.format(filepath=filepath, table=table)
        mock_run_cli.assert_called_with(expected_hql)


class TestHiveMetastoreHook(HiveEnvironmentTest):
VALID_FILTER_MAP = {'key2': 'value2'}

Expand Down

0 comments on commit a1fdd81

Please sign in to comment.