Skip to content

Commit

Permalink
[AIRFLOW-2426] Add Google Cloud Storage Hook tests
Browse files Browse the repository at this point in the history
- Added mock tests for methods in
`GoogleCloudStorageHook`.

Closes apache#3322 from kaxil/AIRFLOW-2426

(cherry picked from commit 868d392)
Signed-off-by: Fokko Driesprong <[email protected]>
  • Loading branch information
kaxil authored and Fokko Driesprong committed May 7, 2018
1 parent 27631b6 commit ff3cab6
Show file tree
Hide file tree
Showing 2 changed files with 240 additions and 4 deletions.
12 changes: 10 additions & 2 deletions airflow/contrib/hooks/gcs_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
Expand All @@ -24,6 +24,8 @@
from airflow.contrib.hooks.gcp_api_base_hook import GoogleCloudBaseHook
from airflow.exceptions import AirflowException

import re


class GoogleCloudStorageHook(GoogleCloudBaseHook):
"""
Expand Down Expand Up @@ -417,6 +419,12 @@ def create_bucket(self,
'Invalid value ({}) passed to storage_class. Value should be ' \
'one of {}'.format(storage_class, storage_classes)

assert re.match('[a-zA-Z0-9]+', bucket_name[0]), \
'Bucket names must start with a number or letter.'

assert re.match('[a-zA-Z0-9]+', bucket_name[-1]), \
'Bucket names must end with a number or letter.'

service = self.get_conn()
bucket_resource = {
'name': bucket_name,
Expand Down
232 changes: 230 additions & 2 deletions tests/contrib/hooks/test_gcs_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
Expand All @@ -20,7 +20,22 @@
import unittest

import airflow.contrib.hooks.gcs_hook as gcs_hook

from airflow.exceptions import AirflowException
from apiclient.errors import HttpError

try:
from unittest import mock
except ImportError:
try:
import mock
except ImportError:
mock = None

BASE_STRING = 'airflow.contrib.hooks.gcp_api_base_hook.{}'
GCS_STRING = 'airflow.contrib.hooks.gcs_hook.{}'

EMPTY_CONTENT = ''.encode('utf8')


class TestGCSHookHelperFunctions(unittest.TestCase):
Expand All @@ -45,3 +60,216 @@ def test_parse_gcs_url(self):
# bucket only
self.assertEqual(
gcs_hook._parse_gcs_url('gs://bucket/'), ('bucket', ''))


class TestGCSBucket(unittest.TestCase):
    """Checks how ``create_bucket`` reacts to malformed bucket names."""

    def test_bucket_name_value(self):
        """A bad first or last character must trip an ``AssertionError``."""
        # First entry has an invalid leading character, the second an invalid
        # trailing one; they are exercised in that order.
        invalid_names = ('/testing123', 'testing123/')

        for invalid_name in invalid_names:
            # The hook is built inside the context manager on purpose, so
            # the scope of what may raise matches the per-name checks this
            # loop replaces.
            with self.assertRaises(AssertionError):
                hook = gcs_hook.GoogleCloudStorageHook()
                hook.create_bucket(bucket_name=invalid_name)


class TestGoogleCloudStorageHook(unittest.TestCase):
    """Mock-based tests for ``exists``, ``copy`` and ``delete``.

    Every test that reaches the API patches
    ``GoogleCloudStorageHook.get_conn`` and configures the call chain
    ``get_conn().objects().<verb>().execute()`` on the resulting mock, so no
    real Google Cloud connection is made.
    """

    def setUp(self):
        # The base-hook constructor is patched out while the hook is built,
        # so the real GoogleCloudBaseHook.__init__ never runs.
        with mock.patch(BASE_STRING.format('GoogleCloudBaseHook.__init__')):
            self.gcs_hook = gcs_hook.GoogleCloudStorageHook(
                google_cloud_storage_conn_id='test'
            )

    @staticmethod
    def _object_resource(bucket, name):
        """Return a sample ``storage#object`` payload for ``bucket``/``name``.

        Shared by the success-path tests so the fixture is defined once.
        """
        return {
            "kind": "storage#object",
            # The ID of the object, including the bucket name,
            # object name, and generation number.
            "id": "{}/{}/1521132662504504".format(bucket, name),
            "name": name,
            "bucket": bucket,
            "generation": "1521132662504504",
            "contentType": "text/csv",
            "timeCreated": "2018-03-15T16:51:02.502Z",
            "updated": "2018-03-15T16:51:02.502Z",
            "storageClass": "MULTI_REGIONAL",
            "timeStorageClassUpdated": "2018-03-15T16:51:02.502Z",
            "size": "89",
            "md5Hash": "leYUJBUWrRtks1UeUFONJQ==",
            "metadata": {
                "md5-hash": "95e614241516ad1b64b3551e50538d25"
            },
            "crc32c": "xgdNfQ==",
            "etag": "CLf4hODk7tkCEAE="
        }

    @mock.patch(GCS_STRING.format('GoogleCloudStorageHook.get_conn'))
    def test_exists(self, mock_service):
        """``exists`` is truthy when the stubbed ``get`` call succeeds."""
        test_bucket = 'test_bucket'
        test_object = 'test_object'

        (mock_service.return_value.objects.return_value
         .get.return_value.execute.return_value) = self._object_resource(
            test_bucket, test_object)

        response = self.gcs_hook.exists(bucket=test_bucket, object=test_object)

        self.assertTrue(response)

    @mock.patch(GCS_STRING.format('GoogleCloudStorageHook.get_conn'))
    def test_exists_nonexisting_object(self, mock_service):
        """``exists`` is falsy when the stubbed ``get`` call raises a 404."""
        test_bucket = 'test_bucket'
        test_object = 'test_object'

        (mock_service.return_value.objects.return_value
         .get.return_value.execute.side_effect) = HttpError(
            resp={'status': '404'}, content=EMPTY_CONTENT)

        response = self.gcs_hook.exists(bucket=test_bucket, object=test_object)

        self.assertFalse(response)

    @mock.patch(GCS_STRING.format('GoogleCloudStorageHook.get_conn'))
    def test_copy(self, mock_service):
        """``copy`` is truthy when the stubbed ``copy`` call succeeds."""
        source_bucket = 'test-source-bucket'
        source_object = 'test-source-object'
        destination_bucket = 'test-dest-bucket'
        destination_object = 'test-dest-object'

        # Stub the ``copy`` verb, the same chain test_copy_failedcopy
        # configures, so the fixture sits on the call this test is about.
        # NOTE(review): presumes the hook's copy() goes through
        # objects().copy().execute() -- confirm against gcs_hook.py.
        (mock_service.return_value.objects.return_value
         .copy.return_value.execute.return_value) = self._object_resource(
            destination_bucket, destination_object)

        response = self.gcs_hook.copy(
            source_bucket=source_bucket,
            source_object=source_object,
            destination_bucket=destination_bucket,
            destination_object=destination_object
        )

        self.assertTrue(response)

    @mock.patch(GCS_STRING.format('GoogleCloudStorageHook.get_conn'))
    def test_copy_failedcopy(self, mock_service):
        """``copy`` is falsy when the stubbed ``copy`` call raises a 404."""
        source_bucket = 'test-source-bucket'
        source_object = 'test-source-object'
        destination_bucket = 'test-dest-bucket'
        destination_object = 'test-dest-object'

        (mock_service.return_value.objects.return_value
         .copy.return_value.execute.side_effect) = HttpError(
            resp={'status': '404'}, content=EMPTY_CONTENT)

        response = self.gcs_hook.copy(
            source_bucket=source_bucket,
            source_object=source_object,
            destination_bucket=destination_bucket,
            destination_object=destination_object
        )

        self.assertFalse(response)

    def test_copy_fail_same_source_and_destination(self):
        """Copying an object onto itself raises ``ValueError``."""
        source_bucket = 'test-source-bucket'
        source_object = 'test-source-object'
        destination_bucket = 'test-source-bucket'
        destination_object = 'test-source-object'

        with self.assertRaises(ValueError) as e:
            self.gcs_hook.copy(source_bucket=source_bucket,
                               source_object=source_object,
                               destination_bucket=destination_bucket,
                               destination_object=destination_object)

        # assertEqual rather than the deprecated assertEquals alias, which
        # Python 3.12 removed.
        self.assertEqual(
            str(e.exception),
            'Either source/destination bucket or source/destination object '
            'must be different, not both the same: bucket=%s, object=%s' %
            (source_bucket, source_object)
        )

    def test_copy_empty_source_bucket(self):
        """A ``None`` source bucket raises ``ValueError``."""
        source_bucket = None
        source_object = 'test-source-object'
        destination_bucket = 'test-dest-bucket'
        destination_object = 'test-dest-object'

        with self.assertRaises(ValueError) as e:
            self.gcs_hook.copy(source_bucket=source_bucket,
                               source_object=source_object,
                               destination_bucket=destination_bucket,
                               destination_object=destination_object)

        self.assertEqual(
            str(e.exception),
            'source_bucket and source_object cannot be empty.'
        )

    def test_copy_empty_source_object(self):
        """A ``None`` source object raises ``ValueError``."""
        source_bucket = 'test-source-object'
        source_object = None
        destination_bucket = 'test-dest-bucket'
        destination_object = 'test-dest-object'

        with self.assertRaises(ValueError) as e:
            self.gcs_hook.copy(source_bucket=source_bucket,
                               source_object=source_object,
                               destination_bucket=destination_bucket,
                               destination_object=destination_object)

        self.assertEqual(
            str(e.exception),
            'source_bucket and source_object cannot be empty.'
        )

    @mock.patch(GCS_STRING.format('GoogleCloudStorageHook.get_conn'))
    def test_delete(self, mock_service):
        """``delete`` is truthy when the stubbed ``delete`` call succeeds."""
        test_bucket = 'test_bucket'
        test_object = 'test_object'

        (mock_service.return_value.objects.return_value
         .delete.return_value.execute.return_value) = {}

        response = self.gcs_hook.delete(bucket=test_bucket, object=test_object)

        self.assertTrue(response)

    @mock.patch(GCS_STRING.format('GoogleCloudStorageHook.get_conn'))
    def test_delete_nonexisting_object(self, mock_service):
        """``delete`` is falsy when the stubbed ``delete`` call raises a 404."""
        test_bucket = 'test_bucket'
        test_object = 'test_object'

        (mock_service.return_value.objects.return_value
         .delete.return_value.execute.side_effect) = HttpError(
            resp={'status': '404'}, content=EMPTY_CONTENT)

        response = self.gcs_hook.delete(bucket=test_bucket, object=test_object)

        self.assertFalse(response)

0 comments on commit ff3cab6

Please sign in to comment.