Skip to content

Commit

Permalink
Merge pull request #128 from OpenGeoMetadata/121-migrate-to-aardvark
Browse files Browse the repository at this point in the history
Add initial class for aardvark migration
  • Loading branch information
thatbudakguy committed Feb 28, 2023
2 parents e3d91fa + c0ee7e5 commit c2c5427
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 0 deletions.
3 changes: 3 additions & 0 deletions lib/geo_combine.rb
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ def to_html
# Require harvesting/indexing files
require 'geo_combine/geo_blacklight_harvester'

# Migrators
require 'geo_combine/migrators/v1_aardvark_migrator'

# Require gem files
require 'geo_combine/version'
require 'geo_combine/railtie' if defined?(Rails)
52 changes: 52 additions & 0 deletions lib/geo_combine/migrators/v1_aardvark_migrator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# frozen_string_literal: true

module GeoCombine
  module Migrators
    # TODO: WARNING! This class is not fully implemented and should not be used in
    # production. See https://github.com/OpenGeoMetadata/GeoCombine/issues/121
    # for remaining work.
    #
    # Renames the fields of a v1 schema record to their aardvark schema names.
    # Only the keys are rewritten; values are carried over as they are.
    class V1AardvarkMigrator
      # Lookup table: v1 field name => aardvark field name. Fields that are
      # not listed here keep their original name.
      SCHEMA_FIELD_MAP = {
        'dc_title_s' => 'dct_title_s', # new namespace
        'dc_description_s' => 'dct_description_sm', # new namespace; single to multi-valued
        'dc_language_s' => 'dct_language_sm', # new namespace; single to multi-valued
        'dc_language_sm' => 'dct_language_sm', # new namespace; single to multi-valued
        'dc_creator_sm' => 'dct_creator_sm', # new namespace
        'dc_publisher_s' => 'dct_publisher_sm', # new namespace; single to multi-valued
        'dct_provenance_s' => 'schema_provider_s', # new URI name
        'dc_subject_sm' => 'dct_subject_sm', # new namespace
        'solr_year_i' => 'gbl_indexYear_im', # new URI name; single to multi-valued
        'dc_source_sm' => 'dct_source_sm', # new namespace
        'dc_rights_s' => 'dct_accessRights_s', # new URI name
        'dc_format_s' => 'dct_format_s', # new namespace
        'layer_id_s' => 'gbl_wxsIdentifier_s', # new URI name
        'layer_slug_s' => 'id', # new URI name
        'dc_identifier_s' => 'dct_identifier_sm', # new namespace; single to multi-valued
        'layer_modified_dt' => 'gbl_mdModified_dt', # new URI name
        'geoblacklight_version' => 'gbl_mdVersion_s', # new URI name
        'suppressed_b' => 'gbl_suppressed_b' # new namespace
      }.freeze

      attr_reader :v1_hash

      # @param v1_hash [Hash] parsed json in the v1 schema
      def initialize(v1_hash:)
        @v1_hash = v1_hash
      end

      # Performs the migration.
      #
      # @return [Hash] a new hash with renamed keys and 'gbl_mdVersion_s'
      #   set to 'Aardvark'; the hash given to the constructor is not modified
      def run
        # The version is always stamped, replacing any value that was carried
        # over from the v1 'geoblacklight_version' field.
        convert_keys.tap { |migrated| migrated['gbl_mdVersion_s'] = 'Aardvark' }
      end

      # Builds a copy of the v1 hash whose keys have been passed through
      # SCHEMA_FIELD_MAP.
      #
      # @return [Hash] copy of v1_hash with mapped keys and the same values
      def convert_keys
        v1_hash.transform_keys { |field| SCHEMA_FIELD_MAP.fetch(field, field) }
      end
    end
  end
end
33 changes: 33 additions & 0 deletions spec/fixtures/docs/full_geoblacklight_aardvark.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"gbl_mdVersion_s":"Aardvark",
"dct_identifier_sm":"http://purl.stanford.edu/cz128vq0535",
"dct_title_s":"2005 Rural Poverty GIS Database: Uganda",
"dct_description_sm":"This polygon shapefile contains 2005 poverty data for 855 rural subcounties in Uganda. These data are intended for researchers, students, policy makers and the general public for reference and mapping purposes, and may be used for basic applications such as viewing, querying, and map output production.",
"dct_accessRights_s":"Public",
"schema_provider_s":"Stanford",
"dct_references_s":"{\"http://schema.org/url\":\"http://purl.stanford.edu/cz128vq0535\",\"http://schema.org/downloadUrl\":\"http://stacks.stanford.edu/file/druid:cz128vq0535/data.zip\",\"http://www.loc.gov/mods/v3\":\"http://purl.stanford.edu/cz128vq0535.mods\",\"http://www.isotc211.org/schemas/2005/gmd/\":\"http://opengeometadata.stanford.edu/metadata/edu.stanford.purl/druid:cz128vq0535/iso19139.xml\",\"http://www.w3.org/1999/xhtml\":\"http://opengeometadata.stanford.edu/metadata/edu.stanford.purl/druid:cz128vq0535/default.html\",\"http://www.opengis.net/def/serviceType/ogc/wfs\":\"https://geowebservices.stanford.edu/geoserver/wfs\",\"http://www.opengis.net/def/serviceType/ogc/wms\":\"https://geowebservices.stanford.edu/geoserver/wms\"}",
"gbl_wxsIdentifier_s":"druid:cz128vq0535",
"id":"stanford-cz128vq0535",
"layer_geom_type_s":"Polygon",
"gbl_mdModified_dt":"2015-01-13T18:46:38Z",
"dct_format_s":"Shapefile",
"dct_language_sm":"English",
"dc_type_s":"Dataset",
"dct_publisher_sm":"Uganda Bureau of Statistics",
"dct_creator_sm":[
"Uganda Bureau of Statistics"
],
"dct_subject_sm":[
"Poverty",
"Statistics"
],
"dct_issued_s":"2005",
"dct_temporal_sm":[
"2005"
],
"dct_spatial_sm":[
"Uganda"
],
"solr_geom":"ENVELOPE(29.572742, 35.000308, 4.234077, -1.478794)",
"gbl_indexYear_im":2005
}
6 changes: 6 additions & 0 deletions spec/fixtures/json_docs.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ def full_geoblacklight
File.read(File.join(File.dirname(__FILE__), './docs/full_geoblacklight.json'))
end

##
# Raw JSON text of the full_geoblacklight fixture converted to the aardvark
# schema, read from docs/full_geoblacklight_aardvark.json next to this file
def full_geoblacklight_aardvark
  fixtures_dir = File.dirname(__FILE__)
  fixture_path = File.join(fixtures_dir, './docs/full_geoblacklight_aardvark.json')
  File.read(fixture_path)
end

##
# A sample Esri OpenData metadata record
def esri_opendata_metadata
Expand Down
22 changes: 22 additions & 0 deletions spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# frozen_string_literal: true

require 'spec_helper'

RSpec.describe GeoCombine::Migrators::V1AardvarkMigrator do
  include JsonDocs

  describe '#run' do
    # v1 fixture, parsed into a Hash and handed to the migrator
    let(:v1_record) { JSON.parse(full_geoblacklight) }

    # TODO: Note that this fixture has not yet been fully converted to
    # aardvark. See https://github.com/OpenGeoMetadata/GeoCombine/issues/121
    # for remaining work.
    let(:aardvark_record) { JSON.parse(full_geoblacklight_aardvark) }

    it 'migrates keys' do
      migrated = described_class.new(v1_hash: v1_record).run
      expect(migrated).to eq(aardvark_record)
    end

    context 'when the given record is already in aardvark schema' do
      # Pending: no example body has been written yet
      xit 'returns the record unchanged'
    end
  end
end

0 comments on commit c2c5427

Please sign in to comment.