Skip to content

Commit

Permalink
Merge pull request #497 from TuftsCTSI/add-dbtable-to-json
Browse files Browse the repository at this point in the history
Create function for exporting `dqdashboard_results` table to a json file
  • Loading branch information
katy-sadowski committed Oct 21, 2023
2 parents 7845a00 + 4aa1dde commit ef06bfc
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 43 deletions.
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(executeDqChecks)
export(listDqChecks)
export(reEvaluateThresholds)
export(viewDqDashboard)
export(writeDBResultsToJson)
export(writeJsonResultsToCsv)
export(writeJsonResultsToTable)
import(DatabaseConnector)
Expand All @@ -30,4 +31,4 @@ importFrom(tools,file_path_sans_ext)
importFrom(utils,install.packages)
importFrom(utils,menu)
importFrom(utils,packageVersion)
importFrom(utils,write.table)
importFrom(utils,write.table)
82 changes: 82 additions & 0 deletions R/writeDBResultsTo.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Copyright 2023 Observational Health Data Sciences and Informatics
#
# This file is part of DataQualityDashboard
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' Write DQD results database table to json
#'
#' Reads the `cdm_source` table from the CDM schema and the DQD results table
#' from the results schema, summarizes the check results, and writes the
#' combined results object to a json file.
#'
#' @param connection A connection object
#' @param connectionDetails A connectionDetails object for connecting to the CDM database.
#'   Retained for backward compatibility; the function does not use it because
#'   the queries are run on `connection`
#' @param resultsDatabaseSchema The fully qualified database name of the results schema
#' @param cdmDatabaseSchema The fully qualified database name of the CDM schema
#' @param writeTableName Name of DQD results table in the database to read from
#' @param outputFolder The folder to output the json results file to
#' @param outputFile The output filename of the json results file
#'
#' @export
#'

writeDBResultsToJson <- function(connection,
                                 connectionDetails,
                                 resultsDatabaseSchema,
                                 cdmDatabaseSchema,
                                 writeTableName,
                                 outputFolder,
                                 outputFile) {
  # NOTE: no `targetDialect` argument is passed below. renderTranslateQuerySql()
  # translates for the dialect of `connection`, and any extra named argument is
  # forwarded to SqlRender::render() as a SQL parameter, so `targetDialect`
  # only produced a "parameter not found in SQL" warning.
  metadata <- DatabaseConnector::renderTranslateQuerySql(
    connection,
    sql = "select * from @cdmDatabaseSchema.cdm_source;",
    cdmDatabaseSchema = cdmDatabaseSchema,
    snakeCaseToCamelCase = TRUE
  )

  checkResults <- DatabaseConnector::renderTranslateQuerySql(
    connection,
    sql = "select * from @resultsDatabaseSchema.@writeTableName;",
    resultsDatabaseSchema = resultsDatabaseSchema,
    writeTableName = writeTableName,
    snakeCaseToCamelCase = TRUE
  )

  # Quick patch for missing value issues related to SQL Only Implementation:
  # empty strings are replaced by NA (error, warning) or by placeholder values
  # (executionTime, queryText).
  checkResults["error"][checkResults["error"] == ""] <- NA
  checkResults["warning"][checkResults["warning"] == ""] <- NA
  checkResults["executionTime"][checkResults["executionTime"] == ""] <- "0 secs"
  checkResults["queryText"][checkResults["queryText"] == ""] <- "[Generated via SQL Only]"

  overview <- .summarizeResults(
    checkResults = checkResults
  )

  # Quick patch for non-camel-case column name: the `checkid` column has no
  # underscore, so snakeCaseToCamelCase leaves it as "checkid".
  names(checkResults)[names(checkResults) == "checkid"] <- "checkId"

  # The original run times are not read from the database, so the timestamps
  # are set to the export time and the execution time to a placeholder.
  allResults <- list(
    startTimestamp = Sys.time(),
    endTimestamp = Sys.time(),
    executionTime = "0 secs",
    CheckResults = checkResults,
    Metadata = metadata,
    Overview = overview
  )

  .writeResultsToJson(
    allResults,
    outputFolder,
    outputFile
  )
}
34 changes: 34 additions & 0 deletions man/writeDBResultsToJson.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 54 additions & 0 deletions tests/testthat/test-writeDBResultsTo.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
library(testthat)

# Runs a single DQD check against Eunomia with results written to a database
# table, exports that table with writeDBResultsToJson(), and verifies that the
# json file exists and holds one check result per row of the table.
test_that("Write DB results to json", {
  outputFolder <- tempfile("dqd_")
  on.exit(unlink(outputFolder, recursive = TRUE))
  connectionDetailsEunomia <- Eunomia::getEunomiaConnectionDetails()
  cdmDatabaseSchemaEunomia <- "main"
  resultsDatabaseSchemaEunomia <- "main"
  writeTableName <- "dqdashboard_results"

  results <- DataQualityDashboard::executeDqChecks(
    connectionDetails = connectionDetailsEunomia,
    cdmDatabaseSchema = cdmDatabaseSchemaEunomia,
    resultsDatabaseSchema = resultsDatabaseSchemaEunomia,
    cdmSourceName = "Eunomia",
    checkNames = "measurePersonCompleteness",
    outputFolder = outputFolder,
    writeToTable = TRUE,
    writeTableName = writeTableName
  )

  connection <- DatabaseConnector::connect(connectionDetailsEunomia)
  # Register the disconnect right away so the connection is released even if
  # the export or one of the expectations below throws.
  on.exit(DatabaseConnector::disconnect(connection), add = TRUE)

  testExportFile <- "dq-result-test.json"

  DataQualityDashboard::writeDBResultsToJson(
    connection,
    connectionDetailsEunomia,
    resultsDatabaseSchemaEunomia,
    cdmDatabaseSchemaEunomia,
    writeTableName,
    outputFolder,
    testExportFile
  )

  # Check that file was exported properly
  expect_true(file.exists(file.path(outputFolder, testExportFile)))

  # Check that the number of exported check results matches the number of rows
  # in the db table. CheckResults is parsed into a data frame, so nrow() (not
  # length(), which counts columns) gives the number of check results.
  exportedResults <- jsonlite::fromJSON(file.path(outputFolder, testExportFile))
  tableRows <- DatabaseConnector::renderTranslateQuerySql(
    connection,
    sql = "select count(*) from @resultsDatabaseSchema.@writeTableName;",
    resultsDatabaseSchema = resultsDatabaseSchemaEunomia,
    writeTableName = writeTableName,
    snakeCaseToCamelCase = TRUE
  )
  # The count query returns a 1x1 data frame; compare against its single value.
  expect_true(nrow(exportedResults$CheckResults) == tableRows[[1, 1]])
})
53 changes: 11 additions & 42 deletions vignettes/SqlOnly.rmd
Original file line number Diff line number Diff line change
Expand Up @@ -171,49 +171,18 @@ for (dqdSqlFile in dqdSqlFiles) {
)
}

# Get results
checkResults <- DatabaseConnector::querySql(
c,
SqlRender::render(
"SELECT * FROM @resultsDatabaseSchema.@writeTableName",
resultsDatabaseSchema = resultsDatabaseSchema,
writeTableName = writeTableName
),
snakeCaseToCamelCase = TRUE
)
DatabaseConnector::disconnect(c)

# convert check ID column name to correct format
colnames(checkResults)[colnames(checkResults) == "checkid"] ="checkId"

# Get overview of DQD results
library(DataQualityDashboard)
overview <- DataQualityDashboard:::.summarizeResults(checkResults = checkResults)

# Create results object, adding fake metadata
result <- list(
startTimestamp = Sys.time(),
endTimestamp = Sys.time(),
executionTime = "",
Metadata = data.frame(
cdmSourceName = cdmSourceName,
cdmSourceAbbreviation = cdmSourceName,
cdmHolder = "",
sourceDescription = "",
sourceDocumentationReference = "",
cdmEtlReference = "",
sourceReleaseDate = "",
cdmReleaseDate = "",
cdmVersion = cdmVersion,
cdmVersionConceptId = 0,
vocabularyVersion = "",
dqdVersion = as.character(packageVersion("DataQualityDashboard"))
),
Overview = overview,
CheckResults = checkResults
)
# Export the DQD results table to a JSON file for viewing or secondary use

DataQualityDashboard::writeDBResultsToJson(
  connection = c,
  connectionDetails = connectionDetails,
  resultsDatabaseSchema = resultsDatabaseSchema,
  cdmDatabaseSchema = cdmDatabaseSchema,
  writeTableName = writeTableName,
  outputFolder = jsonOutputFolder,
  outputFile = jsonOutputFile
)

DataQualityDashboard:::.writeResultsToJson(result, jsonOutputFolder, jsonOutputFile)

jsonFilePath <- R.utils::getAbsolutePath(file.path(jsonOutputFolder, jsonOutputFile))
DataQualityDashboard::viewDqDashboard(jsonFilePath)
Expand Down

0 comments on commit ef06bfc

Please sign in to comment.