From a8a9d9b2f12ce83c1e02c6620c19e300b461c169 Mon Sep 17 00:00:00 2001 From: jshoughtaling Date: Wed, 18 Oct 2023 16:26:57 -0400 Subject: [PATCH 1/3] Create function for exporting dqdashboard_results table to json file --- NAMESPACE | 3 +- R/writeDBResultsTo.R | 104 +++++++++++++++++++++++++ man/writeDBResultsToJson.Rd | 34 ++++++++ tests/testthat/test-writeDBResultsTo.R | 42 ++++++++++ 4 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 R/writeDBResultsTo.R create mode 100644 man/writeDBResultsToJson.Rd create mode 100644 tests/testthat/test-writeDBResultsTo.R diff --git a/NAMESPACE b/NAMESPACE index 690105a7..26cb2a43 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(executeDqChecks) export(listDqChecks) export(reEvaluateThresholds) export(viewDqDashboard) +export(writeDBResultsToJson) export(writeJsonResultsToCsv) export(writeJsonResultsToTable) import(DatabaseConnector) @@ -30,4 +31,4 @@ importFrom(tools,file_path_sans_ext) importFrom(utils,install.packages) importFrom(utils,menu) importFrom(utils,packageVersion) -importFrom(utils,write.table) +importFrom(utils,write.table) \ No newline at end of file diff --git a/R/writeDBResultsTo.R b/R/writeDBResultsTo.R new file mode 100644 index 00000000..9f475543 --- /dev/null +++ b/R/writeDBResultsTo.R @@ -0,0 +1,104 @@ +# Copyright 2023 Observational Health Data Sciences and Informatics +# +# This file is part of DataQualityDashboard +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#' Write DQD results database table to json +#' +#' @param connection A connection object +#' @param connectionDetails A connectionDetails object for connecting to the CDM database +#' @param resultsDatabaseSchema The fully qualified database name of the results schema +#' @param cdmDatabaseSchema The fully qualified database name of the CDM schema +#' @param writeTableName Name of table in the database to write results to +#' @param outputFolder The output folder +#' @param outputFile The output filename +#' +#' @export +#' + +writeDBResultsToJson <- function(connection, + connectionDetails, + resultsDatabaseSchema, + cdmDatabaseSchema, + writeTableName, + outputFolder, + outputFile) { + startTime <- Sys.time() + + sql <- SqlRender::render( + sql = "select * from @cdmDatabaseSchema.cdm_source;", + cdmDatabaseSchema = cdmDatabaseSchema + ) + + sql <- SqlRender::translate( + sql = sql, + targetDialect = connectionDetails$dbms + ) + + metadata <- DatabaseConnector::querySql( + connection = connection, + sql = sql, + snakeCaseToCamelCase = TRUE + ) + + sql <- SqlRender::render( + sql = "select * from @resultsDatabaseSchema.@writeTableName;", + resultsDatabaseSchema = resultsDatabaseSchema, + writeTableName = writeTableName + ) + + sql <- SqlRender::translate( + sql = sql, + targetDialect = connectionDetails$dbms + ) + + checkResults <- DatabaseConnector::querySql( + connection, + sql, + snakeCaseToCamelCase = TRUE + ) + + # Quick patch for missing value issues related to SQL Only Implementation + checkResults["error"][checkResults["error"] == ''] <- NA + checkResults["warning"][checkResults["warning"] == ''] <- NA + checkResults["executionTime"][checkResults["executionTime"] == ''] <- '0.1 secs' + checkResults["queryText"][checkResults["queryText"] == ''] <- '[Generated via SQL Only]' + + overview <- .summarizeResults( + checkResults = checkResults + ) + + endTime <- Sys.time() + + delta <- startTime - endTime + + # Quick patch for non-camel-case column name + names(checkResults)[names(checkResults) == "checkid"] <- "checkId" + + allResults <- list( + startTimestamp = Sys.time(), + endTimestamp = Sys.time(), + executionTime = sprintf("%.0f %s", delta, attr(delta, "units")), + CheckResults = checkResults, + Metadata = metadata, + Overview = overview + ) + + .writeResultsToJson( + allResults, + outputFolder, + outputFile + ) + +} \ No newline at end of file diff --git a/man/writeDBResultsToJson.Rd b/man/writeDBResultsToJson.Rd new file mode 100644 index 00000000..67f23ac4 --- /dev/null +++ b/man/writeDBResultsToJson.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/writeDBResultsTo.R +\name{writeDBResultsToJson} +\alias{writeDBResultsToJson} +\title{Write DQD results database table to json} +\usage{ +writeDBResultsToJson( + connection, + connectionDetails, + resultsDatabaseSchema, + cdmDatabaseSchema, + writeTableName, + outputFolder, + outputFile +) +} +\arguments{ +\item{connection}{A connection object} + +\item{connectionDetails}{A connectionDetails object for connecting to the CDM database} + +\item{resultsDatabaseSchema}{The fully qualified database name of the results schema} + +\item{cdmDatabaseSchema}{The fully qualified database name of the CDM schema} + +\item{writeTableName}{Name of table in the database to write results to} + +\item{outputFolder}{The output folder} + +\item{outputFile}{The output filename} +} +\description{ +Write DQD results database table to json +} \ No newline at end of file diff --git a/tests/testthat/test-writeDBResultsTo.R b/tests/testthat/test-writeDBResultsTo.R new file mode 100644 index 00000000..4579d9a7 --- /dev/null +++ b/tests/testthat/test-writeDBResultsTo.R @@ -0,0 +1,42 @@ +library(testthat) + +test_that("Write DB results to json", { + outputFolder <- tempfile("dqd_") + on.exit(unlink(outputFolder, recursive = TRUE)) + connectionDetailsEunomia <- Eunomia::getEunomiaConnectionDetails() + cdmDatabaseSchemaEunomia <- "main" + resultsDatabaseSchemaEunomia <- "main" + + results <- DataQualityDashboard::executeDqChecks( + connectionDetails = connectionDetailsEunomia, + cdmDatabaseSchema = cdmDatabaseSchemaEunomia, + resultsDatabaseSchema = resultsDatabaseSchemaEunomia, + cdmSourceName = "Eunomia", + checkNames = "measurePersonCompleteness", + outputFolder = outputFolder, + writeToTable = TRUE, + writeTableName = "dqdashboard_results" + ) + + + connection <- DatabaseConnector::connect(connectionDetailsEunomia) + tableNames <- DatabaseConnector::getTableNames(connection = connection, databaseSchema = resultsDatabaseSchemaEunomia) + expect_true("dqdashboard_results" %in% tolower(tableNames)) + + testExportFile <- "dq-result-test.json" + + DataQualityDashboard::writeDBResultsToJson( + connection, + connectionDetailsEunomia, + resultsDatabaseSchemaEunomia, + cdmDatabaseSchemaEunomia, + "dqdashboard_results", + outputFolder, + testExportFile + ) + + on.exit(DatabaseConnector::disconnect(connection), add = TRUE) + expect_true(file.exists(file.path(outputFolder,testExportFile))) + + +}) \ No newline at end of file From b3b65dc65f8765a06bf62a47feb82479efef3247 Mon Sep 17 00:00:00 2001 From: jshoughtaling Date: Sat, 21 Oct 2023 12:37:18 -0400 Subject: [PATCH 2/3] Update function, tests, and documentation in response to PR Rev --- R/writeDBResultsTo.R | 46 ++++++++-------------- tests/testthat/test-writeDBResultsTo.R | 36 +++++++++++------ vignettes/SqlOnly.rmd | 53 ++++++-------------------- 3 files changed, 50 insertions(+), 85 deletions(-) diff --git a/R/writeDBResultsTo.R b/R/writeDBResultsTo.R index 9f475543..a0617c82 100644 --- a/R/writeDBResultsTo.R +++ b/R/writeDBResultsTo.R @@ -20,9 +20,9 @@ #' @param connectionDetails A connectionDetails object for connecting to the CDM database #' @param resultsDatabaseSchema The fully qualified database name of the results schema #' @param cdmDatabaseSchema The fully qualified database name of the CDM schema -#' @param writeTableName Name of table in the database to write results to -#' @param outputFolder The output folder -#' @param outputFile The output filename +#' @param writeTableName Name of DQD results table in the database to read from +#' @param outputFolder The folder to output the json results file to +#' @param outputFile The output filename of the json results file #' #' @export #' @@ -36,43 +36,27 @@ writeDBResultsToJson <- function(connection, outputFile) { startTime <- Sys.time() - sql <- SqlRender::render( + metadata <- DatabaseConnector::renderTranslateQuerySql( + connection, sql = "select * from @cdmDatabaseSchema.cdm_source;", - cdmDatabaseSchema = cdmDatabaseSchema + cdmDatabaseSchema = cdmDatabaseSchema, + targetDialect = connectionDetails$dbms, + snakeCaseToCamelCase = TRUE ) - sql <- SqlRender::translate( - sql = sql, - targetDialect = connectionDetails$dbms - ) - - metadata <- DatabaseConnector::querySql( - connection = connection, - sql = sql, - snakeCaseToCamelCase = TRUE - ) - - sql <- SqlRender::render( + checkResults <- DatabaseConnector::renderTranslateQuerySql( + connection, sql = "select * from @resultsDatabaseSchema.@writeTableName;", resultsDatabaseSchema = resultsDatabaseSchema, - writeTableName = writeTableName - ) - - sql <- SqlRender::translate( - sql = sql, - targetDialect = connectionDetails$dbms - ) - - checkResults <- DatabaseConnector::querySql( - connection, - sql, - snakeCaseToCamelCase = TRUE + writeTableName = writeTableName, + targetDialect = connectionDetails$dbms, + snakeCaseToCamelCase = TRUE ) # Quick patch for missing value issues related to SQL Only Implementation checkResults["error"][checkResults["error"] == ''] <- NA checkResults["warning"][checkResults["warning"] == ''] <- NA - checkResults["executionTime"][checkResults["executionTime"] == ''] <- '0.1 secs' + checkResults["executionTime"][checkResults["executionTime"] == ''] <- '0 secs' checkResults["queryText"][checkResults["queryText"] == ''] <- '[Generated via SQL Only]' overview <- .summarizeResults( @@ -89,7 +73,7 @@ writeDBResultsToJson <- function(connection, allResults <- list( startTimestamp = Sys.time(), endTimestamp = Sys.time(), - executionTime = sprintf("%.0f %s", delta, attr(delta, "units")), + executionTime = '0 secs', CheckResults = checkResults, Metadata = metadata, Overview = overview diff --git a/tests/testthat/test-writeDBResultsTo.R b/tests/testthat/test-writeDBResultsTo.R index 4579d9a7..66159efc 100644 --- a/tests/testthat/test-writeDBResultsTo.R +++ b/tests/testthat/test-writeDBResultsTo.R @@ -6,22 +6,21 @@ test_that("Write DB results to json", { connectionDetailsEunomia <- Eunomia::getEunomiaConnectionDetails() cdmDatabaseSchemaEunomia <- "main" resultsDatabaseSchemaEunomia <- "main" + writeTableName <- "dqdashboard_results" results <- DataQualityDashboard::executeDqChecks( - connectionDetails = connectionDetailsEunomia, - cdmDatabaseSchema = cdmDatabaseSchemaEunomia, - resultsDatabaseSchema = resultsDatabaseSchemaEunomia, - cdmSourceName = "Eunomia", - checkNames = "measurePersonCompleteness", - outputFolder = outputFolder, - writeToTable = TRUE, - writeTableName = "dqdashboard_results" + connectionDetails = connectionDetailsEunomia, + cdmDatabaseSchema = cdmDatabaseSchemaEunomia, + resultsDatabaseSchema = resultsDatabaseSchemaEunomia, + cdmSourceName = "Eunomia", + checkNames = "measurePersonCompleteness", + outputFolder = outputFolder, + writeToTable = TRUE, + writeTableName = writeTableName ) connection <- DatabaseConnector::connect(connectionDetailsEunomia) - tableNames <- DatabaseConnector::getTableNames(connection = connection, databaseSchema = resultsDatabaseSchemaEunomia) - expect_true("dqdashboard_results" %in% tolower(tableNames)) testExportFile <- "dq-result-test.json" @@ -30,13 +29,26 @@ test_that("Write DB results to json", { connectionDetailsEunomia, resultsDatabaseSchemaEunomia, cdmDatabaseSchemaEunomia, - "dqdashboard_results", + writeTableName, outputFolder, testExportFile - ) + ) on.exit(DatabaseConnector::disconnect(connection), add = TRUE) + + # Check that file was exported properly expect_true(file.exists(file.path(outputFolder,testExportFile))) + # Check that export length matches length of db table + results <- jsonlite::fromJSON(file.path(outputFolder,testExportFile)) + table_rows <- DatabaseConnector::renderTranslateQuerySql( + connection, + sql = "select count(*) from @resultsDatabaseSchema.@writeTableName;", + resultsDatabaseSchema = resultsDatabaseSchemaEunomia, + writeTableName = writeTableName, + targetDialect = connectionDetailsEunomia$dbms, + snakeCaseToCamelCase = TRUE + ) + expect_true(length(results$CheckResults) == table_rows) }) \ No newline at end of file diff --git a/vignettes/SqlOnly.rmd b/vignettes/SqlOnly.rmd index 9fabcce0..dcaad7f1 100644 --- a/vignettes/SqlOnly.rmd +++ b/vignettes/SqlOnly.rmd @@ -171,49 +171,18 @@ for (dqdSqlFile in dqdSqlFiles) { ) } -# Get results -checkResults <- DatabaseConnector::querySql( - c, - SqlRender::render( - "SELECT * FROM @resultsDatabaseSchema.@writeTableName", - resultsDatabaseSchema = resultsDatabaseSchema, - writeTableName = writeTableName - ), - snakeCaseToCamelCase = TRUE -) -DatabaseConnector::disconnect(c) - -# convert check ID column name to correct format -colnames(checkResults)[colnames(checkResults) == "checkid"] ="checkId" - -# Get overview of DQD results -library(DataQualityDashboard) -overview <- DataQualityDashboard:::.summarizeResults(checkResults = checkResults) - -# Create results object, adding fake metadata -result <- list( - startTimestamp = Sys.time(), - endTimestamp = Sys.time(), - executionTime = "", - Metadata = data.frame( - cdmSourceName = cdmSourceName, - cdmSourceAbbreviation = cdmSourceName, - cdmHolder = "", - sourceDescription = "", - sourceDocumentationReference = "", - cdmEtlReference = "", - sourceReleaseDate = "", - cdmReleaseDate = "", - cdmVersion = cdmVersion, - cdmVersionConceptId = 0, - vocabularyVersion = "", - dqdVersion = as.character(packageVersion("DataQualityDashboard")) - ), - Overview = overview, - CheckResults = checkResults -) +# Extract results table to JSON file for viewing or secondary use + +DataQualityDashboard::writeDBResultsToJson( + c, + connectionDetails, + resultsDatabaseSchema, + cdmDatabaseSchema, + writeTableName, + jsonOutputFolder, + jsonOutputFile + ) -DataQualityDashboard:::.writeResultsToJson(result, jsonOutputFolder, jsonOutputFile) jsonFilePath <- R.utils::getAbsolutePath(file.path(jsonOutputFolder, jsonOutputFile)) DataQualityDashboard::viewDqDashboard(jsonFilePath) From 4aa1ddefe192644cd360a17714ca02d9fde770ce Mon Sep 17 00:00:00 2001 From: jshoughtaling Date: Sat, 21 Oct 2023 13:58:52 -0400 Subject: [PATCH 3/3] Remove unused start and end times --- R/writeDBResultsTo.R | 6 ------ 1 file changed, 6 deletions(-) diff --git a/R/writeDBResultsTo.R b/R/writeDBResultsTo.R index a0617c82..66134ae5 100644 --- a/R/writeDBResultsTo.R +++ b/R/writeDBResultsTo.R @@ -34,8 +34,6 @@ writeDBResultsToJson <- function(connection, writeTableName, outputFolder, outputFile) { - startTime <- Sys.time() - metadata <- DatabaseConnector::renderTranslateQuerySql( connection, sql = "select * from @cdmDatabaseSchema.cdm_source;", @@ -63,10 +61,6 @@ writeDBResultsToJson <- function(connection, checkResults = checkResults ) - endTime <- Sys.time() - - delta <- startTime - endTime - # Quick patch for non-camel-case column name names(checkResults)[names(checkResults) == "checkid"] <- "checkId"