From a4b4aed4526813df76299579b734654e1b6b5ada Mon Sep 17 00:00:00 2001 From: michaelgfalk Date: Thu, 15 Aug 2024 12:36:20 +1000 Subject: [PATCH] graceful failure of API requests --- DESCRIPTION | 3 +- NAMESPACE | 1 + NEWS.md | 2 ++ R/generator-query-type.R | 6 ++-- R/get-diff.R | 14 +++++--- R/get-query-results.R | 12 ++++--- R/list-query-type.R | 4 ++- R/prop-query-type.R | 4 ++- R/query-category-members.R | 5 ++- R/query-page-properties.R | 4 ++- R/request-gracefully.R | 45 ++++++++++++++++++++++++ R/wiki-action-request.R | 6 ++-- man/get_diff.Rd | 14 +++++--- man/get_query_results.Rd | 12 ++++--- man/gracefully.Rd | 44 +++++++++++++++++++++++ man/query_by_.Rd | 5 ++- man/query_category_members.Rd | 6 +++- man/query_generate_pages.Rd | 7 ++-- man/query_list_pages.Rd | 5 ++- man/query_page_properties.Rd | 5 ++- man/wiki_action_request.Rd | 7 ++-- man/wikkitidy-package.Rd | 1 + tests/testthat/test-request-gracefully.R | 16 +++++++++ 23 files changed, 195 insertions(+), 33 deletions(-) create mode 100644 R/request-gracefully.R create mode 100644 man/gracefully.Rd create mode 100644 tests/testthat/test-request-gracefully.R diff --git a/DESCRIPTION b/DESCRIPTION index d1cd7bd..e564bcd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,8 @@ Imports: rlang (>= 0.4.11), stringr, tibble, - vctrs + vctrs, + webfakes Suggests: covr, igraph, diff --git a/NAMESPACE b/NAMESPACE index 9401598..b6f90e1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -62,6 +62,7 @@ export(get_xtools_page_info) export(get_xtools_page_links) export(get_xtools_page_prose) export(get_xtools_page_top_editors) +export(gracefully) export(list_all_generators) export(list_all_list_modules) export(list_all_property_modules) diff --git a/NEWS.md b/NEWS.md index fa0b96b..071db4a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # wikkitidy (development version) +* New `gracefully()` function added to allow graceful failure of http requests, as per CRAN policy. 
All API calls to Wikipedia query modules are now wrapped in `gracefully()` in the examples. + # wikkitidy 0.1.12 * `tidyr` moved from Imports to Suggests diff --git a/R/generator-query-type.R b/R/generator-query-type.R index 949c70a..5668254 100644 --- a/R/generator-query-type.R +++ b/R/generator-query-type.R @@ -23,7 +23,7 @@ #' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Additional parameters to the #' generator #' -#' @return [query_generate_pages]: The modfied request, which can be passed to [next_batch] or +#' @return [query_generate_pages]: The modified request, which can be passed to [next_batch] or #' [retrieve_all] as appropriate. #' #' [list_all_generators]: a [tibble][tibble::tbl_df] of all the available generator @@ -34,11 +34,13 @@ #' pages' properties should be generated. #' @export #' +#' @seealso [gracefully()] +#' #' @examples #' # Search for articles about seagulls #' seagulls <- wiki_action_request() %>% #' query_generate_pages("search", gsrsearch = "seagull") %>% -#' next_batch() +#' gracefully(next_batch) #' #' seagulls query_generate_pages <- function(.req, generator, ...) 
{ diff --git a/R/get-diff.R b/R/get-diff.R index 305fbab..8e12b2f 100644 --- a/R/get-diff.R +++ b/R/get-diff.R @@ -37,10 +37,16 @@ #' "revisions", #' rvlimit = 2, rvprop = "ids", rvdir = "older" #' ) %>% -#' next_result() %>% -#' tidyr::unnest(cols = c(revisions)) %>% -#' dplyr::mutate(diffs = get_diff(from = parentid, to = revid)) -#' revisions +#' gracefully(next_result) +#' +#' if (tibble::is_tibble(revisions)) { +#' revisions <- revisions %>% +#' tidyr::unnest(cols = c(revisions)) %>% +#' dplyr::mutate(diffs = get_diff(from = parentid, to = revid)) +#' +#' print(revisions) +#' } +#' get_diff <- function(from, to, language = "en", simplify = TRUE) { if (!rlang::is_scalar_logical(simplify)) { rlang::abort("`simplify` must be either TRUE or FALSE") diff --git a/R/get-query-results.R b/R/get-query-results.R index aa1fbea..d67b80e 100644 --- a/R/get-query-results.R +++ b/R/get-query-results.R @@ -34,15 +34,19 @@ #' preview <- wiki_action_request() %>% #' query_by_title("Steve Wozniak") %>% #' query_page_properties("categories", cllimit = 40) %>% -#' next_result() +#' gracefully(next_result) #' preview #' -#' all_results <- retrieve_all(preview) +#' all_results <- preview %>% +#' gracefully(retrieve_all) #' all_results #' #' # tidyr is useful for list-columns. 
-#' all_results %>% -#' tidyr::unnest(cols=c(categories), names_sep = "_") +#' if (tibble::is_tibble(all_results)) { +#' all_results %>% +#' tidyr::unnest(cols=c(categories), names_sep = "_") +#' } +#' NULL #' @rdname get_query_results diff --git a/R/list-query-type.R b/R/list-query-type.R index c507f41..195f22f 100644 --- a/R/list-query-type.R +++ b/R/list-query-type.R @@ -24,6 +24,8 @@ #' @return An HTTP response: an S3 list with class httr2_request #' @export #' +#' @seealso [gracefully()] +#' #' @examples #' # Get the ten most recently added pages in Category:Physics #' physics_pages <- wiki_action_request() %>% @@ -31,7 +33,7 @@ #' cmsort = "timestamp", #' cmdir = "desc", cmtitle = "Category:Physics" #' ) %>% -#' next_batch() +#' gracefully(next_batch) #' #' physics_pages query_list_pages <- function(.req, list, ...) { diff --git a/R/prop-query-type.R b/R/prop-query-type.R index 1650d6c..45b311e 100644 --- a/R/prop-query-type.R +++ b/R/prop-query-type.R @@ -22,12 +22,14 @@ #' @return A request object of type `pages/query/action_api/httr2_request`. To #' perform the query, pass the object to [next_batch] or [retrieve_all] #' +#' @seealso [gracefully()] +#' #' @examples #' # Retrieve the categories for Charles Harpur's Wikipedia page #' resp <- wiki_action_request() %>% #' query_by_title("Charles Harpur") %>% #' query_page_properties("categories") %>% -#' next_batch() +#' gracefully(next_batch) NULL #' @rdname query_by_ diff --git a/R/query-category-members.R b/R/query-category-members.R index 6668841..442a11f 100644 --- a/R/query-category-members.R +++ b/R/query-category-members.R @@ -46,10 +46,13 @@ #' be passed to [igraph::graph_from_data_frame] for network analysis. 
#' @export #' +#' @seealso [gracefully()] +#' #' @examples #' # Get the first 10 pages in 'Category:Physics' on English Wikipedia #' physics_members <- wiki_action_request() %>% -#' query_category_members("Physics") %>% next_batch() +#' query_category_members("Physics") %>% +#' gracefully(next_batch) #' physics_members #' #' diff --git a/R/query-page-properties.R b/R/query-page-properties.R index 61078df..41385e2 100644 --- a/R/query-page-properties.R +++ b/R/query-page-properties.R @@ -20,6 +20,8 @@ #' @return An HTTP response: an S3 list with class httr2_request #' @export #' +#' @seealso [gracefully()] +#' #' @examples #' # Search for articles about seagulls and retrieve their number of #' # watchers @@ -27,7 +29,7 @@ #' resp <- wiki_action_request() %>% #' query_generate_pages("search", gsrsearch = "seagull") %>% #' query_page_properties("info", inprop = "watchers") %>% -#' next_batch() %>% +#' gracefully(next_batch) %>% #' dplyr::select(pageid, ns, title, watchers) #' resp query_page_properties <- function(.req, property, ...) { diff --git a/R/request-gracefully.R b/R/request-gracefully.R new file mode 100644 index 0000000..735bc10 --- /dev/null +++ b/R/request-gracefully.R @@ -0,0 +1,45 @@ +#' Gracefully request a resource from Wikipedia +#' +#' The main purpose of this function is to enable examples using live resources +#' in the documentation. Examples must not throw errors, according to CRAN +#' policy. If you wrap a requesting method in `gracefully`, then any +#' errors of type `httr2_http` will be caught and no error will be thrown. +#' +#' +#' @param request_object A `httr2_request` object describing a query to a +#' Wikimedia Action API +#' @param request_method The desired function for performing the request, +#' typically one of those in [get_query_results] +#' +#' @return The output of `request_method` called on `request_object`, if the +#' request was successful. Otherwise a `httr2_response` object with details +#' of the failed request. 
+#' @export +#' +#' @examplesIf rlang::is_installed("webfakes") +#' # This fails without throwing an error +#' req <- httr2::request(httr2::example_url()) |> +#' httr2::req_url_path("/status/404") +#' +#' resp <- gracefully(req, httr2::req_perform) +#' +#' print(resp) +#' +#' # This request succeeds +#' req <- httr2::request(httr2::example_url()) +#' +#' resp <- gracefully(req, httr2::req_perform) +#' +#' print(resp) +gracefully <- function(request_object, request_method) { # applies `request_method` to `request_object`, turning a handled HTTP error into a printed response + tryCatch( + request_method(request_object), # value returned unchanged when no handled error is raised + httr2_http = function(cnd) show_bad_response(cnd) # NOTE(review): only conditions of class `httr2_http` are handled; errors of any other class still propagate to the caller (presumably including connection-level failures; confirm against httr2's error classes) + ) +} + +show_bad_response <- function(cnd) { # handler for a caught condition: announces the failure, prints the response stored in `cnd$resp` and returns it + cli::cli_alert("The query you tried was unsuccessful. See the response below.") + print(cnd$resp) + return(cnd$resp) +} diff --git a/R/wiki-action-request.R b/R/wiki-action-request.R index 28284b7..a3b0d89 100644 --- a/R/wiki-action-request.R +++ b/R/wiki-action-request.R @@ -10,7 +10,7 @@ #' #' [wikkitidy] provides an ergonomic API for the Action API's [Query #' modules](https://www.mediawiki.org/wiki/API:Query). These modules are most -#' useful for researchers, because they allow you to explore the stucture of +#' useful for researchers, because they allow you to explore the structure of #' Wikipedia and its back pages. You can obtain a list of available modules in #' your R console using [list_all_property_modules()], [list_all_list_modules()] #' and [list_all_generators()], @@ -41,6 +41,8 @@ #' combined with a [query_by_] query. 
#' @export #' +#' @seealso [gracefully()] +#' #' @examples #' # List the first 10 pages in the category 'Australian historians' #' historians <- wiki_action_request() %>% @@ -49,7 +51,7 @@ #' cmtitle = "Category:Australian_historians", #' cmlimit = 10 #' ) %>% -#' next_batch() +#' gracefully(next_batch) #' historians wiki_action_request <- function(..., action = "query", language = "en") { base_url <- glue::glue("https://{language}.wikipedia.org/w/api.php") diff --git a/man/get_diff.Rd b/man/get_diff.Rd index 62b6a5b..7024d56 100644 --- a/man/get_diff.Rd +++ b/man/get_diff.Rd @@ -49,8 +49,14 @@ revisions <- wiki_action_request() \%>\% "revisions", rvlimit = 2, rvprop = "ids", rvdir = "older" ) \%>\% - next_result() \%>\% - tidyr::unnest(cols = c(revisions)) \%>\% - dplyr::mutate(diffs = get_diff(from = parentid, to = revid)) -revisions + gracefully(next_result) + +if (tibble::is_tibble(revisions)) { + revisions <- revisions \%>\% + tidyr::unnest(cols = c(revisions)) \%>\% + dplyr::mutate(diffs = get_diff(from = parentid, to = revid)) + + print(revisions) +} + } diff --git a/man/get_query_results.Rd b/man/get_query_results.Rd index adc7105..58bd3ea 100644 --- a/man/get_query_results.Rd +++ b/man/get_query_results.Rd @@ -48,13 +48,17 @@ Thus the three functions for \code{next_result()}, \code{next_batch()} and preview <- wiki_action_request() \%>\% query_by_title("Steve Wozniak") \%>\% query_page_properties("categories", cllimit = 40) \%>\% - next_result() + gracefully(next_result) preview -all_results <- retrieve_all(preview) +all_results <- preview \%>\% + gracefully(retrieve_all) all_results # tidyr is useful for list-columns. 
-all_results \%>\% - tidyr::unnest(cols=c(categories), names_sep = "_") +if (tibble::is_tibble(all_results)) { + all_results \%>\% + tidyr::unnest(cols=c(categories), names_sep = "_") +} + } diff --git a/man/gracefully.Rd b/man/gracefully.Rd new file mode 100644 index 0000000..060228a --- /dev/null +++ b/man/gracefully.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/request-gracefully.R +\name{gracefully} +\alias{gracefully} +\title{Gracefully request a resource from Wikipedia} +\usage{ +gracefully(request_object, request_method) +} +\arguments{ +\item{request_object}{A \code{httr2_request} object describing a query to a +Wikimedia Action API} + +\item{request_method}{The desired function for performing the request, +typically one of those in \link{get_query_results}} +} +\value{ +The output of \code{request_method} called on \code{request_object}, if the +request was successful. Otherwise a \code{httr2_response} object with details +of the failed request. +} +\description{ +The main purpose of this function is to enable examples using live resources +in the documentation. Examples must not throw errors, according to CRAN +policy. If you wrap a requesting method in \code{gracefully}, then any +errors of type \code{httr2_http} will be caught and no error will be thrown. 
+} +\examples{ +\dontshow{if (rlang::is_installed("webfakes")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# This fails without throwing an error +req <- httr2::request(httr2::example_url()) |> + httr2::req_url_path("/status/404") + +resp <- gracefully(req, httr2::req_perform) + +print(resp) + +# This request succeeds +req <- httr2::request(httr2::example_url()) + +resp <- gracefully(req, httr2::req_perform) + +print(resp) +\dontshow{\}) # examplesIf} +} diff --git a/man/query_by_.Rd b/man/query_by_.Rd index 81b9b5f..7beb466 100644 --- a/man/query_by_.Rd +++ b/man/query_by_.Rd @@ -43,5 +43,8 @@ query to find pages that meet certain criteria using \link{query_list_pages} or resp <- wiki_action_request() \%>\% query_by_title("Charles Harpur") \%>\% query_page_properties("categories") \%>\% - next_batch() + gracefully(next_batch) +} +\seealso{ +\code{\link[=gracefully]{gracefully()}} } diff --git a/man/query_category_members.Rd b/man/query_category_members.Rd index 1bf845a..01e93a2 100644 --- a/man/query_category_members.Rd +++ b/man/query_category_members.Rd @@ -79,7 +79,8 @@ beneath them, until it can find no more subcategories. 
\examples{ # Get the first 10 pages in 'Category:Physics' on English Wikipedia physics_members <- wiki_action_request() \%>\% - query_category_members("Physics") \%>\% next_batch() + query_category_members("Physics") \%>\% + gracefully(next_batch) physics_members @@ -92,3 +93,6 @@ tree tree_graph <- igraph::graph_from_data_frame(tree$edges, vertices = tree$nodes) tree_graph } +\seealso{ +\code{\link[=gracefully]{gracefully()}} +} diff --git a/man/query_generate_pages.Rd b/man/query_generate_pages.Rd index 4a52bd7..bd43c63 100644 --- a/man/query_generate_pages.Rd +++ b/man/query_generate_pages.Rd @@ -22,7 +22,7 @@ used, though not all.} generator} } \value{ -\link{query_generate_pages}: The modfied request, which can be passed to \link{next_batch} or +\link{query_generate_pages}: The modified request, which can be passed to \link{next_batch} or \link{retrieve_all} as appropriate. \link{list_all_generators}: a \link[tibble:tbl_df-class]{tibble} of all the available generator @@ -52,7 +52,10 @@ For instance, to set a limit of 10 to the number of pages returned by the # Search for articles about seagulls seagulls <- wiki_action_request() \%>\% query_generate_pages("search", gsrsearch = "seagull") \%>\% - next_batch() + gracefully(next_batch) seagulls } +\seealso{ +\code{\link[=gracefully]{gracefully()}} +} diff --git a/man/query_list_pages.Rd b/man/query_list_pages.Rd index 5b308cf..5efff92 100644 --- a/man/query_list_pages.Rd +++ b/man/query_list_pages.Rd @@ -45,7 +45,10 @@ physics_pages <- wiki_action_request() \%>\% cmsort = "timestamp", cmdir = "desc", cmtitle = "Category:Physics" ) \%>\% - next_batch() + gracefully(next_batch) physics_pages } +\seealso{ +\code{\link[=gracefully]{gracefully()}} +} diff --git a/man/query_page_properties.Rd b/man/query_page_properties.Rd index e42b1d5..0979ab7 100644 --- a/man/query_page_properties.Rd +++ b/man/query_page_properties.Rd @@ -41,7 +41,10 @@ property module as the \code{genenerator}. 
resp <- wiki_action_request() \%>\% query_generate_pages("search", gsrsearch = "seagull") \%>\% query_page_properties("info", inprop = "watchers") \%>\% - next_batch() \%>\% + gracefully(next_batch) \%>\% dplyr::select(pageid, ns, title, watchers) resp } +\seealso{ +\code{\link[=gracefully]{gracefully()}} +} diff --git a/man/wiki_action_request.Rd b/man/wiki_action_request.Rd index b4e9c35..016082e 100644 --- a/man/wiki_action_request.Rd +++ b/man/wiki_action_request.Rd @@ -45,7 +45,7 @@ server. } \details{ \link{wikkitidy} provides an ergonomic API for the Action API's \href{https://www.mediawiki.org/wiki/API:Query}{Query modules}. These modules are most -useful for researchers, because they allow you to explore the stucture of +useful for researchers, because they allow you to explore the structure of Wikipedia and its back pages. You can obtain a list of available modules in your R console using \code{\link[=list_all_property_modules]{list_all_property_modules()}}, \code{\link[=list_all_list_modules]{list_all_list_modules()}} and \code{\link[=list_all_generators]{list_all_generators()}}, @@ -58,6 +58,9 @@ historians <- wiki_action_request() \%>\% cmtitle = "Category:Australian_historians", cmlimit = 10 ) \%>\% - next_batch() + gracefully(next_batch) historians } +\seealso{ +\code{\link[=gracefully]{gracefully()}} +} diff --git a/man/wikkitidy-package.Rd b/man/wikkitidy-package.Rd index e758a96..5ad951d 100644 --- a/man/wikkitidy-package.Rd +++ b/man/wikkitidy-package.Rd @@ -14,6 +14,7 @@ Access 'Wikipedia' through the several 'MediaWiki' APIs (\url{https://www.mediaw Useful links: \itemize{ \item \url{https://wikihistories.github.io/wikkitidy/} + \item \url{https://github.com/wikihistories/wikkitidy} \item Report bugs at \url{https://github.com/wikihistories/wikkitidy/issues} } diff --git a/tests/testthat/test-request-gracefully.R b/tests/testthat/test-request-gracefully.R new file mode 100644 index 0000000..e3b4aeb --- /dev/null +++ 
b/tests/testthat/test-request-gracefully.R @@ -0,0 +1,16 @@ +test_that("A bad request fails gracefully", { + expect_no_error( + httr2::request(httr2::example_url()) |> + httr2::req_url_path("/status/404") |> + gracefully(httr2::req_perform) + ) +}) + +test_that("A good request succeeds silently", { + expect_equal( + httr2::request(httr2::example_url()) |> + gracefully(httr2::req_perform) |> + httr2::resp_status(), + 200 + ) +}) # NOTE(review): the first test asserts only that no error is thrown and the second only that the status is 200; neither inspects the object returned on failure or checks that the success path produces no output, as the second test name suggests. The roxygen example for `gracefully` is gated on webfakes being installed while these tests call `httr2::example_url()` without a skip; confirm whether `skip_if_not_installed("webfakes")` is needed.