Merge pull request #319 from njtierney/add-impute-fixed-zero-261

Add impute_fixed, impute_zero, and impute_factor
njtierney · Apr 10, 2023 · 1a1efce · 1a1efce
2 parents 0da8ccd + 634dd30
commit 1a1efce
Show file tree

Hide file tree

Showing 15 changed files with 528 additions and 16 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -97,7 +97,10 @@ Collate:
     'gg-miss-var.R'
     'gg-miss-which.R'
     'helpers.R'
+    'impute-factor.R'
+    'impute-fixed.R'
     'impute-median.R'
+    'impute-zero.R'
     'impute_below.R'
     'impute_mean.R'
     'label-miss.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,10 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(impute_factor,character)
+S3method(impute_factor,default)
+S3method(impute_factor,factor)
+S3method(impute_factor,shade)
+S3method(impute_fixed,default)
 S3method(impute_mean,default)
 S3method(impute_mean,factor)
 S3method(impute_median,default)
@@ -76,6 +81,8 @@ export(impute_below)
 export(impute_below_all)
 export(impute_below_at)
 export(impute_below_if)
+export(impute_factor)
+export(impute_fixed)
 export(impute_mean)
 export(impute_mean_all)
 export(impute_mean_at)
@@ -84,6 +91,7 @@ export(impute_median)
 export(impute_median_all)
 export(impute_median_at)
 export(impute_median_if)
+export(impute_zero)
 export(is_na)
 export(is_shade)
 export(label_miss_1d)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,9 @@
 # naniar (development version)
 
+## New
+
+- Implement `impute_fixed`, `impute_zero`, and `impute_factor`. Notably, these do not come with the "scoped variants" (for example, `impute_fixed_if`) that were implemented for earlier imputation functions. Instead, use the new `across` workflow within `dplyr`, which is also easier to maintain. #261
+
 # naniar 1.0.0
 
 Version 1.0.0 of naniar is to signify that this release is associated with

diff --git a/R/impute-factor.R b/R/impute-factor.R
@@ -0,0 +1,86 @@
+#' Impute a factor value into a vector with missing values
+#'
+#' For imputing a fixed factor level. The imputed value is added to the end
+#'   of the levels of the vector. We generally recommend imputing with
+#'   model-based approaches instead. See the `simputation` package, for example
+#'   [simputation::impute_lm()].
+#'
+#' @param x factor or character vector (a `shade` vector is returned unchanged)
+#' @param value level to impute in place of missing values, for example `"wat"`
+#'
+#' @return vector with missing values replaced by `value`
+#' @export
+#' @name impute_factor
+#'
+#' @examples
+#'
+#' vec <- factor(LETTERS[1:10])
+#'
+#' vec[sample(1:10, 3)] <- NA
+#'
+#' vec
+#'
+#' impute_factor(vec, "wat")
+#'
+#' library(dplyr)
+#'
+#' dat <- tibble(
+#'   num = rnorm(10),
+#'   int = rpois(10, 5),
+#'   fct = factor(LETTERS[1:10])
+#' ) %>%
+#'   mutate(
+#'     across(
+#'       everything(),
+#'       \(x) set_prop_miss(x, prop = 0.25)
+#'     )
+#'   )
+#'
+#' dat
+#'
+#' dat %>%
+#'   nabular() %>%
+#'   mutate(
+#'     num = impute_fixed(num, -9999),
+#'     int = impute_zero(int),
+#'     fct = impute_factor(fct, "out")
+#'   )
+#'
+impute_factor <- function(x, value) UseMethod("impute_factor")
+
+#' @export
+#' @rdname impute_factor
+impute_factor.default <- function(x, value){
+  vctrs::vec_assert(x, ptype = character()) # fallback: asserts `x` is character; `value` is not used
+}
+
+#' @export
+#' @rdname impute_factor
+impute_factor.factor <- function(x, value){
+  # make `value` an available level before it is assigned below
+  imputed <- forcats::fct_expand(x, value)
+  # positions that are missing and need to be filled in
+  is_missing <- is.na(imputed)
+  imputed[is_missing] <- factor(value)
+  imputed
+}
+
+#' @export
+#' @rdname impute_factor
+impute_factor.character <- function(x, value){
+  # fct_expand() hands back a factor whose levels include `value`
+  as_fct <- forcats::fct_expand(x, value)
+
+  # fill the missing positions with the new level
+  replace(as_fct, is.na(as_fct), factor(value))
+
+}
+
+#' @export
+#' @rdname impute_factor
+impute_factor.shade <- function(x, value){
+
+  # shade vectors are returned untouched; `value` is ignored
+  x
+
+}
diff --git a/R/impute-fixed.R b/R/impute-fixed.R
@@ -0,0 +1,57 @@
+#' Impute a fixed value into a vector with missing values
+#'
+#' This can be useful if you are imputing specific values; however, we would
+#'   generally recommend imputing with model-based approaches instead. See
+#'   the `simputation` package, for example [simputation::impute_lm()].
+#'
+#' @param x vector
+#' @param value value to impute in place of missing values
+#'
+#' @return vector with missing values replaced by `value`
+#' @export
+#' @name impute_fixed
+#'
+#' @examples
+#'
+#' vec <- rnorm(10)
+#'
+#' vec[sample(1:10, 3)] <- NA
+#'
+#' vec
+#'
+#' impute_fixed(vec, -999)
+#'
+#' library(dplyr)
+#'
+#' dat <- tibble(
+#'   num = rnorm(10),
+#'   int = rpois(10, 5),
+#'   fct = factor(LETTERS[1:10])
+#' ) %>%
+#'   mutate(
+#'     across(
+#'       everything(),
+#'       \(x) set_prop_miss(x, prop = 0.25)
+#'     )
+#'   )
+#'
+#' dat
+#'
+#' dat %>%
+#'   nabular() %>%
+#'   mutate(
+#'     num = impute_fixed(num, -9999),
+#'     int = impute_zero(int),
+#'     fct = impute_factor(fct, "out")
+#'   )
+#'
+impute_fixed <- function(x, value) UseMethod("impute_fixed")
+
+#' @export
+#' @rdname impute_fixed
+impute_fixed.default <- function(x, value){
+
+  # overwrite every missing position with `value`, leaving the rest as is
+  replace(x, is.na(x), value)
+
+}
diff --git a/R/impute-zero.R b/R/impute-zero.R
@@ -0,0 +1,53 @@
+#' Impute zero into a vector with missing values
+#'
+#' This can be useful if you are imputing specific values; however, we would
+#'   generally recommend imputing with model-based approaches instead. See
+#'   the `simputation` package, for example [simputation::impute_lm()].
+#'
+#' @param x vector
+#'
+#' @return vector with missing values replaced by zero
+#' @export
+#'
+#' @examples
+#'
+#' vec <- rnorm(10)
+#'
+#' vec[sample(1:10, 3)] <- NA
+#'
+#' vec
+#'
+#' impute_zero(vec)
+#'
+#' library(dplyr)
+#'
+#' # impute_zero() can also be used on a column inside mutate()
+#'
+#' dat <- tibble(
+#'   num = rnorm(10),
+#'   int = rpois(10, 5),
+#'   fct = factor(LETTERS[1:10])
+#' ) %>%
+#'   mutate(
+#'     across(
+#'       everything(),
+#'       \(x) set_prop_miss(x, prop = 0.25)
+#'     )
+#'   )
+#'
+#' dat
+#'
+#' dat %>%
+#'   nabular() %>%
+#'   mutate(
+#'     num = impute_fixed(num, -9999),
+#'     int = impute_zero(int),
+#'     fct = impute_factor(fct, "out")
+#'   )
+#'
+#' @rdname impute_zero
+impute_zero <- function(x){
+  # delegate to impute_fixed() with the fixed value 0
+  impute_fixed(x = x, value = 0)
+
+}
diff --git a/R/shadow-shifters.R b/R/shadow-shifters.R
@@ -123,7 +123,7 @@ shadow_shift.numeric <- function(x,
 
 #' @export
 shadow_shift.factor <- function(x, ...){
-  forcats::fct_explicit_na(x, na_level = "missing")
+  forcats::fct_na_value_to_level(x, level = "missing")
 }
 
 #' @export

diff --git a/R/utils.R b/R/utils.R
@@ -199,7 +199,7 @@ diag_na <- function(size = 5){
 
 coerce_fct_na_explicit <- function(x){
   if (is.factor(x) & anyNA(x)) {
-    forcats::fct_explicit_na(x, na_level = "NA")
+    forcats::fct_na_value_to_level(x, level = "NA")
   } else {
     x
   }

diff --git a/man/impute_factor.Rd b/man/impute_factor.Rd
diff --git a/man/impute_fixed.Rd b/man/impute_fixed.Rd