r-lib · MichaelChirico · Mar 7, 2025 · Mar 7, 2025 · Mar 7, 2025 · Mar 7, 2025
diff --git a/.dev/ast_fuzz_test.R b/.dev/ast_fuzz_test.R
@@ -54,7 +54,9 @@ pkgload::load_all()
 #   of getting top-level exclusions done for 'nofuzz start|end' ranges, except
 #   maybe if it enabled us to reuse lintr's own exclude() system.
 # therefore we take this approach: pass over the test suite first and comment out
-#   any tests/units that have been marked 'nofuzz'. restore later.
+#   any tests/units that have been marked 'nofuzz'. restore later. one consequence
+#   is there's no support for fuzzer-specific exclusion, e.g. we fully disable
+#   the unnecessary_placeholder_linter() tests because |> and _ placeholders differ.
 test_restorations <- list()
 for (test_file in list.files("tests/testthat", pattern = "^test-", full.names = TRUE)) {
   xml <- read_xml(xmlparsedata::xml_parse_data(parse(test_file, keep.source = TRUE)))
@@ -114,15 +116,15 @@ failures <- reporter$failures$as_list()
 valid_failure <- vapply(
   failures,
   function(failure) {
-    if (grepl('(column_number|ranges|line) .* did not match', failure$message)) {
+    if (grepl("(column_number|ranges|line) .* did not match", failure$message)) {
       return(TRUE)
     }
     FALSE
   },
   logical(1L)
 )
-if (!all(valid_failure)) {
-  failures <- failures[!valid_failure]
+failures <- failures[!valid_failure]
+if (length(failures) > 0L) {
   names(failures) <- vapply(failures, `[[`, "test", FUN.VALUE = character(1L))
   cat("Some fuzzed tests failed unexpectedly!\n")
   print(failures)

diff --git a/.dev/maybe_fuzz_content.R b/.dev/maybe_fuzz_content.R
@@ -14,48 +14,66 @@ maybe_fuzz_content <- function(file, lines) {
   new_file
 }
 
-function_lambda_fuzzer <- function(pd, lines) {
-  fun_tokens <- c(`'\\\\'` = "\\", `FUNCTION` = "function")
-  fun_idx <- which(pd$token %in% names(fun_tokens))
-  n_fun <- length(fun_idx)
+# skip errors for e.g. Rmd files, and ignore warnings.
+#   We could use get_source_expressions(), but with little benefit & much slower.
+# also avoid over-use of 'nofuzz' induced by some incompatible swaps, e.g. not all '%>%' can be
+#   swapped to '|>' (if '.' is used, or if RHS is not an allowed simple call)
+error_or_parse_data <- function(f) {
+  tryCatch(getParseData(suppressWarnings(parse(f, keep.source = TRUE))), error = identity)
+}
 
-  if (n_fun == 0L) {
-    return(invisible())
-  }
+simple_swap_fuzzer <- function(pd_filter, replacements) {
+  function(pd, lines) {
+    idx <- which(pd_filter(pd))
+    n <- length(idx)
+
+    if (n == 0L) {
+      return(invisible())
+    }
 
-  pd$new_text <- NA_character_
-  pd$new_text[fun_idx] <- sample(fun_tokens, n_fun, replace = TRUE)
+    pd$new_text <- NA_character_
+    pd$new_text[idx] <- sample(replacements, n, replace = TRUE)
 
-  for (ii in rev(fun_idx)) {
-    if (pd$text[ii] == pd$new_text[ii]) next
-    # Tried, with all rex(), hit a bug: https://github.com/r-lib/rex/issues/96
-    ptn = paste0("^(.{", pd$col1[ii] - 1L, "})", rex::rex(pd$text[ii]))
-    lines[pd$line1[ii]] <- rex::re_substitutes(lines[pd$line1[ii]], ptn, paste0("\\1", rex::rex(pd$new_text[ii])))
+    for (ii in rev(idx)) {
+      if (pd$text[ii] == pd$new_text[ii]) next
+      # Tried, with all rex(), hit a bug: https://github.com/r-lib/rex/issues/96
+      ptn = paste0("^(.{", pd$col1[ii] - 1L, "})", rex::rex(pd$text[ii]))
+      lines[pd$line1[ii]] <- rex::re_substitutes(lines[pd$line1[ii]], ptn, paste0("\\1", rex::rex(pd$new_text[ii])))
+    }
+    lines
   }
-  lines
 }
 
+function_lambda_fuzzer <- simple_swap_fuzzer(
+  \(pd) pd$token %in% c("'\\\\'", "FUNCTION"),
+  replacements = c("\\", "function")
+)
+
+pipe_fuzzer <- simple_swap_fuzzer(
+  \(pd) (pd$token == "SPECIAL" & pd$text == "%>%") | pd$token == "PIPE",
+  replacements = c("%>%", "|>")
+)
+
 # we could also consider just passing any test where no fuzzing takes place,
 #   i.e. letting the other GHA handle whether unfuzzed tests pass as expected.
 apply_fuzzers <- function(f) {
-  # skip errors for e.g. Rmd files, and ignore warnings.
-  #   We could use get_source_expressions(), but with little benefit & much slower.
-  pd <- tryCatch(getParseData(suppressWarnings(parse(f, keep.source = TRUE))), error = identity)
+  pd <- error_or_parse_data(f)
   if (inherits(pd, "error")) {
     return(invisible())
   }
 
-  reparse <- FALSE
-  lines <- readLines(f)
-  for (fuzzer in list(function_lambda_fuzzer)) {
-    if (reparse) {
-      pd <- getParseData(parse(f, keep.source = TRUE))
-      lines <- readLines(f)
-    }
+  unedited <- lines <- readLines(f)
+  for (fuzzer in list(function_lambda_fuzzer, pipe_fuzzer)) {
     updated_lines <- fuzzer(pd, lines)
-    reparse <- !is.null(updated_lines)
-    if (!reparse) next # skip some I/O if we can
+    if (is.null(updated_lines)) next # skip some I/O if we can
     writeLines(updated_lines, f)
+    # check if our attempted edit introduced some error
+    pd <- error_or_parse_data(f)
+    if (inherits(pd, "error")) {
+      writeLines(unedited, f)
+      return(invisible())
+    }
+    lines <- readLines(f)
   }
 
   invisible()