Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a second fuzzer for pipe equivalency #2819

Open
wants to merge 8 commits into
base: fuzz
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions .dev/ast_fuzz_test.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ pkgload::load_all()
# of getting top-level exclusions done for 'nofuzz start|end' ranges, except
# maybe if it enabled us to reuse lintr's own exclude() system.
# therefore we take this approach: pass over the test suite first and comment out
# any tests/units that have been marked 'nofuzz'. restore later.
# any tests/units that have been marked 'nofuzz'. restore later. one consequence
# is there's no support for fuzzer-specific exclusion, e.g. we fully disable
# the unnecessary_placeholder_linter() tests because |> and _ placeholders differ.
test_restorations <- list()
for (test_file in list.files("tests/testthat", pattern = "^test-", full.names = TRUE)) {
xml <- read_xml(xmlparsedata::xml_parse_data(parse(test_file, keep.source = TRUE)))
Expand Down Expand Up @@ -114,15 +116,15 @@ failures <- reporter$failures$as_list()
valid_failure <- vapply(
failures,
function(failure) {
if (grepl('(column_number|ranges|line) .* did not match', failure$message)) {
if (grepl("(column_number|ranges|line) .* did not match", failure$message)) {
return(TRUE)
}
FALSE
},
logical(1L)
)
if (!all(valid_failure)) {
failures <- failures[!valid_failure]
failures <- failures[!valid_failure]
if (length(failures) > 0L) {
names(failures) <- vapply(failures, `[[`, "test", FUN.VALUE = character(1L))
cat("Some fuzzed tests failed unexpectedly!\n")
print(failures)
Expand Down
72 changes: 45 additions & 27 deletions .dev/maybe_fuzz_content.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,48 +14,66 @@ maybe_fuzz_content <- function(file, lines) {
new_file
}

function_lambda_fuzzer <- function(pd, lines) {
fun_tokens <- c(`'\\\\'` = "\\", `FUNCTION` = "function")
fun_idx <- which(pd$token %in% names(fun_tokens))
n_fun <- length(fun_idx)
# skip errors for e.g. Rmd files, and ignore warnings.
# We could use get_source_expressions(), but with little benefit & much slower.
# also avoid over-use of 'nofuzz' induced by some incompatible swaps, e.g. not all '%>%' can be
# swapped to '|>' (if '.' is used, or if RHS is not an allowed simple call)
error_or_parse_data <- function(f) {
tryCatch(getParseData(suppressWarnings(parse(f, keep.source = TRUE))), error = identity)
}

if (n_fun == 0L) {
return(invisible())
}
simple_swap_fuzzer <- function(pd_filter, replacements) {
function(pd, lines) {
idx <- which(pd_filter(pd))
n <- length(idx)

if (n == 0L) {
return(invisible())
}

pd$new_text <- NA_character_
pd$new_text[fun_idx] <- sample(fun_tokens, n_fun, replace = TRUE)
pd$new_text <- NA_character_
pd$new_text[idx] <- sample(replacements, n, replace = TRUE)

for (ii in rev(fun_idx)) {
if (pd$text[ii] == pd$new_text[ii]) next
# Tried, with all rex(), hit a bug: https://github.com/r-lib/rex/issues/96
ptn = paste0("^(.{", pd$col1[ii] - 1L, "})", rex::rex(pd$text[ii]))
lines[pd$line1[ii]] <- rex::re_substitutes(lines[pd$line1[ii]], ptn, paste0("\\1", rex::rex(pd$new_text[ii])))
for (ii in rev(idx)) {
if (pd$text[ii] == pd$new_text[ii]) next
# Tried, with all rex(), hit a bug: https://github.com/r-lib/rex/issues/96
ptn = paste0("^(.{", pd$col1[ii] - 1L, "})", rex::rex(pd$text[ii]))
lines[pd$line1[ii]] <- rex::re_substitutes(lines[pd$line1[ii]], ptn, paste0("\\1", rex::rex(pd$new_text[ii])))
}
lines
}
lines
}

function_lambda_fuzzer <- simple_swap_fuzzer(
\(pd) pd$token %in% c("'\\\\'", "FUNCTION"),
replacements = c("\\", "function")
)

pipe_fuzzer <- simple_swap_fuzzer(
\(pd) (pd$token == "SPECIAL" & pd$text == "%>%") | pd$token == "PIPE",
replacements = c("%>%", "|>")
)

# we could also consider just passing any test where no fuzzing takes place,
# i.e. letting the other GHA handle whether unfuzzed tests pass as expected.
apply_fuzzers <- function(f) {
# skip errors for e.g. Rmd files, and ignore warnings.
# We could use get_source_expressions(), but with little benefit & much slower.
pd <- tryCatch(getParseData(suppressWarnings(parse(f, keep.source = TRUE))), error = identity)
pd <- error_or_parse_data(f)
if (inherits(pd, "error")) {
return(invisible())
}

reparse <- FALSE
lines <- readLines(f)
for (fuzzer in list(function_lambda_fuzzer)) {
if (reparse) {
pd <- getParseData(parse(f, keep.source = TRUE))
lines <- readLines(f)
}
unedited <- lines <- readLines(f)
for (fuzzer in list(function_lambda_fuzzer, pipe_fuzzer)) {
updated_lines <- fuzzer(pd, lines)
reparse <- !is.null(updated_lines)
if (!reparse) next # skip some I/O if we can
if (is.null(updated_lines)) next # skip some I/O if we can
writeLines(updated_lines, f)
# check if our attempted edit introduced some error
pd <- error_or_parse_data(f)
if (inherits(pd, "error")) {
writeLines(unedited, f)
return(invisible())
}
lines <- readLines(f)
}

invisible()
Expand Down
Loading
Loading