Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ Description: A Tool for Semi-Automating the Statistical Disclosure Control of Re
License: MIT + file LICENSE
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.3
SystemRequirements: Python (>= 3.10)
Imports:
reticulate,
Expand All @@ -32,3 +31,4 @@ Language: en-US
URL: https://github.com/AI-SDC/ACRO-R
BugReports: https://github.com/AI-SDC/ACRO-R/issues
VignetteBuilder: knitr
Config/roxygen2/version: 8.0.0
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,7 @@ export(acro_remove_output)
export(acro_rename_output)
export(acro_surv_func)
export(acro_table)
export(create_factors)
export(is_excluded)
export(is_invalid)
export(to_pandas_categorical)
2 changes: 1 addition & 1 deletion R/acro_init.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ acro_init <- function(config = "default", suppress = FALSE, envname = acro_venv,
}

# import the acro package and instantiate an object
acro <- reticulate::import("acro", delay_load = TRUE)
acro <- reticulate::import("acro", delay_load = TRUE, convert = FALSE)
acroEnv$ac <- acro$ACRO(config = config, suppress = suppress)

invisible(acroEnv$ac)
Expand Down
77 changes: 68 additions & 9 deletions R/acro_tables.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ acro_crosstab <- function(index, columns, values = NULL, aggfunc = NULL) {
if (is.null(acroEnv$ac)) {
stop("ACRO has not been initialised. Please first call acro_init()")
}
table <- acroEnv$ac$crosstab(index, columns, values = values, aggfunc = aggfunc)
py_table <- acroEnv$ac$crosstab(index, columns, values = values, aggfunc = aggfunc)
table <- reticulate::py_to_r(py_table)
return(table)
}

Expand All @@ -22,12 +23,14 @@ acro_crosstab <- function(index, columns, values = NULL, aggfunc = NULL) {
#' @param columns Values to group by in the columns.
#' @param dnn The names to be given to the dimensions in the result
#' @param deparse.level Controls how the default `dnn` is constructed.
#' @param exclude levels to remove for all factors in index/columns
#' @param useNA whether to include NA values in the table
#' @param ... Any other parameters.
#'
#' @return Cross tabulation of the data
#' @export

acro_table <- function(index, columns, dnn = NULL, deparse.level = 0, ...) {
acro_table <- function(index, columns, dnn = NULL, deparse.level = 0, useNA = "no", exclude = NULL, ...) {
if (is.null(acroEnv$ac)) {
stop("ACRO has not been initialised. Please first call acro_init().")
}
Expand Down Expand Up @@ -55,20 +58,73 @@ acro_table <- function(index, columns, dnn = NULL, deparse.level = 0, ...) {
acroEnv$col_names <- list("")
}
)
} else if (deparse.level == 2) {
acroEnv$row_names <- list(deparse((substitute(index))))
acroEnv$col_names <- list(deparse(substitute(columns)))
}
} else {
acroEnv$row_names <- list(dnn[1])
acroEnv$col_names <- list(dnn[2])
}

table <- acroEnv$ac$crosstab(index, columns, rownames = acroEnv$row_names, colnames = acroEnv$col_names)
# Handling the exclude parameter
if (useNA != "no" && !is.null(exclude)) {
if (any(is.na(exclude))) warning("'exclude' containing NA and 'useNA' != \"no\"' are a bit contradicting")

# Remove the NA and NaN from the exclude list, if they exist
exclude <- exclude[!(is.na(exclude) | is.nan(exclude))]
if (length(exclude) == 0) exclude <- NULL # nocov
}

if (!is.null(exclude)) {
# Exclude everything in the exclude list from the data
keep_mask <- !(is_excluded(index, exclude) | is_excluded(columns, exclude))

index <- index[keep_mask]
columns <- columns[keep_mask]

# Delete any dropped levels
if (is.factor(index)) index <- droplevels(index)
if (is.factor(columns)) columns <- droplevels(columns)
}

# Handling the useNA parameter
if (useNA == "no") {
# Remove any NA or NaN from the data
keep_mask <- !(is_invalid(index) | is_invalid(columns))

index <- index[keep_mask]
columns <- columns[keep_mask]
}

# Create factors
index <- create_factors(index, useNA)
columns <- create_factors(columns, useNA)

# Manually convert index and columns to pandas categorical to convert the R fcators to python categories
pd <- reticulate::import("pandas", convert = FALSE)
index <- to_pandas_categorical(index, pd)
columns <- to_pandas_categorical(columns, pd)

py_table <- acroEnv$ac$crosstab(index, columns, rownames = acroEnv$row_names, colnames = acroEnv$col_names)
# Check for any unused arguments
if (length(list(...)) > 0) {
warning("Unused arguments were provided: ", paste0(names(list(...)), collapse = ", "), "\n", "Please use the help command to learn more about the function.")
}
# Reset the index and keep the old indexes in a column
py_table <- py_table$reset_index()

# Manually translate the table to R
r_dataframe <- reticulate::py_to_r(py_table)
table <- as.matrix(r_dataframe[, -1])

# Convert the string "NA" to the R empty values <NA>
index_names <- as.character(r_dataframe[[1]])
column_names <- colnames(r_dataframe)[-1]

index_names[index_names %in% "NA"] <- NA
column_names[column_names %in% "NA"] <- NA

rownames(table) <- index_names
colnames(table) <- column_names

return(table)
}

Expand All @@ -87,7 +143,8 @@ acro_pivot_table <- function(data, values = NULL, index = NULL, columns = NULL,
if (is.null(acroEnv$ac)) {
stop("ACRO has not been initialised. Please first call acro_init()")
}
table <- acroEnv$ac$pivot_table(data, values = values, index = index, columns = columns, aggfunc = aggfunc)
py_table <- acroEnv$ac$pivot_table(data, values = values, index = index, columns = columns, aggfunc = aggfunc)
table <- reticulate::py_to_r(py_table)
return(table)
}

Expand All @@ -107,7 +164,8 @@ acro_hist <- function(data, column, breaks = 10, freq = TRUE, col = NULL, filena
if (is.null(acroEnv$ac)) {
stop("ACRO has not been initialised. Please first call acro_init()")
}
histogram <- acroEnv$ac$hist(data = data, column = column, bins = as.integer(breaks), density = freq, color = col, filename = filename)
py_histogram <- acroEnv$ac$hist(data = data, column = column, bins = as.integer(breaks), density = freq, color = col, filename = filename)
histogram <- reticulate::py_to_r(py_histogram)
# Load the saved histogram
image <- png::readPNG(histogram)
grid::grid.raster(image)
Expand All @@ -128,7 +186,8 @@ acro_surv_func <- function(time, status, output, filename = "kaplan-meier.png")
if (is.null(acroEnv$ac)) {
stop("ACRO has not been initialised. Please first call acro_init()")
}
results <- acroEnv$ac$surv_func(time = time, status = status, output = output, filename = filename)
py_results <- acroEnv$ac$surv_func(time = time, status = status, output = output, filename = filename)
results <- reticulate::py_to_r(py_results)
if (output == "plot") {
# Load the saved survival plot
image <- png::readPNG(results[[2]])
Expand Down
80 changes: 80 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#' Identify empty values; NA or NaN in an object
#'
#' @param Values to check for empty values in it
#'
#' @returns logical vector that shows the places of the empty values
#' @export

is_invalid <- function(Values) {
# Check for NA or NaN in the object
is.na(Values) | tolower(Values) %in% c("na", "nan")
}

#' Identify values to be excluded from an object
#'
#' @param Values to exclude from
#' @param exclude_list Items to exclude from the values
#'
#' @returns logical vector that shows the places of the items that needs to be excluded
#' @export

is_excluded <- function(Values, exclude_list) {
# Catch everything in the exclude list
mask <- Values %in% exclude_list

# Catch NA if it is in the exclude list
# We are separating the checks for NaN and NA because is.na(x) flags both the NAs and the NaNs as TRUE.
# If the user wants to exclude one and not the other is.na() will not be able to do that
if (any(is.na(exclude_list) & !is.nan(exclude_list))) {
mask <- mask | (is.na(Values) & !is.nan(Values))
}

# Catch NA if it is in the exclude list
if (any(is.nan(exclude_list))) {
mask <- mask | is.nan(Values) # nocov
}

return(mask)
}

#' Convert data to factors and manage NA levels
#'
#' @param Values to create factors on
#' @param useNA whether to include NA values in the table
#'
#' @returns R vector factor
#' @export

create_factors <- function(Values, useNA) {
# Check for NAs
char_values <- as.character(Values)
is_true_na <- is.na(char_values)

# Create the factors
if (!is.factor(Values)) Values <- as.factor(Values) # nocov

# Handle the useNA parameter
if (useNA == "always") {
levels(Values) <- union(levels(Values), "NA") # Always force NA
} else if (useNA == "ifany" && any(is_true_na)) {
levels(Values) <- union(levels(Values), "NA") # Only if NA exists
}
return(Values)
}

#' Convert an R factor to a Pandas Categorical
#'
#' @param Values R factor vector to be converted
#' @param pd Reference to the Python `pandas` module
#'
#' @returns A Python `pandas.Categorical` object
#' @export

to_pandas_categorical <- function(Values, pd) {
# Build the Pandas Categorical
pd$Categorical(
values = as.character(Values),
categories = levels(Values),
ordered = is.ordered(Values)
)
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
14 changes: 13 additions & 1 deletion man/acro_table.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/create_factors.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/is_excluded.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/is_invalid.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/to_pandas_categorical.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading