diff --git a/R/checkSex.R b/R/checkSex.R index 4090bbbb..9eb8181e 100644 --- a/R/checkSex.R +++ b/R/checkSex.R @@ -24,7 +24,7 @@ #' @param ped A dataframe representing the pedigree data with a 'sex' column. #' @param code_male The current code used to represent males in the 'sex' column. #' @param code_female The current code used to represent females in the 'sex' column. If both are NULL, no recoding is performed. -#' @param code_unknown The current code used to represent unknown sex values in the 'sex' column. +#' @param code_unknown The current code used to represent unknown or ambiguous sex in the 'sex' column. Can be NA to indicate that missing values should be treated as unknown. If NULL and both code_male and code_female are provided, values not matching either will be inferred as unknown. #' @param verbose A logical flag indicating whether to print progress and validation messages to the console. #' @param repair A logical flag indicating whether to attempt repairs on the sex coding. #' @param momID The column name for maternal IDs. Default is "momID". 
diff --git a/tests/testthat/test-checkSex.R b/tests/testthat/test-checkSex.R
index 335aee62..427d37b4 100644
--- a/tests/testthat/test-checkSex.R
+++ b/tests/testthat/test-checkSex.R
@@ -86,3 +86,184 @@ test_that("Functions handle missing values gracefully", {
   expect_silent(repairSex(ped_with_na, verbose = FALSE, code_male = "M"))
   expect_silent(recodeSex(ped_with_na, verbose = FALSE, code_male = "M", code_female = "F"))
 })
+
+
+# Test Case 5: Handle code_unknown parameter with explicit value
+test_that("recodeSex handles code_unknown parameter when explicitly provided", {
+  # Pedigree in which "U" marks individuals of unknown sex
+  ped <- data.frame(
+    ID = c(1, 2, 3, 4, 5, 6),
+    sex = c("M", "F", "M", "F", "U", "U"),
+    dadID = c(NA, NA, 1, 1, NA, NA),
+    momID = c(NA, NA, 2, 2, NA, NA)
+  )
+
+  # Recode with an explicit code_unknown = "U"
+  recoded_ped <- recodeSex(ped,
+    code_male = "M",
+    code_female = "F",
+    code_unknown = "U",
+    recode_male = "Male",
+    recode_female = "Female",
+    recode_unknown = "Unknown"
+  )
+
+  # Compare the whole column so every row is verified, not just a sample
+  expect_equal(
+    recoded_ped$sex,
+    c("Male", "Female", "Male", "Female", "Unknown", "Unknown")
+  )
+})
+
+
+# Test Case 6: Handle code_unknown when it's NA
+test_that("recodeSex handles code_unknown = NA correctly", {
+  # Pedigree in which a missing value (NA) marks unknown sex
+  ped <- data.frame(
+    ID = c(1, 2, 3, 4, 5),
+    sex = c("M", "F", "M", "F", NA),
+    dadID = c(NA, NA, 1, 1, NA),
+    momID = c(NA, NA, 2, 2, NA)
+  )
+
+  # Recode with code_unknown = NA
+  recoded_ped <- recodeSex(ped,
+    code_male = "M",
+    code_female = "F",
+    code_unknown = NA,
+    recode_male = "Male",
+    recode_female = "Female",
+    recode_unknown = "Unknown"
+  )
+
+  # The NA entry must become "Unknown"; known codes are mapped as usual
+  expect_equal(
+    recoded_ped$sex,
+    c("Male", "Female", "Male", "Female", "Unknown")
+  )
+})
+
+
+# Test Case 7: Infer unknown values from data when code_unknown not provided
+test_that("recodeSex infers unknown values when code_unknown is not provided", {
+  # Pedigree with values ("X", "?") that are neither the male nor female code
+  ped <- data.frame(
+    ID = c(1, 2, 3, 4, 5, 6),
+    sex = c("M", "F", "M", "F", "X", "?"),
+    dadID = c(NA, NA, 1, 1, NA, NA),
+    momID = c(NA, NA, 2, 2, NA, NA)
+  )
+
+  # No code_unknown supplied - "X" and "?" should be inferred as unknown
+  recoded_ped <- recodeSex(ped,
+    code_male = "M",
+    code_female = "F",
+    recode_male = "Male",
+    recode_female = "Female",
+    recode_unknown = "Unknown"
+  )
+
+  # Values matching neither code_male nor code_female map to recode_unknown
+  expect_equal(
+    recoded_ped$sex,
+    c("Male", "Female", "Male", "Female", "Unknown", "Unknown")
+  )
+})
+
+
+# Test Case 8: Test recode_unknown parameter variations
+test_that("recodeSex respects recode_unknown parameter", {
+  # Pedigree with a single "U" (unknown) individual
+  ped <- data.frame(
+    ID = c(1, 2, 3, 4, 5),
+    sex = c("M", "F", "M", "F", "U"),
+    dadID = c(NA, NA, 1, 1, NA),
+    momID = c(NA, NA, 2, 2, NA)
+  )
+
+  # Recode with custom target values, including recode_unknown = "9"
+  recoded_ped <- recodeSex(ped,
+    code_male = "M",
+    code_female = "F",
+    code_unknown = "U",
+    recode_male = "1",
+    recode_female = "0",
+    recode_unknown = "9"
+  )
+
+  # Every row must carry the custom recode value for its category
+  expect_equal(recoded_ped$sex, c("1", "0", "1", "0", "9"))
+})
+
+
+# Test Case 9: Test code_unknown with only code_male provided
+test_that("recodeSex handles code_unknown with only code_male", {
+  # Pedigree with male, female and unknown codes
+  ped <- data.frame(
+    ID = c(1, 2, 3, 4),
+    sex = c("M", "F", "M", "U"),
+    dadID = c(NA, NA, 1, NA),
+    momID = c(NA, NA, 2, NA)
+  )
+
+  # Only code_male and code_unknown are supplied; code_female is omitted
+  recoded_ped <- recodeSex(ped,
+    code_male = "M",
+    code_unknown = "U",
+    recode_male = "Male",
+    recode_female = "Female",
+    recode_unknown = "Unknown"
+  )
+
+  # Expected recoding: M->Male, F->Female (inferred), U->Unknown
+  expect_equal(recoded_ped$sex, c("Male", "Female", "Male", "Unknown"))
+})
+
+
+# Test Case 10: Test code_unknown with only code_female provided
+test_that("recodeSex handles code_unknown with only code_female", {
+  # Pedigree with male, female and unknown codes
+  ped <- data.frame(
+    ID = c(1, 2, 3, 4),
+    sex = c("M", "F", "F", "U"),
+    dadID = c(NA, NA, NA, NA),
+    momID = c(NA, NA, NA, NA)
+  )
+
+  # Only code_female and code_unknown are supplied; code_male is omitted
+  recoded_ped <- recodeSex(ped,
+    code_female = "F",
+    code_unknown = "U",
+    recode_male = "Male",
+    recode_female = "Female",
+    recode_unknown = "Unknown"
+  )
+
+  # Expected recoding: F->Female, M->Male (inferred), U->Unknown
+  expect_equal(recoded_ped$sex, c("Male", "Female", "Female", "Unknown"))
+})
+
+
+# Test Case 11: Test numeric codes with code_unknown
+test_that("recodeSex handles numeric code_unknown values", {
+  # Pedigree whose sex column uses numeric codes (1, 0, 9)
+  ped <- data.frame(
+    ID = c(1, 2, 3, 4, 5),
+    sex = c(1, 0, 1, 0, 9),
+    dadID = c(NA, NA, 1, 1, NA),
+    momID = c(NA, NA, 2, 2, NA)
+  )
+
+  # Recode numeric source codes to character labels
+  recoded_ped <- recodeSex(ped,
+    code_male = 1,
+    code_female = 0,
+    code_unknown = 9,
+    recode_male = "M",
+    recode_female = "F",
+    recode_unknown = "U"
+  )
+
+  # Every numeric code must be replaced by its character label
+  expect_equal(recoded_ped$sex, c("M", "F", "M", "F", "U"))
+})