#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Filename : func_all.R
# Use      : Convenient functions for EDA
# Author   : Tomas Sou
# Created  : 2025-08-29
# Updated  : 2025-11-13
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Notes
# na
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Updates
# na
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Reexports

#' @export
dplyr::mutate
#' @export
dplyr::filter
#' @export
dplyr::select
#' @export
dplyr::where
#' @export
dplyr::across

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#' Copy files and rename with date
#'
#' Copy files to destination and rename with date and a tag as desired.
#'
#' @param ... `<chr>` A vector of file paths of the source files to copy and rename.
#' @param des `<chr>` Destination folder. "." to rename files at the current location.
#' @param tag `<chr>` Tag to the filename.
#' @param td `<lgl>` `TRUE` to add today (yymmdd) to the filename.
#' @returns A logical vector indicating if the operation succeeded for each of the files.
#' @export
#' @examples
#' \dontrun{
#' # Copy a file to home directory
#' tmp = tempdir()
#' fc("f1.R","f2.R",des=tmp)
#' }
fc = function(...,des="",tag="",td=TRUE){
  # Copy
  fpath = c(...)
  fname = basename(fpath)
  fstem = tools::file_path_sans_ext(fname)
  fext = tools::file_ext(fname)
  file.copy(fpath,des,overwrite=TRUE)
  # Rename
  today = NULL
  if(td) today = paste0("-",format(Sys.time(),"%y%m%d"))
  fname_to = paste0(fstem,tag,today,".",fext)
  fpath1 = file.path(des,fname)
  fpath2 = file.path(des,fname_to)
  file.rename(fpath1,fpath2)
}

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#' flextable wrapper
#'
#' Sugar function for default flextable output.
#'
#' @param d `<dfr>` A data frame.
#' @param fnote `<chr>` Footnote.
#' @param ttl `<chr>` Title.
#' @param sig `<int>` Number of significant digits to compute.
#' @param dig `<int>` Number of decimal places to display.
#' @param src `<int>` Either 1 or 2 to add source label over 1 or 2 lines.
#' @param omit `<chr>` Text to omit from the source label.
#' @returns A flextable object.
#' @export
#' @examples
#' mtcars |> head() |> ft()
#' mtcars |> head() |> ft(src=1)
#' mtcars |> head() |> ft("Footnote")
#' mtcars |> head() |> ft("Footnote",src=1)
#' mtcars |> head() |> ft(sig=2,dig=1)
ft = function(d, fnote=NULL, ttl=NULL, sig=8, dig=2, src=0, omit=""){
  labsrc = NULL
  if(src %in% c(1,2)) labsrc = paste0(label_src(src,omit))
  if(!is.null(fnote)) labsrc = paste0("\n",labsrc)
  lab = fnote
  if(!is.null(labsrc)) lab = paste0(fnote,labsrc)
  out = d |>
    dplyr::mutate(dplyr::across(dplyr::where(~ is.numeric(.x) && is.double(.x)), ~signif(.x,sig))) |>
    flextable::flextable() |>
    flextable::colformat_double(digits=dig) |>
    flextable::autofit() |>
    flextable::add_header_lines(ttl) |>
    flextable::add_footer_lines(lab) |>
    flextable::align(align="left", part="all")
  return(out)
}

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#' flextable defaults
#'
#' Sugar function to set flextable defaults.
#' The arguments are passed to [flextable::set_flextable_defaults()].
#'
#' @param show `<lgl>` `TRUE` to show values after the update.
#' @param font `<chr>` Font family - for `font.family`.
#' @param fsize `<int>` Font size (in point) - for `font.size`.
#' @param pad `<int>` Padding space around text - for `padding`.
#' @param na `<chr>` A value to display instead of NA - for `na_str`
#' @param nan `<chr>` A value to display instead of NaN - for `nan_str`
#' @param ... Additional arguments to pass to [flextable::set_flextable_defaults()]
#' @returns A list containing previous default values.
#' @seealso [flextable::set_flextable_defaults()].
#' @export
#' @examples
#' \dontrun{
#' ft_def()
#' }
ft_def = function(show=FALSE, font="Calibri Light", fsize=10, pad=3, na="", nan="", ...){
  out = flextable::set_flextable_defaults(
    font.family = font,
    font.size = fsize,
    padding = pad,
    na_str = na,
    nan_str = nan,
    ...
  )
  if(show) print(flextable::get_flextable_defaults())
  invisible(out)
}

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#' Generate hex colour codes
#'
#' Generate a vector of hex colour codes for the desired number of colours.
#' Colours are generated by evenly splitting hue in the range `[0,360]`
#' in the HCL colour space using [grDevices::hcl].
#' The output is meant to follow the default colours used in [ggplot2].
#'
#' @param n `<int>` Number of colours to output.
#' @param show `<lgl>` `TRUE` to show the output colours.
#' @returns A vector of hex colour codes that can be used for plotting.
#' @export
#' @examples
#' hexn(6,FALSE)
#' hexn(4,TRUE)
hexn = function(n,show=FALSE){
  hues = seq(15, 375, length=n+1)
  out = grDevices::hcl(h=hues, c=100, l=65)[1:n]
  if(show) scales::show_col(out)
  return(out)
}

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#' kable wrapper
#'
#' Sugar function for default kable output.
#'
#' @param d `<dfr>` A data frame.
#' @param fnote `<chr>` Footnote.
#' @param cap `<chr>` Caption.
#' @param sig `<int>` Number of significant digits to compute.
#' @param dig `<int>` Number of decimal places to display.
#' @param src  `<int>` Either 1 or 2 to add source label over 1 or 2 lines.
#' @param omit `<chr>` Text to omit from the source label.
#' @returns A kable object.
#' @export
#' @examples
#' mtcars |> head() |> kb()
#' mtcars |> head() |> kb(src=1)
#' mtcars |> head() |> kb("Footnote")
#' mtcars |> head() |> kb("Footnote",src=1)
#' mtcars |> head() |> kb(sig=2,dig=1)
kb = function(d, fnote=NULL, cap=NULL, sig=8, dig=2, src=0, omit=""){
  labsrc = NULL
  if(src %in% c(1,2)) labsrc = paste0(label_src(src,omit))
  if(!is.null(fnote)) labsrc = paste0("\n",labsrc)
  lab = fnote
  if(!is.null(labsrc)) lab = paste0(fnote,labsrc)
  d |>
    dplyr::mutate(dplyr::across(dplyr::where(~ is.numeric(.x) && is.double(.x)), ~signif(.x,sig))) |>
    kableExtra::kbl(caption=cap,digits=dig) |>
    kableExtra::kable_classic(full_width=FALSE) |>
    kableExtra::footnote(lab,general_title="")
}

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#' Generate source file label
#'
#' Generate a label with the current source file path and run time,
#' assuming that the source file is in the current working directory.
#' In interactive sessions, the function is designed to work in a script file in RStudio
#' and uses `rstudioapi` to get the file path.
#' It will return empty if run in the console directly.
#'
#' @param span `<int>` Number of lines: either 1 or 2.
#' @param omit `<chr>` Text to omit from the label.
#' @param tz `<lgl>` `FALSE` to exclude time stamp.
#' @param fname `<lgl>` `TRUE` to return the file name only.
#' @returns A label showing the source file path with a time stamp.
#' @export
#' @examples
#' label_src()
#' label_src(tz=FALSE)
#' label_src(fname=TRUE)
label_src = function(span=2,omit="",tz=TRUE,fname=FALSE){
  loc_src = getwd()
  if(interactive()) loc_src = rstudioapi::getActiveDocumentContext()$path |> dirname()
  fname_src = knitr::current_input() |> gsub(".rmarkdown",".qmd",x=_)
  if(interactive()) fname_src = rstudioapi::getActiveDocumentContext()$path |> basename()
  fpath_src  = file.path(loc_src,fname_src)
  labtz = paste0("\nRun: ",format(Sys.time(),usetz=TRUE))
  if(!tz) labtz = NULL
  lab1 = paste0("Source:",fpath_src,labtz)
  lab2 = paste0("Source:",loc_src,"/\n",fname_src,labtz)
  out = lab1
  if(span==2) out = lab2
  if(fname) out = fname_src
  out = out |> gsub(omit,"",x=_)
  return(out)
}

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#' Generate time stamp label
#'
#' Generate a label with a time stamp indicating the run time.
#'
#' @param omit `<chr>` Text to omit from the label.
#' @returns A label with time stamp.
#' @export
#' @examples
#' label_tz()
label_tz = function(omit=""){
  out = paste0("Run: ",format(Sys.time(),usetz=TRUE)) |> gsub(omit,"",x=_)
  return(out)
}

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#' Summarise continuous variables by group
#'
#' Summarise all continuous variables by group.
#' Non-numeric variables will be dropped.
#'
#' @param d `<dfr>` A data frame.
#' @param cols `<var>` Optional. Columns to summarise as unquoted names.
#' @param ... `<var>` Optional. Columns to group by as unquoted names.
#' @param pct `<num>` A vector of two indicating the percentiles to compute.
#' @param xname  `<chr>` Characters to omit in output column names.
#' @returns A data frame of summarised variables.
#' @export
#' @examples
#' d = mtcars |> dplyr::mutate(vs=factor(vs), am=factor(am))
#' d |> summ_by()
#' d |> summ_by(pct=c(0.1,0.9))
#' d |> summ_by(mpg)
#' d |> summ_by(mpg,vs)
#' d |> summ_by(mpg,vs,am)
#' d |> summ_by(c(mpg,disp))
#' d |> summ_by(c(mpg,disp),vs)
#' d |> summ_by(c(mpg,disp),vs,xname="mpg_")
#' # Grouping without column selection is possible but rarely useful in large dataset
#' d |> summ_by(,vs)
summ_by = function(d, cols, ..., pct=c(0.25,0.75), xname=""){
  if(!missing(cols)) d = d |> dplyr::select(...,{{cols}})
  d. = d |> dplyr::group_by(...)
  gps = d. |> attr("groups")
  if(is.null(gps)) d. = d |>
    dplyr::select(dplyr::where(is.numeric)) |>
    tidyr::pivot_longer(dplyr::everything(),names_to="name") |>
    dplyr::group_by(dplyr::pick(dplyr::contains("name")))
  if(is.null(gps)){
    catv = d |> dplyr::select(dplyr::where(~!is.numeric(.x)))
    message("NB: Non-numeric variables are dropped.")
    message("Dropped: ", paste(names(catv), collapse=" "))
  }
  if(is.null(gps) & xname=="") xname = paste0(names(d.),"_",collapse="|")
  if(!is.null(gps)) gps = gps |> dplyr::select(-dplyr::last_col()) |> names()
  x = d. |> dplyr::select(dplyr::where(is.numeric)) |> ncol() - length(gps)
  if(x==1 & xname=="") xname = paste0(names(d.),"_",collapse="|")
  out = d. |>
    dplyr::summarise(
      dplyr::across(
        dplyr::where(is.numeric),
        list(
          n = ~length(.x),
          nNA = ~sum(is.na(.x)),
          Mean = ~mean(.x, na.rm=TRUE),
          SD  = ~sd(.x, na.rm=TRUE),
          Min = ~min(.x, na.rm=TRUE),
          Plo = ~quantile(.x, pct[1], na.rm=TRUE),
          Med = ~median(.x, na.rm=TRUE),
          Pup = ~quantile(.x, pct[2], na.rm=TRUE),
          Max = ~max(.x, na.rm=TRUE)
        )
      )
    ) |>
    dplyr::rename_with(~gsub(xname,"",.x)) |>
    dplyr::rename_with(~gsub("Plo",paste0("P",pct[1]*100), .x), dplyr::contains("Plo")) |>
    dplyr::rename_with(~gsub("Pup",paste0("P",pct[2]*100), .x), dplyr::contains("Pup"))
  return(out)
}

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#' Summarise categorical variables
#'
#' Summarise all categorical variables.
#' Numeric variables will be dropped.
#'
#' @param d A data frame.
#' @param ... `<var>` Optional. Columns to summarise.
#' @param var `<var/int>` (name or index) Optional. A variable to extract as a data frame.
#' @returns A list containing summaries for all categorical variables or
#'   a data frame showing the summary of a selected variable.
#' @export
#' @examples
#' d = mtcars |> dplyr::mutate(dplyr::across(c(cyl,vs,am,gear,carb),factor))
#' d |> summ_cat()
#' d |> summ_cat(cyl,vs)
#' d |> summ_cat(var=cyl)
#' d |> summ_cat(var=1)
summ_cat = function(d,...,var){
  x = d |> dplyr::select(dplyr::where(is.numeric))
  if(!missing(...)) d = d |> dplyr::select(...)
  message("NB: Numeric variables are dropped.")
  message("Dropped: ", paste(names(x), collapse=" "))
  out = d |>
    dplyr::select(-dplyr::where(is.numeric)) |>
    lapply(janitor::tabyl) |>
    lapply(janitor::adorn_totals)
  for (i in seq_along(out)){
    out[[i]] = out[[i]] |> dplyr::rename(!!names(out[i]):=1)
  }
  if(!missing(var)) out = out |> listr::list_extract({{var}})
  return(out)
}
