Set up the packages

library(future)
library(dplyr)
library(purrr)
#library(microbenchmark) # it is needed if recalculating the comparison
library(Rcurvep)

datasets from Rcurvep package

data("zfishdev_all") # two endpoints, each endpoint includes 32 chemicals/curves
data("zfishdev_act") # simulated curves based on zfishdev_all, each chemical has 100 simulated curves
data("zfishdev") # four endpoints, each endpoint includes 3 chemicals/curves

When no preferred BMR and would like to do a exhaustive search

This is a computationally intensive procedure. n_sample = 100 is preferred but for demonstration, n_sample = 10 is used. Expressions are used to delay the execution of commands.

In combi_run_rcurvep function, functions from furrr package are embedded. Use future::plan() to control the types of calculation.


# sequential run
seq_run_multi <- expression(
  
  # set up the plan
  future::plan(sequential),
  
  # calculation
  combi_run_rcurvep(
    zfishdev_all, 
    n_sample = 10,
    TRSH = seq(5, 95, by = 5),
    RNGE = 1000000,
    keep_sets = "act_set",
    seed = 300
  )
)


# parallel run
par_run_multi <- expression(
  
  # set up the plan
  future::plan(multisession, workers = 10),
  
  # calculation
  combi_run_rcurvep(
    zfishdev_all, 
    n_sample = 10,
    TRSH = seq(5, 95, by = 5),
    RNGE = 1000000,
    keep_sets = "act_set",
    seed = 300
  ),
  
  # re-set the plan back
  future::plan(sequential)
)

Calculate 10 times and compare the results

Due to the long time, the results are pasted below. Parallel calculation is faster.


run_speed_multi_rcurvep <- microbenchmark(
  eval(seq_run_multi),
  eval(par_run_multi),
  times = 10
)

#> run_speed_multi_rcurvep
#Unit: seconds
#                expr      min       lq     mean   median       uq      max neval
# eval(seq_run_multi) 61.06341 61.12504 61.79744 61.43494 62.17374 63.99470    10
# eval(par_run_multi) 18.87097 19.36093 19.74137 19.50655 20.42096 20.97378    10

When preferred BMRs are available for endpoints

Get the BMRs for each endpoint

bmr_out <- estimate_dataset_bmr(zfishdev_act, plot = FALSE)
bmrd <- bmr_out$outcome

Join the BMRs to the concentration-response data

inp_tb <- bmrd |>
  nest_join(
    zfishdev_all, by = c("endpoint"), keep = TRUE, name = "data"
  ) |>
  select(RNGE, endpoint, bmr_exp, data)
# input_data for combi_run_rcurvep
rmarkdown::paged_table(inp_tb)

ABCDEFGHIJ0123456789

RNGE <dbl>	endpoint <chr>	bmr_exp <dbl>	data <list>
1e+06	percent_affected_96	20	<tibble[,5]>
1e+06	percent_mortality_96	50	<tibble[,5]>

Set up the expressions

n_sample = 1000 is preferred but for demonstration, n_sample = 100 is used.

For sequential run, purrr::pmap is used. For parallel run, furrr::future_pmap is used

# sequential run
seq_run_bmr <- expression(
  future::plan(sequential),
  pmap(inp_tb, ~ combi_run_rcurvep(..4, TRSH = ..3, RNGE = ..1, n_samples = 100, seed = 300, keep_sets = c("act_set")))
)

# parallel run
par_run_bmr <- expression(
  
  future::plan(multisession, workers = 10),
  
  # calculation
  # there is no need to use future_pmap here 
  pmap(inp_tb, ~ combi_run_rcurvep(..4, TRSH = ..3, RNGE = ..1, n_samples = 100, seed = 300, keep_sets = c("act_set"))),

  future::plan(sequential)
)

Calculate 10 times and compare the results

Due to the long time, the results are pasted below. Parallel calculation is faster.

run_speed_bmr_rcurvep <- microbenchmark(
  eval(seq_run_bmr),
  eval(par_run_bmr),
  times = 10
)

#> run_speed_bmr_rcurvep
#Unit: seconds
#              expr      min       lq     mean   median       uq      max neval
# eval(seq_run_bmr) 35.51327 35.59489 35.79890 35.81629 35.88001 36.28173    10
# eval(par_run_bmr) 14.78751 15.52596 16.19503 16.14672 16.50997 17.82284    10

Fitting based on simulated curves using run_fit

In run_fit function, functions from furrr package are embedded. Use future::plan() to control the types of calculation.

Set up the expressions

n_sample = 1000 is preferred but for demonstration, n_sample = 100 is used. Also, create_dataset function is used to convert the incidence data into response data.


# sequential run
seq_fit_hill_boot <- expression(
  
  future::plan(sequential),
  run_fit(create_dataset(zfishdev), hill_pdir = 1, n_samples = 100, modls = "hill")

)

# parallel run
seq_fit_hill_boot <- expression(
  
  future::plan(multisession, workers = 10),
  
   # calculation
  run_fit(create_dataset(zfishdev), hill_pdir = 1, n_samples = 100, modls = "hill"),
  future::plan(sequential)
)

Calculate 10 times and compare the results

Due to the long time, the results are pasted below. Parallel calculation is faster.

run_speed_fit_hill_boot <- microbenchmark(
  eval(seq_fit_hill_boot),
  eval(par_fit_hill_boot),
  times = 10
)

#> run_speed_fit_hill_boot
#Unit: seconds
#               expr      min       lq     mean   median       uq      max neval
# eval(seq_fit_hill_boot) 60.75111 63.42611 64.02762 63.97692 65.46656 66.83198    10
# eval(par_fit_hill_boot) 34.81743 36.88777 37.43076 37.41199 38.88405 39.40711    10

Fitting based on simulated curves using run_fit and pmap

We can also use similar syntax as combi_run_rcurvep to run multiple datasets.


# make the incidence data as response data
inp_tb_resp <- inp_tb |> mutate(data = map(data, create_dataset))

# sequential run
seq_fit_hill_multi <- expression(
  future::plan(sequential),
  
  pmap(inp_tb_resp, ~ run_fit(..4, modls = "hill", hill_pdir = ifelse(..3 < 0, -1, 1), n_samples = 100, keep_sets = c("fit_set")), .options = furrr_options(seed = 2023))
)
  
# parallel run
para_fit_hill_multi <- expression(
  future::plan(multisession, workers = 10), 
  
  # calculation, no need to use future_pmap
  pmap(inp_tb_resp, ~ run_fit(..4, modls = "hill", hill_pdir = ifelse(..3 < 0, -1, 1), n_samples = 100, keep_sets = c("fit_set")), .options = furrr_options(seed = 2023)),
  future::plan(sequential)
)

Calculate 10 times and compare the results

Due to the long time, the results are pasted below. Parallel calculation is faster.

run_speed_fit_hill_multi <- microbenchmark(
  eval(seq_fit_hill_multi),
  eval(para_fit_hill_multi),
  times = 10
)

#> run_speed_fit_hill_multi
#Unit: seconds
#                      expr       min        lq      mean   median       uq      max neval
#  eval(seq_fit_hill_multi) 219.04359 220.59658 222.32948 222.2282 224.4493 225.2394    10
# eval(para_fit_hill_multi)  97.04507  98.85834  99.67153 100.1014 100.9218 101.0854    10

Fitting based on original data

In run_fit function, functions from furrr package are embedded. Use future::plan() to control the types of calculation.

Two parameters are tested: hill and cc2. hill is 3-parameter Hill equation implemented using R. cc2 is 4-parameter Hill equation implemented using Java.

The dataset - respd_1 - includes 3000 curves, is not available in the package. If the dataset is too small, it might not worthwhile to start the parallel computing.

Use `modls = hill` parameter


# sequential run
seq_fit_hill_ori <- expression(
  
  future::plan(sequential),
  run_fit(respd_1,  modls = "hill")

)

# parallel run
par_fit_hill_ori <- expression(
  
  future::plan(multisession, workers = 5),
  run_fit(respd_1,  modls = "hill"),
  future::plan(sequential)
)

Calculate 5 times and compare the results

Due to the long time, the results are pasted below. Parallel calculation is faster.

run_speed_hit_hill_ori <- microbenchmark(
  eval(seq_fit_hill_ori),
  eval(par_fit_hill_ori),
  times = 5
)

#> run_speed_hit_hill_ori
#Unit: seconds
#                   expr       min        lq      mean    median       uq       max neval
# eval(seq_fit_hill_ori) 112.12160 112.30511 112.35622 112.40213 112.4308 112.52144     5
# eval(par_fit_hill_ori)  62.92156  63.04108  63.51158  63.60182  63.9130  64.08043     5

Use `modls = cc2` parameter


# sequential run
seq_fit_cc2 <- expression(
  
  future::plan(sequential),
  run_fit(respd_1,  modls = "cc2")

)

# parallel run
par_fit_cc2 <- expression(
  
  future::plan(multisession, workers = 5),
  run_fit(respd_1,  modls = "cc2"),
  future::plan(sequential)
)

Calculate 5 times and compare the results

Due to the long time, the results are pasted below. Parallel calculation does not improve much. It could be because the cc2 is implemented using Java.

run_speed_fit_cc2 <- microbenchmark(
  eval(seq_fit_cc2),
  eval(par_fit_cc2),
  times = 5
)

#> run_speed_fit_cc2
#Unit: seconds
#              expr      min       lq     mean   median       uq      max neval
# eval(seq_fit_cc2) 68.37777 68.39599 68.46936 68.45011 68.45746 68.66547     5
# eval(par_fit_cc2) 58.07689 58.31388 59.17766 59.01783 59.07421 61.40546     5

Parallel Computing Examples Using Rcurvep

Set up the packages

datasets from Rcurvep package

When no preferred BMR and would like to do a exhaustive search

Calculate 10 times and compare the results

When preferred BMRs are available for endpoints

Get the BMRs for each endpoint

Join the BMRs to the concentration-response data

Set up the expressions

Calculate 10 times and compare the results

Fitting based on simulated curves using run_fit

Set up the expressions

Calculate 10 times and compare the results

Fitting based on simulated curves using run_fit and pmap

Calculate 10 times and compare the results

Fitting based on original data

Use modls = hill parameter

Calculate 5 times and compare the results

Use modls = cc2 parameter

Calculate 5 times and compare the results

Use `modls = hill` parameter

Use `modls = cc2` parameter