#' Gutenberg metadata about each work
#'
#' Selected fields of metadata about each of the Project Gutenberg works.
#'
#' @details To find the date on which this metadata was last updated, run
#'   `attr(gutenberg_metadata, "date_updated")`.
#'
#' @format A [tibble::tibble()] with one row for each work in Project
#'   Gutenberg and the following columns:
#' \describe{
#'   \item{gutenberg_id}{Numeric ID, used to retrieve works from
#'   Project Gutenberg}
#'   \item{title}{Title}
#'   \item{author}{Author, if a single one given. Given as last name
#'   first (e.g. "Doyle, Arthur Conan")}
#'   \item{gutenberg_author_id}{Project Gutenberg author ID, which can be
#'   used to join with the [gutenberg_authors] dataset}
#'   \item{language}{Language ISO 639 code, separated by / if multiple. Two
#'   letter code if one exists, otherwise three letter. See
#'   <https://en.wikipedia.org/wiki/List_of_ISO_639-2_codes>. The
#'   [gutenberg_languages] dataset has one row for each work-language pair}
#'   \item{gutenberg_bookshelf}{Which collection or collections this
#'   is found in, separated by / if multiple}
#'   \item{rights}{Generally one of three options: "Public domain in the USA."
#'   (the most common by far), "Copyrighted. Read the copyright notice inside
#'   this book for details.", or "None"}
#'   \item{has_text}{Whether there is a file containing digits followed by
#'   `.txt` in Project Gutenberg for this record (as opposed to, for
#'   example, audiobooks). If not, the work cannot be retrieved with
#'   [gutenberg_download()]}
#' }
#' @docType data
#' @keywords datasets
#'
#' @examplesIf interactive()
#'
#' library(dplyr)
#' library(stringr)
#'
#' gutenberg_metadata
#'
#' gutenberg_metadata |>
#'   count(author, sort = TRUE)
#'
#' # Look for Shakespeare, excluding collections (containing "Works") and
#' # translations
#' shakespeare_metadata <- gutenberg_metadata |>
#'   filter(
#'     author == "Shakespeare, William",
#'     language == "en",
#'     !str_detect(title, "Works"),
#'     has_text,
#'     !str_detect(rights, "Copyright")
#'   ) |>
#'   distinct(title)
#'
#' # Note that the gutenberg_works() function filters for English
#' # non-copyrighted works and does de-duplication by default:
#'
#' shakespeare_metadata2 <- gutenberg_works(
#'   author == "Shakespeare, William",
#'   !str_detect(title, "Works")
#' )
#'
#' # See date last updated
#' attr(gutenberg_metadata, "date_updated")
#'
#' @seealso [gutenberg_works()], [gutenberg_authors], [gutenberg_subjects],
#'   [gutenberg_languages]
"gutenberg_metadata"


#' Gutenberg metadata about the subject of each work
#'
#' Gutenberg metadata about the subject of each work, particularly
#' Library of Congress Classifications (lcc) and Library of Congress
#' Subject Headings (lcsh).
#'
#' @format A [tibble::tibble()] with one row for each pairing
#' of work and subject, with the columns:
#' \describe{
#'   \item{gutenberg_id}{ID describing a work that can be joined with
#'   [gutenberg_metadata]}
#'   \item{subject_type}{Either "lcc" (Library of Congress Classification) or
#'   "lcsh" (Library of Congress Subject Headings)}
#'   \item{subject}{Subject}
#' }
#' @docType data
#' @keywords datasets
#'
#' @details Find more information about the Library of Congress
#' Classification here: <https://www.loc.gov/catdir/cpso/lcco/>, and about
#' Library of Congress Subject Headings here:
#' <https://id.loc.gov/authorities/subjects.html>.
#'
#' To find the date on which this metadata was last updated,
#' run `attr(gutenberg_subjects, "date_updated")`.
#'
#' @examplesIf interactive()
#'
#' library(dplyr)
#' library(stringr)
#'
#' gutenberg_subjects |>
#'   filter(subject_type == "lcsh") |>
#'   count(subject, sort = TRUE)
#'
#' sherlock_holmes_subjects <- gutenberg_subjects |>
#'   filter(str_detect(subject, "Holmes, Sherlock"))
#'
#' sherlock_holmes_subjects
#'
#' sherlock_holmes_metadata <- gutenberg_works() |>
#'   filter(author == "Doyle, Arthur Conan") |>
#'   semi_join(sherlock_holmes_subjects, by = "gutenberg_id")
#'
#' sherlock_holmes_metadata
#'
#' \donttest{
#' holmes_books <- gutenberg_download(sherlock_holmes_metadata$gutenberg_id)
#'
#' holmes_books
#' }
#'
#' # See date last updated
#' attr(gutenberg_subjects, "date_updated")
#'
#' @seealso [gutenberg_metadata], [gutenberg_authors], [gutenberg_languages]
"gutenberg_subjects"


#' Metadata about Project Gutenberg authors
#'
#' A tibble with metadata about each author of a Project
#' Gutenberg work. Although the Project Gutenberg raw data
#' also includes metadata on contributors, editors, illustrators,
#' etc., this dataset contains only people who have been the
#' single author of at least one work.
#'
#' @details To find the date on which this metadata was last updated,
#' run `attr(gutenberg_authors, "date_updated")`.
#'
#' @format A [tibble::tibble()] with one row for each
#' author, with the columns:
#' \describe{
#'   \item{gutenberg_author_id}{Unique identifier for the author that can
#'   be used to join with the [gutenberg_metadata] dataset}
#'   \item{author}{The `agent_name` field from the original metadata}
#'   \item{alias}{Alias}
#'   \item{birthdate}{Year of birth}
#'   \item{deathdate}{Year of death}
#'   \item{wikipedia}{Link to Wikipedia article on the author. If there
#'   are multiple, they are "|"-delimited}
#'   \item{aliases}{Character vector of aliases. If there
#'   are multiple, they are "/"-delimited}
#' }
#' @docType data
#' @keywords datasets
#'
#' @examples
#'
#' # See date last updated
#' attr(gutenberg_authors, "date_updated")
#'
#' @seealso [gutenberg_metadata], [gutenberg_subjects], [gutenberg_languages]
"gutenberg_authors"

#' Metadata about Project Gutenberg languages
#'
#' A tibble with metadata about the languages of each Project
#' Gutenberg work. Unlike the `language` column of [gutenberg_metadata],
#' which combines multiple languages into a single "/"-separated value,
#' this dataset has one row for each work-language pair.
#'
#' @details To find the date on which this metadata was last updated,
#' run `attr(gutenberg_languages, "date_updated")`.
#'
#' @format A [tibble::tibble()] with one row for each
#' work-language pair, with the columns:
#' \describe{
#'   \item{gutenberg_id}{Unique identifier for the work that can
#'   be used to join with the [gutenberg_metadata] dataset}
#'   \item{language}{Language ISO 639 code. Two
#'   letter code if one exists, otherwise three letter. See
#'   <https://en.wikipedia.org/wiki/List_of_ISO_639-2_codes>}
#'   \item{total_languages}{Number of languages for this work.}
#' }
#' @docType data
#' @keywords datasets
#'
#' @examples
#'
#' # See date last updated
#' attr(gutenberg_languages, "date_updated")
#'
#' @seealso [gutenberg_metadata], [gutenberg_subjects], [gutenberg_authors]
"gutenberg_languages"


#' Sample book downloads
#'
#' A [tibble::tibble()] of book text for two sample books, generated using
#' [gutenberg_download()].
#'
#' @details This code was used to download the books:
#' `gutenberg_download(c(109, 105), meta_fields = c("title", "author"))`
#'
#' @format A [tibble::tibble()] with one row for each
#' line of text from each book, with the columns:
#' \describe{
#'   \item{gutenberg_id}{Unique identifier for the work that can
#'   be used to join with the [gutenberg_metadata] dataset.}
#'   \item{text}{A character vector of lines of text.}
#'   \item{title}{The title of this work.}
#'   \item{author}{The author of this work.}
#' }
#' @docType data
#' @keywords datasets
#'
#' @seealso [gutenberg_download()], [gutenberg_metadata]
"sample_books"
