% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tr_masked.R
\name{masked_targets_pred}
\alias{masked_targets_pred}
\title{Get the predictability of a target word (or phrase) given a left and right
context}
\usage{
masked_targets_pred(
  prev_contexts,
  targets,
  after_contexts,
  log.p = getOption("pangoling.log.p"),
  ignore_regex = "",
  model = getOption("pangoling.masked.default"),
  checkpoint = NULL,
  add_special_tokens = NULL,
  config_model = NULL,
  config_tokenizer = NULL
)
}
\arguments{
\item{prev_contexts}{Left context of the target word in left-to-right written
languages.}

\item{targets}{Target words.}

\item{after_contexts}{Right context of the target in left-to-right written
languages.}

\item{log.p}{Base of the logarithm used for the output predictability values.
If \code{TRUE} (default), the natural logarithm (base \emph{e}) is used.
If \code{FALSE}, the raw probabilities are returned.
Alternatively, \code{log.p} can be set to a numeric value specifying
the base of the logarithm (e.g., \code{2} for base-2 logarithms).
To get surprisal in bits (rather than predictability), set
\code{log.p = 1/2}.}

\item{ignore_regex}{Can ignore certain characters when calculating the log
probabilities. For example, \verb{^[[:punct:]]$} will ignore
all punctuation that stands alone in a token.}

\item{model}{Name of a pre-trained model or folder. One should be able to use
models based on "bert". See the
\href{https://huggingface.co/models?other=bert}{Hugging Face website}.}

\item{checkpoint}{Folder of a checkpoint.}

\item{add_special_tokens}{Whether to include special tokens. It has the
same default as the
\href{https://huggingface.co/docs/transformers/v4.25.1/en/model_doc/auto#transformers.AutoTokenizer}{AutoTokenizer}
method in Python.}

\item{config_model}{List with other arguments that control how the
model from Hugging Face is accessed.}

\item{config_tokenizer}{List with other arguments that control how the
tokenizer from Hugging Face is accessed.}
}
\value{
A named vector of predictability values (by default the natural
logarithm of the word probability).
}
\description{
Get the predictability (by default the natural logarithm of the word
probability) of a vector of target words (or phrases) given a
vector of left and of right contexts using a masked transformer.
}
\details{
A masked language model (also called BERT-like, or encoder model) is a type
of large language model that can be used to predict the content of a mask
in a sentence.

If not specified, the masked model that will be used is the one
specified in the global option \code{pangoling.masked.default}; this can be
accessed via \code{getOption("pangoling.masked.default")} (by default
"bert-base-uncased"). To change the default option
use \code{options(pangoling.masked.default = "newmaskedmodel")}.

A list of possible masked models can be found on the
\href{https://huggingface.co/models?pipeline_tag=fill-mask}{Hugging Face website}.

Using the \code{config_model} and \code{config_tokenizer} arguments, it's possible to
control how the model and tokenizer from Hugging Face are accessed; see the
Python method
\href{https://huggingface.co/docs/transformers/v4.25.1/en/model_doc/auto#transformers.AutoProcessor.from_pretrained}{\code{from_pretrained}}
for details. In case of errors, check the status of
\url{https://status.huggingface.co/}.
}
\section{More examples}{

See the
\href{https://docs.ropensci.org/pangoling/articles/intro-bert.html}{online article}
on the pangoling website for more examples.
}

\examples{
\dontshow{if (installed_py_pangoling()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
masked_targets_pred(
  prev_contexts = c("The", "The"),
  targets = c("apple", "pear"),
  after_contexts = c(
    "doesn't fall far from the tree.",
    "doesn't fall far from the tree."
  ),
  model = "bert-base-uncased"
)
\dontshow{\}) # examplesIf}
}
\seealso{
Other masked model functions: 
\code{\link{masked_tokens_pred_tbl}()}
}
\concept{masked model functions}
