% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/localSuppression.R
\name{localSuppression}
\alias{localSuppression}
\alias{kAnon}
\title{Local Suppression to obtain k-anonymity}
\usage{
localSuppression(obj, k = 2, importance = NULL, combs = NULL, ...)

kAnon(obj, k = 2, importance = NULL, combs = NULL, ...)
}
\arguments{
\item{obj}{a \code{sdcMicroObj-class} object or a \code{data.frame}}

\item{k}{Threshold for \emph{k}-anonymity}

\item{importance}{Numeric vector of values between 1 and \emph{n} (\code{n = length(keyVars)}).
This vector defines the "importance" of variables for local suppression.
Variables with \code{importance = 1} will, if possible, not be suppressed;
variables with \code{importance = n} will be prioritized for suppression.}

\item{combs}{Numeric vector. If specified, the algorithm provides \emph{k}-anonymity
for each combination of \emph{n} key variables (with \emph{n} being the value of the \emph{i}-th
element of this parameter). For example, \code{combs = c(4,3)} means that \emph{k}-anonymity
will be provided for all combinations of 4 and then 3 key variables.
It is possible to assign different \emph{k} values for each combination by supplying \code{k} as a vector.
If \code{k} has only one value, it will be used for all subsets.}

\item{...}{see additional arguments below:
\itemize{
\item \strong{\code{keyVars}}: Names or indices of categorical key variables (for the \code{data.frame} method)
\item \strong{\code{strataVars}}: Name or index of the variable used for stratification.
\emph{k}-anonymity is ensured within each category of this variable.
\item \strong{\code{alpha}}: Numeric value between 0 and 1 specifying how much keys with missing
values (\code{NA}s) contribute to the calculation of \code{fk} and \code{Fk}.
Default is \code{1}. Used only in the \code{data.frame} method.
\item \strong{\code{nc}}: Maximum number of cores used for stratified computations.
Default is \code{1}. Parallelization is ignored on Windows.
}}
}
\value{
A modified dataset with suppressions that meets \emph{k}-anonymity based on
the specified key variables, or the modified \code{sdcMicroObj-class} object.
}
\description{
Algorithm to achieve \emph{k}-anonymity by performing local suppression.
}
\details{
The algorithm provides a \emph{k}-anonymized data set by suppressing values in key
variables. The algorithm tries to find an optimal solution to suppress as
few values as possible and considers the specified importance vector. If not
specified, the importance vector is constructed in a way such that key
variables with a high number of characteristics are considered less
important than key variables with a low number of characteristics.

The implementation provides \emph{k}-anonymity per stratum if slot \code{strataVar} has
been set in \code{sdcMicroObj-class} or if parameter \code{strataVars} is
used when applying the \code{data.frame} method. For details, see the examples provided.

For the parameter \code{alpha}:
\itemize{
\item \code{alpha = 1} counts all \emph{wildcard matches} (i.e. \code{NA}s match everything).
\item \code{alpha = 0} assumes missing values form their own categories.
}

These are two extremes. With \code{alpha = 0}, frequencies are likely underestimated when
\code{NA}s are present. If \code{combs} is used with \code{alpha = 0}, the heuristic nature of \code{kAnon()}
may lead to technically correct, but not always intuitively understandable frequency evaluations.
}
\note{
Deprecated methods \code{localSupp2} and \code{localSupp2Wrapper} are no longer available
in \code{sdcMicro} versions > 4.5.0.
\code{kAnon()} is a more intuitive term for local suppression, since the goal is to achieve \emph{k}-anonymity.
}
\examples{
\donttest{
data(francdat)

## Local suppression using the data.frame method;
## the key variables are given by column index
localS <- localSuppression(francdat, keyVars = c(4, 5, 6))
localS
plot(localS)

## for objects of class sdcMicroObj, no stratification
data(testdata2)
kv <- c("urbrur", "roof", "walls", "water", "electcon", "relat", "sex")
sdc <- createSdcObj(testdata2, keyVars = kv, w = "sampling_weight")
sdc <- localSuppression(sdc)

## for objects of class sdcMicroObj, with stratification:
## age is cut into five groups and k-anonymity is ensured within each group
testdata2$ageG <- cut(testdata2$age, 5, labels = paste0("AG", 1:5))
sdc <- createSdcObj(
  dat = testdata2,
  keyVars = kv,
  w = "sampling_weight",
  strataVar = "ageG"
)
sdc <- localSuppression(sdc, nc = 1)

## it is also possible to provide k-anonymity for subsets of key variables
## with a different parameter k for each subset size!
## in this case we want to provide 10-anonymity for all combinations
## of 5 key variables, 20-anonymity for all combinations of 4 key variables
## and 30-anonymity for all combinations of 3 key variables.
sdc <- createSdcObj(testdata2, keyVars = kv, w = "sampling_weight")
combs <- 5:3
k <- c(10, 20, 30)
sdc <- localSuppression(sdc, k = k, combs = combs)

## data.frame method (no stratification)
## columns 1 to 7 of inp are the key variables in kv, column 8 is ageG
inp <- testdata2[, c(kv, "ageG")]
res <- localSuppression(inp, keyVars = 1:7)
print(res)
plot(res)

## data.frame method (with stratification by ageG, i.e. column 8)
res <- kAnon(inp, keyVars = 1:7, strataVars = 8)
print(res)
plot(res)
}
}
\references{
Templ, M. \emph{Statistical Disclosure Control for Microdata: Methods and Applications in R.}
Springer International Publishing, 287 pages, 2017. ISBN: 978-3-319-50272-4.
\doi{10.1007/978-3-319-50272-4}

Templ, M., Kowarik, A., Meindl, B.
\emph{Statistical Disclosure Control for Micro-Data Using the R Package sdcMicro.}
Journal of Statistical Software, \strong{67}(4), 1--36, 2015. \doi{10.18637/jss.v067.i04}
}
\author{
Bernhard Meindl, Matthias Templ
}
\keyword{manip}
