Greedy algorithm for EM-PCA including robust methods

impPCA(
  x,
  method = "classical",
  m = 1,
  eps = 0.5,
  k = ncol(x) - 1,
  maxit = 100,
  boot = FALSE,
  verbose = TRUE
)

Arguments

x

data.frame or matrix

method

"classical" or "mcd" (robust estimation)

m

number of multiple imputations (only used if boot = TRUE)

eps

threshold for convergence

k

number of principal components for reconstruction of x

maxit

maximum number of iterations

boot

logical; if TRUE, a residual bootstrap is applied

verbose

logical; if TRUE, additional information about the imputation process is printed

Value

the imputed data set. If boot = FALSE this is a data.frame. If boot = TRUE this is a list where each list element contains a data.frame.

References

Serneels, Sven and Verdonck, Tim (2008). Principal component analysis for data containing outliers and missing elements. Computational Statistics and Data Analysis, Elsevier, vol. 52(3), pages 1712-1727

See also

Other imputation methods: hotdeck(), irmi(), kNN(), matchImpute(), medianSamp(), rangerImpute(), regressionImp(), sampleCat(), xgboostImpute()

Author

Matthias Templ

Examples


# Build the example data: log-transformed body and brain weights from
# MASS::Animals, with ten log(brain) values set to missing.
data(Animals, package = "MASS")
Animals$brain[19] <- Animals$brain[19] + 0.01
Animals <- log(Animals)
colnames(Animals) <- c("log(body)", "log(brain)")
Animals_na <- Animals
# Every row is equally likely to be set to missing, except rows 6, 16 and 26
# (Dipliodocus, Triceratops, Brachiosaurus), which get probability 0 and
# therefore always stay observed.
probs <- rep(0.5, nrow(Animals))
probs[c(6, 16, 26)] <- 0
set.seed(1234)
na_rows <- sample(seq_len(nrow(Animals)), 10, prob = probs)
Animals_na[na_rows, "log(brain)"] <- NA
# w flags the rows whose log(brain) value was set to missing.
w <- is.na(Animals_na$`log(brain)`)
# Single imputation with the defaults (method = "classical", boot = FALSE);
# the result is a data.frame.
impPCA(Animals_na)
#> 
#> Iterations: 4
#>                    log.body.  log.brain.
#> Mountain beaver   0.30010459 2.559846909
#> Cow               6.14203741 5.539886546
#> Grey wolf         3.59264385 4.239410471
#> Goat              3.31998733 4.100325126
#> Guinea pig        0.03922071 1.704748092
#> Dipliodocus       9.36734412 3.912023005
#> Asian elephant    7.84267147 8.434463544
#> Donkey            5.23164323 5.075483616
#> Horse             6.25575004 6.484635236
#> Potar monkey      2.30258509 4.744932128
#> Cat               1.19392247 3.242592351
#> Giraffe           6.27098843 6.522092798
#> Gorilla           5.33271879 6.006353160
#> Human             4.12713439 7.185387016
#> African elephant  8.80297346 6.897261778
#> Triceratops       9.14846497 4.248495242
#> Rhesus monkey     1.91692261 3.384605074
#> Kangaroo          3.55534806 4.220385446
#> Golden hamster   -2.12026354 0.009950331
#> Mouse            -3.77226106 0.482484594
#> Rabbit            0.91629073 2.493205453
#> Sheep             4.01638302 5.164785974
#> Jaguar            4.60517019 5.056245805
#> Chimpanzee        3.95431592 4.423903710
#> Rat              -1.27296568 0.641853886
#> Brachiosaurus    11.37366340 5.040194096
#> Mole             -2.10373423 1.098612289
#> Pig               5.25749537 5.192956851
# Same imputation, but with robust ("mcd") estimation instead of the
# classical one.
impPCA(Animals_na, method = "mcd")
#> 
#> Iterations: 4
#>                    log.body.  log.brain.
#> Mountain beaver   0.30010459 2.661346676
#> Cow               6.14203741 5.898181543
#> Grey wolf         3.59264385 4.485641169
#> Goat              3.31998733 4.334570597
#> Guinea pig        0.03922071 1.704748092
#> Dipliodocus       9.36734412 3.912023005
#> Asian elephant    7.84267147 8.434463544
#> Donkey            5.23164323 5.393760199
#> Horse             6.25575004 6.484635236
#> Potar monkey      2.30258509 4.744932128
#> Cat               1.19392247 3.242592351
#> Giraffe           6.27098843 6.522092798
#> Gorilla           5.33271879 6.006353160
#> Human             4.12713439 7.185387016
#> African elephant  8.80297346 7.372524173
#> Triceratops       9.14846497 4.248495242
#> Rhesus monkey     1.91692261 3.557175695
#> Kangaroo          3.55534806 4.464976724
#> Golden hamster   -2.12026354 0.009950331
#> Mouse            -3.77226106 0.404974416
#> Rabbit            0.91629073 2.493205453
#> Sheep             4.01638302 5.164785974
#> Jaguar            4.60517019 5.056245805
#> Chimpanzee        3.95431592 4.686032513
#> Rat              -1.27296568 0.641853886
#> Brachiosaurus    11.37366340 5.040194096
#> Mole             -2.10373423 1.098612289
#> Pig               5.25749537 5.192956851
# Multiple imputation: residual bootstrap with m = 10 imputations; the result
# is a list with one data.frame per imputation.
impPCA(Animals_na, boot = TRUE, m = 10)
#> 
#> Iterations: 0
#> [[1]]
#>                    log(body)   log(brain)
#> Mountain beaver   0.30010459  1.878184201
#> Cow               6.14203741  6.040688495
#> Grey wolf         3.59264385  3.917338289
#> Goat              3.31998733  4.140043835
#> Guinea pig        0.03922071  1.704748092
#> Dipliodocus       9.36734412  3.912023005
#> Asian elephant    7.84267147  8.434463544
#> Donkey            5.23164323  4.954909469
#> Horse             6.25575004  6.484635236
#> Potar monkey      2.30258509  4.744932128
#> Cat               1.19392247  3.242592351
#> Giraffe           6.27098843  6.522092798
#> Gorilla           5.33271879  6.006353160
#> Human             4.12713439  7.185387016
#> African elephant  8.80297346  8.521827599
#> Triceratops       9.14846497  4.248495242
#> Rhesus monkey     1.91692261  2.464195041
#> Kangaroo          3.55534806  3.843745091
#> Golden hamster   -2.12026354  0.009950331
#> Mouse            -3.77226106 -0.761991838
#> Rabbit            0.91629073  2.493205453
#> Sheep             4.01638302  5.164785974
#> Jaguar            4.60517019  5.056245805
#> Chimpanzee        3.95431592  4.212594823
#> Rat              -1.27296568  0.641853886
#> Brachiosaurus    11.37366340  5.040194096
#> Mole             -2.10373423  1.098612289
#> Pig               5.25749537  5.192956851
#> 
#> [[2]]
#>                    log(body)   log(brain)
#> Mountain beaver   0.30010459  1.181836843
#> Cow               6.14203741  5.366625360
#> Grey wolf         3.59264385  3.824685300
#> Goat              3.31998733  4.846799916
#> Guinea pig        0.03922071  1.704748092
#> Dipliodocus       9.36734412  3.912023005
#> Asian elephant    7.84267147  8.434463544
#> Donkey            5.23164323  5.173695148
#> Horse             6.25575004  6.484635236
#> Potar monkey      2.30258509  4.744932128
#> Cat               1.19392247  3.242592351
#> Giraffe           6.27098843  6.522092798
#> Gorilla           5.33271879  6.006353160
#> Human             4.12713439  7.185387016
#> African elephant  8.80297346  8.591903154
#> Triceratops       9.14846497  4.248495242
#> Rhesus monkey     1.91692261  2.750022336
#> Kangaroo          3.55534806  3.278033922
#> Golden hamster   -2.12026354  0.009950331
#> Mouse            -3.77226106 -0.864548966
#> Rabbit            0.91629073  2.493205453
#> Sheep             4.01638302  5.164785974
#> Jaguar            4.60517019  5.056245805
#> Chimpanzee        3.95431592  5.298036702
#> Rat              -1.27296568  0.641853886
#> Brachiosaurus    11.37366340  5.040194096
#> Mole             -2.10373423  1.098612289
#> Pig               5.25749537  5.192956851
#> 
#> [[3]]
#>                    log(body)   log(brain)
#> Mountain beaver   0.30010459  1.651861374
#> Cow               6.14203741  5.693343527
#> Grey wolf         3.59264385  3.836527440
#> Goat              3.31998733  4.545429179
#> Guinea pig        0.03922071  1.704748092
#> Dipliodocus       9.36734412  3.912023005
#> Asian elephant    7.84267147  8.434463544
#> Donkey            5.23164323  5.211254891
#> Horse             6.25575004  6.484635236
#> Potar monkey      2.30258509  4.744932128
#> Cat               1.19392247  3.242592351
#> Giraffe           6.27098843  6.522092798
#> Gorilla           5.33271879  6.006353160
#> Human             4.12713439  7.185387016
#> African elephant  8.80297346  8.346468324
#> Triceratops       9.14846497  4.248495242
#> Rhesus monkey     1.91692261  2.619706071
#> Kangaroo          3.55534806  4.684149761
#> Golden hamster   -2.12026354  0.009950331
#> Mouse            -3.77226106 -0.964171430
#> Rabbit            0.91629073  2.493205453
#> Sheep             4.01638302  5.164785974
#> Jaguar            4.60517019  5.056245805
#> Chimpanzee        3.95431592  3.313874732
#> Rat              -1.27296568  0.641853886
#> Brachiosaurus    11.37366340  5.040194096
#> Mole             -2.10373423  1.098612289
#> Pig               5.25749537  5.192956851
#> 
#> [[4]]
#>                    log(body)   log(brain)
#> Mountain beaver   0.30010459  2.190778973
#> Cow               6.14203741  4.673170380
#> Grey wolf         3.59264385  3.929574203
#> Goat              3.31998733  5.619348552
#> Guinea pig        0.03922071  1.704748092
#> Dipliodocus       9.36734412  3.912023005
#> Asian elephant    7.84267147  8.434463544
#> Donkey            5.23164323  6.168997896
#> Horse             6.25575004  6.484635236
#> Potar monkey      2.30258509  4.744932128
#> Cat               1.19392247  3.242592351
#> Giraffe           6.27098843  6.522092798
#> Gorilla           5.33271879  6.006353160
#> Human             4.12713439  7.185387016
#> African elephant  8.80297346  8.355967746
#> Triceratops       9.14846497  4.248495242
#> Rhesus monkey     1.91692261  3.367971233
#> Kangaroo          3.55534806  3.047194877
#> Golden hamster   -2.12026354  0.009950331
#> Mouse            -3.77226106 -2.646088544
#> Rabbit            0.91629073  2.493205453
#> Sheep             4.01638302  5.164785974
#> Jaguar            4.60517019  5.056245805
#> Chimpanzee        3.95431592  4.875691993
#> Rat              -1.27296568  0.641853886
#> Brachiosaurus    11.37366340  5.040194096
#> Mole             -2.10373423  1.098612289
#> Pig               5.25749537  5.192956851
#> 
#> [[5]]
#>                    log(body)  log(brain)
#> Mountain beaver   0.30010459 1.437228020
#> Cow               6.14203741 5.421917911
#> Grey wolf         3.59264385 5.055510201
#> Goat              3.31998733 4.872945341
#> Guinea pig        0.03922071 1.704748092
#> Dipliodocus       9.36734412 3.912023005
#> Asian elephant    7.84267147 8.434463544
#> Donkey            5.23164323 5.042239271
#> Horse             6.25575004 6.484635236
#> Potar monkey      2.30258509 4.744932128
#> Cat               1.19392247 3.242592351
#> Giraffe           6.27098843 6.522092798
#> Gorilla           5.33271879 6.006353160
#> Human             4.12713439 7.185387016
#> African elephant  8.80297346 7.772566846
#> Triceratops       9.14846497 4.248495242
#> Rhesus monkey     1.91692261 3.145461968
#> Kangaroo          3.55534806 4.096803929
#> Golden hamster   -2.12026354 0.009950331
#> Mouse            -3.77226106 0.424953991
#> Rabbit            0.91629073 2.493205453
#> Sheep             4.01638302 5.164785974
#> Jaguar            4.60517019 5.056245805
#> Chimpanzee        3.95431592 3.699772656
#> Rat              -1.27296568 0.641853886
#> Brachiosaurus    11.37366340 5.040194096
#> Mole             -2.10373423 1.098612289
#> Pig               5.25749537 5.192956851
#> 
#> [[6]]
#>                    log(body)  log(brain)
#> Mountain beaver   0.30010459 2.929080341
#> Cow               6.14203741 6.259072311
#> Grey wolf         3.59264385 4.326760917
#> Goat              3.31998733 3.885405283
#> Guinea pig        0.03922071 1.704748092
#> Dipliodocus       9.36734412 3.912023005
#> Asian elephant    7.84267147 8.434463544
#> Donkey            5.23164323 5.825933329
#> Horse             6.25575004 6.484635236
#> Potar monkey      2.30258509 4.744932128
#> Cat               1.19392247 3.242592351
#> Giraffe           6.27098843 6.522092798
#> Gorilla           5.33271879 6.006353160
#> Human             4.12713439 7.185387016
#> African elephant  8.80297346 7.203267907
#> Triceratops       9.14846497 4.248495242
#> Rhesus monkey     1.91692261 2.877937889
#> Kangaroo          3.55534806 4.696434909
#> Golden hamster   -2.12026354 0.009950331
#> Mouse            -3.77226106 0.204152296
#> Rabbit            0.91629073 2.493205453
#> Sheep             4.01638302 5.164785974
#> Jaguar            4.60517019 5.056245805
#> Chimpanzee        3.95431592 4.463764592
#> Rat              -1.27296568 0.641853886
#> Brachiosaurus    11.37366340 5.040194096
#> Mole             -2.10373423 1.098612289
#> Pig               5.25749537 5.192956851
#> 
#> [[7]]
#>                    log(body)   log(brain)
#> Mountain beaver   0.30010459  3.262027472
#> Cow               6.14203741  4.352152845
#> Grey wolf         3.59264385  4.203536486
#> Goat              3.31998733  5.342360662
#> Guinea pig        0.03922071  1.704748092
#> Dipliodocus       9.36734412  3.912023005
#> Asian elephant    7.84267147  8.434463544
#> Donkey            5.23164323  5.187623996
#> Horse             6.25575004  6.484635236
#> Potar monkey      2.30258509  4.744932128
#> Cat               1.19392247  3.242592351
#> Giraffe           6.27098843  6.522092798
#> Gorilla           5.33271879  6.006353160
#> Human             4.12713439  7.185387016
#> African elephant  8.80297346  6.991229905
#> Triceratops       9.14846497  4.248495242
#> Rhesus monkey     1.91692261  3.666516710
#> Kangaroo          3.55534806  4.208965619
#> Golden hamster   -2.12026354  0.009950331
#> Mouse            -3.77226106 -0.927663596
#> Rabbit            0.91629073  2.493205453
#> Sheep             4.01638302  5.164785974
#> Jaguar            4.60517019  5.056245805
#> Chimpanzee        3.95431592  4.473128599
#> Rat              -1.27296568  0.641853886
#> Brachiosaurus    11.37366340  5.040194096
#> Mole             -2.10373423  1.098612289
#> Pig               5.25749537  5.192956851
#> 
#> [[8]]
#>                    log(body)   log(brain)
#> Mountain beaver   0.30010459  2.603303630
#> Cow               6.14203741  5.841903318
#> Grey wolf         3.59264385  4.289496745
#> Goat              3.31998733  4.128130296
#> Guinea pig        0.03922071  1.704748092
#> Dipliodocus       9.36734412  3.912023005
#> Asian elephant    7.84267147  8.434463544
#> Donkey            5.23164323  4.858821630
#> Horse             6.25575004  6.484635236
#> Potar monkey      2.30258509  4.744932128
#> Cat               1.19392247  3.242592351
#> Giraffe           6.27098843  6.522092798
#> Gorilla           5.33271879  6.006353160
#> Human             4.12713439  7.185387016
#> African elephant  8.80297346  7.363643714
#> Triceratops       9.14846497  4.248495242
#> Rhesus monkey     1.91692261  3.716546707
#> Kangaroo          3.55534806  4.649452884
#> Golden hamster   -2.12026354  0.009950331
#> Mouse            -3.77226106 -0.370302889
#> Rabbit            0.91629073  2.493205453
#> Sheep             4.01638302  5.164785974
#> Jaguar            4.60517019  5.056245805
#> Chimpanzee        3.95431592  4.230584899
#> Rat              -1.27296568  0.641853886
#> Brachiosaurus    11.37366340  5.040194096
#> Mole             -2.10373423  1.098612289
#> Pig               5.25749537  5.192956851
#> 
#> [[9]]
#>                    log(body)   log(brain)
#> Mountain beaver   0.30010459  0.954817536
#> Cow               6.14203741  6.219902726
#> Grey wolf         3.59264385  4.020141597
#> Goat              3.31998733  4.214717841
#> Guinea pig        0.03922071  1.704748092
#> Dipliodocus       9.36734412  3.912023005
#> Asian elephant    7.84267147  8.434463544
#> Donkey            5.23164323  5.961249813
#> Horse             6.25575004  6.484635236
#> Potar monkey      2.30258509  4.744932128
#> Cat               1.19392247  3.242592351
#> Giraffe           6.27098843  6.522092798
#> Gorilla           5.33271879  6.006353160
#> Human             4.12713439  7.185387016
#> African elephant  8.80297346  7.921153056
#> Triceratops       9.14846497  4.248495242
#> Rhesus monkey     1.91692261  3.009945035
#> Kangaroo          3.55534806  4.496043484
#> Golden hamster   -2.12026354  0.009950331
#> Mouse            -3.77226106 -0.029355677
#> Rabbit            0.91629073  2.493205453
#> Sheep             4.01638302  5.164785974
#> Jaguar            4.60517019  5.056245805
#> Chimpanzee        3.95431592  5.189878419
#> Rat              -1.27296568  0.641853886
#> Brachiosaurus    11.37366340  5.040194096
#> Mole             -2.10373423  1.098612289
#> Pig               5.25749537  5.192956851
#> 
#> [[10]]
#>                    log(body)  log(brain)
#> Mountain beaver   0.30010459 3.575996159
#> Cow               6.14203741 6.397487985
#> Grey wolf         3.59264385 5.126497315
#> Goat              3.31998733 4.040741799
#> Guinea pig        0.03922071 1.704748092
#> Dipliodocus       9.36734412 3.912023005
#> Asian elephant    7.84267147 8.434463544
#> Donkey            5.23164323 5.254923911
#> Horse             6.25575004 6.484635236
#> Potar monkey      2.30258509 4.744932128
#> Cat               1.19392247 3.242592351
#> Giraffe           6.27098843 6.522092798
#> Gorilla           5.33271879 6.006353160
#> Human             4.12713439 7.185387016
#> African elephant  8.80297346 6.682541797
#> Triceratops       9.14846497 4.248495242
#> Rhesus monkey     1.91692261 2.307969435
#> Kangaroo          3.55534806 3.839337836
#> Golden hamster   -2.12026354 0.009950331
#> Mouse            -3.77226106 1.026347988
#> Rabbit            0.91629073 2.493205453
#> Sheep             4.01638302 5.164785974
#> Jaguar            4.60517019 5.056245805
#> Chimpanzee        3.95431592 4.063398789
#> Rat              -1.27296568 0.641853886
#> Brachiosaurus    11.37366340 5.040194096
#> Mole             -2.10373423 1.098612289
#> Pig               5.25749537 5.192956851
#> 
# Robust ("mcd") estimation combined with the residual bootstrap; [[1]] picks
# the first element of the returned list.
impPCA(Animals_na, method = "mcd", boot = TRUE)[[1]]
#> 
#> Iterations: 0
#>                    log(body)  log(brain)
#> Mountain beaver   0.30010459 3.884564244
#> Cow               6.14203741 4.585417779
#> Grey wolf         3.59264385 4.260531472
#> Goat              3.31998733 4.206312967
#> Guinea pig        0.03922071 1.704748092
#> Dipliodocus       9.36734412 3.912023005
#> Asian elephant    7.84267147 8.434463544
#> Donkey            5.23164323 4.352689189
#> Horse             6.25575004 6.484635236
#> Potar monkey      2.30258509 4.744932128
#> Cat               1.19392247 3.242592351
#> Giraffe           6.27098843 6.522092798
#> Gorilla           5.33271879 6.006353160
#> Human             4.12713439 7.185387016
#> African elephant  8.80297346 4.609052430
#> Triceratops       9.14846497 4.248495242
#> Rhesus monkey     1.91692261 3.966259063
#> Kangaroo          3.55534806 4.265288205
#> Golden hamster   -2.12026354 0.009950331
#> Mouse            -3.77226106 3.911051097
#> Rabbit            0.91629073 2.493205453
#> Sheep             4.01638302 5.164785974
#> Jaguar            4.60517019 5.056245805
#> Chimpanzee        3.95431592 4.443091552
#> Rat              -1.27296568 0.641853886
#> Brachiosaurus    11.37366340 5.040194096
#> Mole             -2.10373423 1.098612289
#> Pig               5.25749537 5.192956851
# Plot observed values, the values that were set to missing, and their robust
# imputations, then annotate the plot with two imputation error measures.
plot(
  `log(brain)` ~ `log(body)`,
  data = Animals, type = "n", ylab = "", xlab = ""
)
mtext(text = "impPCA robust", side = 3)
points(Animals$`log(body)`[!w], Animals$`log(brain)`[!w])
points(Animals$`log(body)`[w], Animals$`log(brain)`[w], col = "grey", pch = 17)
imputed <- impPCA(Animals_na, method = "mcd", boot = TRUE)[[1]]
#> 
#> Iterations: 0
colnames(imputed) <- c("log(body)", "log(brain)")
points(
  imputed$`log(body)`[w], imputed$`log(brain)`[w],
  col = "red", pch = 20, cex = 1.4
)
# Dashed segments connect each true value to its imputed counterpart.
segments(
  x0 = Animals$`log(body)`[w], x1 = imputed$`log(body)`[w],
  y0 = Animals$`log(brain)`[w], y1 = imputed$`log(brain)`[w],
  lty = 2, col = "grey"
)
legend(
  "topleft",
  legend = c("non-missings", "set to missing", "imputed values"),
  pch = c(1, 17, 20), col = c("black", "grey", "red"), cex = 0.7
)
# Error measures are computed on the rows that were set to missing only.
true_brain <- Animals$`log(brain)`[w]
imp_brain <- imputed$`log(brain)`[w]
# Mean absolute percentage error (in percent).
mape <- round(100 * mean(abs((true_brain - imp_brain) / true_brain)), 2)
# Normalised RMSE: mean *squared* error relative to the variance of the
# complete variable, then the square root.
s2 <- var(Animals$`log(brain)`)
nrmse <- round(sqrt(mean((true_brain - imp_brain)^2) / s2), 2)
text(x = 8, y = 1.5, labels = paste("MAPE =", mape))
text(x = 8, y = 0.5, labels = paste("NRMSE =", nrmse))