Greedy algorithm for EM-PCA including robust methods
impPCA(
x,                    # data.frame or matrix
method = "classical", # "classical" or "mcd" (robust estimation)
m = 1,                # number of multiple imputations (only if boot equals TRUE)
eps = 0.5,            # threshold for convergence
k = ncol(x) - 1,      # number of principal components for reconstruction of x
maxit = 100,          # maximum number of iterations
boot = FALSE,         # residual bootstrap (if TRUE)
verbose = TRUE        # TRUE/FALSE: print additional information about the imputation process
)
data.frame or matrix
"classical" or "mcd" (robust estimation)
number of multiple imputations (only used if the parameter boot equals TRUE)
threshold for convergence
number of principal components for reconstruction of x
maximum number of iterations
whether to apply a residual bootstrap (if TRUE)
TRUE/FALSE if additional information about the imputation process should be printed
the imputed data set. If boot = FALSE, this is a data.frame. If boot = TRUE, this is a list where each list element contains a data.frame.
Serneels, Sven and Verdonck, Tim (2008). Principal component analysis for data containing outliers and missing elements. Computational Statistics and Data Analysis, Elsevier, vol. 52(3), pages 1712-1727
Other imputation methods: hotdeck(), irmi(), kNN(), matchImpute(), medianSamp(), rangerImpute(), regressionImp(), sampleCat(), xgboostImpute()
# Build a test case: take the Animals data from MASS, log-transform it, and
# set ten values of log(brain) to missing so that the imputation can be
# compared against the known true values.
data(Animals, package = "MASS")
# Nudge the brain weight in row 19 by 0.01 before taking logs.
# NOTE(review): presumably this keeps log(brain) away from exactly 0, which
# would otherwise be a zero denominator in the MAPE computed at the end of
# the example -- confirm.
Animals$brain[19] <- Animals$brain[19] + 0.01
Animals <- log(Animals)
colnames(Animals) <- c("log(body)", "log(brain)")
Animals_na <- Animals
# Selection weights for the rows to be set to missing: equal weight for every
# row, except rows 6, 16 and 26, which get weight 0 and are therefore never
# selected. (An earlier, immediately overwritten assignment of `probs` based
# on log(body)^2 was dead code and has been removed.)
probs <- rep(0.5, nrow(Animals))
probs[c(6, 16, 26)] <- 0
set.seed(1234)
# seq_len() instead of 1:nrow() so the index vector is well defined for any
# number of rows.
Animals_na[sample(seq_len(nrow(Animals)), 10, prob = probs), "log(brain)"] <- NA
# Logical mask of the cells that were set to missing; reused for plotting and
# for the error measures further down.
w <- is.na(Animals_na$`log(brain)`)
# Classical (non-robust) PCA imputation with default settings.
impPCA(Animals_na)
#>
#> Iterations: 4
#> log.body. log.brain.
#> Mountain beaver 0.30010459 2.559846909
#> Cow 6.14203741 5.539886546
#> Grey wolf 3.59264385 4.239410471
#> Goat 3.31998733 4.100325126
#> Guinea pig 0.03922071 1.704748092
#> Dipliodocus 9.36734412 3.912023005
#> Asian elephant 7.84267147 8.434463544
#> Donkey 5.23164323 5.075483616
#> Horse 6.25575004 6.484635236
#> Potar monkey 2.30258509 4.744932128
#> Cat 1.19392247 3.242592351
#> Giraffe 6.27098843 6.522092798
#> Gorilla 5.33271879 6.006353160
#> Human 4.12713439 7.185387016
#> African elephant 8.80297346 6.897261778
#> Triceratops 9.14846497 4.248495242
#> Rhesus monkey 1.91692261 3.384605074
#> Kangaroo 3.55534806 4.220385446
#> Golden hamster -2.12026354 0.009950331
#> Mouse -3.77226106 0.482484594
#> Rabbit 0.91629073 2.493205453
#> Sheep 4.01638302 5.164785974
#> Jaguar 4.60517019 5.056245805
#> Chimpanzee 3.95431592 4.423903710
#> Rat -1.27296568 0.641853886
#> Brachiosaurus 11.37366340 5.040194096
#> Mole -2.10373423 1.098612289
#> Pig 5.25749537 5.192956851
# Same imputation, but with the robust "mcd" estimation method.
impPCA(x = Animals_na, method = "mcd")
#>
#> Iterations: 4
#> log.body. log.brain.
#> Mountain beaver 0.30010459 2.661346676
#> Cow 6.14203741 5.898181543
#> Grey wolf 3.59264385 4.485641169
#> Goat 3.31998733 4.334570597
#> Guinea pig 0.03922071 1.704748092
#> Dipliodocus 9.36734412 3.912023005
#> Asian elephant 7.84267147 8.434463544
#> Donkey 5.23164323 5.393760199
#> Horse 6.25575004 6.484635236
#> Potar monkey 2.30258509 4.744932128
#> Cat 1.19392247 3.242592351
#> Giraffe 6.27098843 6.522092798
#> Gorilla 5.33271879 6.006353160
#> Human 4.12713439 7.185387016
#> African elephant 8.80297346 7.372524173
#> Triceratops 9.14846497 4.248495242
#> Rhesus monkey 1.91692261 3.557175695
#> Kangaroo 3.55534806 4.464976724
#> Golden hamster -2.12026354 0.009950331
#> Mouse -3.77226106 0.404974416
#> Rabbit 0.91629073 2.493205453
#> Sheep 4.01638302 5.164785974
#> Jaguar 4.60517019 5.056245805
#> Chimpanzee 3.95431592 4.686032513
#> Rat -1.27296568 0.641853886
#> Brachiosaurus 11.37366340 5.040194096
#> Mole -2.10373423 1.098612289
#> Pig 5.25749537 5.192956851
# Multiple imputation: residual bootstrap with m = 10 imputed data sets,
# returned as a list of data frames.
impPCA(x = Animals_na, m = 10, boot = TRUE)
#>
#> Iterations: 0
#> [[1]]
#> log(body) log(brain)
#> Mountain beaver 0.30010459 1.878184201
#> Cow 6.14203741 6.040688495
#> Grey wolf 3.59264385 3.917338289
#> Goat 3.31998733 4.140043835
#> Guinea pig 0.03922071 1.704748092
#> Dipliodocus 9.36734412 3.912023005
#> Asian elephant 7.84267147 8.434463544
#> Donkey 5.23164323 4.954909469
#> Horse 6.25575004 6.484635236
#> Potar monkey 2.30258509 4.744932128
#> Cat 1.19392247 3.242592351
#> Giraffe 6.27098843 6.522092798
#> Gorilla 5.33271879 6.006353160
#> Human 4.12713439 7.185387016
#> African elephant 8.80297346 8.521827599
#> Triceratops 9.14846497 4.248495242
#> Rhesus monkey 1.91692261 2.464195041
#> Kangaroo 3.55534806 3.843745091
#> Golden hamster -2.12026354 0.009950331
#> Mouse -3.77226106 -0.761991838
#> Rabbit 0.91629073 2.493205453
#> Sheep 4.01638302 5.164785974
#> Jaguar 4.60517019 5.056245805
#> Chimpanzee 3.95431592 4.212594823
#> Rat -1.27296568 0.641853886
#> Brachiosaurus 11.37366340 5.040194096
#> Mole -2.10373423 1.098612289
#> Pig 5.25749537 5.192956851
#>
#> [[2]]
#> log(body) log(brain)
#> Mountain beaver 0.30010459 1.181836843
#> Cow 6.14203741 5.366625360
#> Grey wolf 3.59264385 3.824685300
#> Goat 3.31998733 4.846799916
#> Guinea pig 0.03922071 1.704748092
#> Dipliodocus 9.36734412 3.912023005
#> Asian elephant 7.84267147 8.434463544
#> Donkey 5.23164323 5.173695148
#> Horse 6.25575004 6.484635236
#> Potar monkey 2.30258509 4.744932128
#> Cat 1.19392247 3.242592351
#> Giraffe 6.27098843 6.522092798
#> Gorilla 5.33271879 6.006353160
#> Human 4.12713439 7.185387016
#> African elephant 8.80297346 8.591903154
#> Triceratops 9.14846497 4.248495242
#> Rhesus monkey 1.91692261 2.750022336
#> Kangaroo 3.55534806 3.278033922
#> Golden hamster -2.12026354 0.009950331
#> Mouse -3.77226106 -0.864548966
#> Rabbit 0.91629073 2.493205453
#> Sheep 4.01638302 5.164785974
#> Jaguar 4.60517019 5.056245805
#> Chimpanzee 3.95431592 5.298036702
#> Rat -1.27296568 0.641853886
#> Brachiosaurus 11.37366340 5.040194096
#> Mole -2.10373423 1.098612289
#> Pig 5.25749537 5.192956851
#>
#> [[3]]
#> log(body) log(brain)
#> Mountain beaver 0.30010459 1.651861374
#> Cow 6.14203741 5.693343527
#> Grey wolf 3.59264385 3.836527440
#> Goat 3.31998733 4.545429179
#> Guinea pig 0.03922071 1.704748092
#> Dipliodocus 9.36734412 3.912023005
#> Asian elephant 7.84267147 8.434463544
#> Donkey 5.23164323 5.211254891
#> Horse 6.25575004 6.484635236
#> Potar monkey 2.30258509 4.744932128
#> Cat 1.19392247 3.242592351
#> Giraffe 6.27098843 6.522092798
#> Gorilla 5.33271879 6.006353160
#> Human 4.12713439 7.185387016
#> African elephant 8.80297346 8.346468324
#> Triceratops 9.14846497 4.248495242
#> Rhesus monkey 1.91692261 2.619706071
#> Kangaroo 3.55534806 4.684149761
#> Golden hamster -2.12026354 0.009950331
#> Mouse -3.77226106 -0.964171430
#> Rabbit 0.91629073 2.493205453
#> Sheep 4.01638302 5.164785974
#> Jaguar 4.60517019 5.056245805
#> Chimpanzee 3.95431592 3.313874732
#> Rat -1.27296568 0.641853886
#> Brachiosaurus 11.37366340 5.040194096
#> Mole -2.10373423 1.098612289
#> Pig 5.25749537 5.192956851
#>
#> [[4]]
#> log(body) log(brain)
#> Mountain beaver 0.30010459 2.190778973
#> Cow 6.14203741 4.673170380
#> Grey wolf 3.59264385 3.929574203
#> Goat 3.31998733 5.619348552
#> Guinea pig 0.03922071 1.704748092
#> Dipliodocus 9.36734412 3.912023005
#> Asian elephant 7.84267147 8.434463544
#> Donkey 5.23164323 6.168997896
#> Horse 6.25575004 6.484635236
#> Potar monkey 2.30258509 4.744932128
#> Cat 1.19392247 3.242592351
#> Giraffe 6.27098843 6.522092798
#> Gorilla 5.33271879 6.006353160
#> Human 4.12713439 7.185387016
#> African elephant 8.80297346 8.355967746
#> Triceratops 9.14846497 4.248495242
#> Rhesus monkey 1.91692261 3.367971233
#> Kangaroo 3.55534806 3.047194877
#> Golden hamster -2.12026354 0.009950331
#> Mouse -3.77226106 -2.646088544
#> Rabbit 0.91629073 2.493205453
#> Sheep 4.01638302 5.164785974
#> Jaguar 4.60517019 5.056245805
#> Chimpanzee 3.95431592 4.875691993
#> Rat -1.27296568 0.641853886
#> Brachiosaurus 11.37366340 5.040194096
#> Mole -2.10373423 1.098612289
#> Pig 5.25749537 5.192956851
#>
#> [[5]]
#> log(body) log(brain)
#> Mountain beaver 0.30010459 1.437228020
#> Cow 6.14203741 5.421917911
#> Grey wolf 3.59264385 5.055510201
#> Goat 3.31998733 4.872945341
#> Guinea pig 0.03922071 1.704748092
#> Dipliodocus 9.36734412 3.912023005
#> Asian elephant 7.84267147 8.434463544
#> Donkey 5.23164323 5.042239271
#> Horse 6.25575004 6.484635236
#> Potar monkey 2.30258509 4.744932128
#> Cat 1.19392247 3.242592351
#> Giraffe 6.27098843 6.522092798
#> Gorilla 5.33271879 6.006353160
#> Human 4.12713439 7.185387016
#> African elephant 8.80297346 7.772566846
#> Triceratops 9.14846497 4.248495242
#> Rhesus monkey 1.91692261 3.145461968
#> Kangaroo 3.55534806 4.096803929
#> Golden hamster -2.12026354 0.009950331
#> Mouse -3.77226106 0.424953991
#> Rabbit 0.91629073 2.493205453
#> Sheep 4.01638302 5.164785974
#> Jaguar 4.60517019 5.056245805
#> Chimpanzee 3.95431592 3.699772656
#> Rat -1.27296568 0.641853886
#> Brachiosaurus 11.37366340 5.040194096
#> Mole -2.10373423 1.098612289
#> Pig 5.25749537 5.192956851
#>
#> [[6]]
#> log(body) log(brain)
#> Mountain beaver 0.30010459 2.929080341
#> Cow 6.14203741 6.259072311
#> Grey wolf 3.59264385 4.326760917
#> Goat 3.31998733 3.885405283
#> Guinea pig 0.03922071 1.704748092
#> Dipliodocus 9.36734412 3.912023005
#> Asian elephant 7.84267147 8.434463544
#> Donkey 5.23164323 5.825933329
#> Horse 6.25575004 6.484635236
#> Potar monkey 2.30258509 4.744932128
#> Cat 1.19392247 3.242592351
#> Giraffe 6.27098843 6.522092798
#> Gorilla 5.33271879 6.006353160
#> Human 4.12713439 7.185387016
#> African elephant 8.80297346 7.203267907
#> Triceratops 9.14846497 4.248495242
#> Rhesus monkey 1.91692261 2.877937889
#> Kangaroo 3.55534806 4.696434909
#> Golden hamster -2.12026354 0.009950331
#> Mouse -3.77226106 0.204152296
#> Rabbit 0.91629073 2.493205453
#> Sheep 4.01638302 5.164785974
#> Jaguar 4.60517019 5.056245805
#> Chimpanzee 3.95431592 4.463764592
#> Rat -1.27296568 0.641853886
#> Brachiosaurus 11.37366340 5.040194096
#> Mole -2.10373423 1.098612289
#> Pig 5.25749537 5.192956851
#>
#> [[7]]
#> log(body) log(brain)
#> Mountain beaver 0.30010459 3.262027472
#> Cow 6.14203741 4.352152845
#> Grey wolf 3.59264385 4.203536486
#> Goat 3.31998733 5.342360662
#> Guinea pig 0.03922071 1.704748092
#> Dipliodocus 9.36734412 3.912023005
#> Asian elephant 7.84267147 8.434463544
#> Donkey 5.23164323 5.187623996
#> Horse 6.25575004 6.484635236
#> Potar monkey 2.30258509 4.744932128
#> Cat 1.19392247 3.242592351
#> Giraffe 6.27098843 6.522092798
#> Gorilla 5.33271879 6.006353160
#> Human 4.12713439 7.185387016
#> African elephant 8.80297346 6.991229905
#> Triceratops 9.14846497 4.248495242
#> Rhesus monkey 1.91692261 3.666516710
#> Kangaroo 3.55534806 4.208965619
#> Golden hamster -2.12026354 0.009950331
#> Mouse -3.77226106 -0.927663596
#> Rabbit 0.91629073 2.493205453
#> Sheep 4.01638302 5.164785974
#> Jaguar 4.60517019 5.056245805
#> Chimpanzee 3.95431592 4.473128599
#> Rat -1.27296568 0.641853886
#> Brachiosaurus 11.37366340 5.040194096
#> Mole -2.10373423 1.098612289
#> Pig 5.25749537 5.192956851
#>
#> [[8]]
#> log(body) log(brain)
#> Mountain beaver 0.30010459 2.603303630
#> Cow 6.14203741 5.841903318
#> Grey wolf 3.59264385 4.289496745
#> Goat 3.31998733 4.128130296
#> Guinea pig 0.03922071 1.704748092
#> Dipliodocus 9.36734412 3.912023005
#> Asian elephant 7.84267147 8.434463544
#> Donkey 5.23164323 4.858821630
#> Horse 6.25575004 6.484635236
#> Potar monkey 2.30258509 4.744932128
#> Cat 1.19392247 3.242592351
#> Giraffe 6.27098843 6.522092798
#> Gorilla 5.33271879 6.006353160
#> Human 4.12713439 7.185387016
#> African elephant 8.80297346 7.363643714
#> Triceratops 9.14846497 4.248495242
#> Rhesus monkey 1.91692261 3.716546707
#> Kangaroo 3.55534806 4.649452884
#> Golden hamster -2.12026354 0.009950331
#> Mouse -3.77226106 -0.370302889
#> Rabbit 0.91629073 2.493205453
#> Sheep 4.01638302 5.164785974
#> Jaguar 4.60517019 5.056245805
#> Chimpanzee 3.95431592 4.230584899
#> Rat -1.27296568 0.641853886
#> Brachiosaurus 11.37366340 5.040194096
#> Mole -2.10373423 1.098612289
#> Pig 5.25749537 5.192956851
#>
#> [[9]]
#> log(body) log(brain)
#> Mountain beaver 0.30010459 0.954817536
#> Cow 6.14203741 6.219902726
#> Grey wolf 3.59264385 4.020141597
#> Goat 3.31998733 4.214717841
#> Guinea pig 0.03922071 1.704748092
#> Dipliodocus 9.36734412 3.912023005
#> Asian elephant 7.84267147 8.434463544
#> Donkey 5.23164323 5.961249813
#> Horse 6.25575004 6.484635236
#> Potar monkey 2.30258509 4.744932128
#> Cat 1.19392247 3.242592351
#> Giraffe 6.27098843 6.522092798
#> Gorilla 5.33271879 6.006353160
#> Human 4.12713439 7.185387016
#> African elephant 8.80297346 7.921153056
#> Triceratops 9.14846497 4.248495242
#> Rhesus monkey 1.91692261 3.009945035
#> Kangaroo 3.55534806 4.496043484
#> Golden hamster -2.12026354 0.009950331
#> Mouse -3.77226106 -0.029355677
#> Rabbit 0.91629073 2.493205453
#> Sheep 4.01638302 5.164785974
#> Jaguar 4.60517019 5.056245805
#> Chimpanzee 3.95431592 5.189878419
#> Rat -1.27296568 0.641853886
#> Brachiosaurus 11.37366340 5.040194096
#> Mole -2.10373423 1.098612289
#> Pig 5.25749537 5.192956851
#>
#> [[10]]
#> log(body) log(brain)
#> Mountain beaver 0.30010459 3.575996159
#> Cow 6.14203741 6.397487985
#> Grey wolf 3.59264385 5.126497315
#> Goat 3.31998733 4.040741799
#> Guinea pig 0.03922071 1.704748092
#> Dipliodocus 9.36734412 3.912023005
#> Asian elephant 7.84267147 8.434463544
#> Donkey 5.23164323 5.254923911
#> Horse 6.25575004 6.484635236
#> Potar monkey 2.30258509 4.744932128
#> Cat 1.19392247 3.242592351
#> Giraffe 6.27098843 6.522092798
#> Gorilla 5.33271879 6.006353160
#> Human 4.12713439 7.185387016
#> African elephant 8.80297346 6.682541797
#> Triceratops 9.14846497 4.248495242
#> Rhesus monkey 1.91692261 2.307969435
#> Kangaroo 3.55534806 3.839337836
#> Golden hamster -2.12026354 0.009950331
#> Mouse -3.77226106 1.026347988
#> Rabbit 0.91629073 2.493205453
#> Sheep 4.01638302 5.164785974
#> Jaguar 4.60517019 5.056245805
#> Chimpanzee 3.95431592 4.063398789
#> Rat -1.27296568 0.641853886
#> Brachiosaurus 11.37366340 5.040194096
#> Mole -2.10373423 1.098612289
#> Pig 5.25749537 5.192956851
#>
# Robust estimation combined with the residual bootstrap. With boot = TRUE
# the result is a list, so the first imputed data set is extracted.
impPCA(x = Animals_na, method = "mcd", boot = TRUE)[[1L]]
#>
#> Iterations: 0
#> log(body) log(brain)
#> Mountain beaver 0.30010459 3.884564244
#> Cow 6.14203741 4.585417779
#> Grey wolf 3.59264385 4.260531472
#> Goat 3.31998733 4.206312967
#> Guinea pig 0.03922071 1.704748092
#> Dipliodocus 9.36734412 3.912023005
#> Asian elephant 7.84267147 8.434463544
#> Donkey 5.23164323 4.352689189
#> Horse 6.25575004 6.484635236
#> Potar monkey 2.30258509 4.744932128
#> Cat 1.19392247 3.242592351
#> Giraffe 6.27098843 6.522092798
#> Gorilla 5.33271879 6.006353160
#> Human 4.12713439 7.185387016
#> African elephant 8.80297346 4.609052430
#> Triceratops 9.14846497 4.248495242
#> Rhesus monkey 1.91692261 3.966259063
#> Kangaroo 3.55534806 4.265288205
#> Golden hamster -2.12026354 0.009950331
#> Mouse -3.77226106 3.911051097
#> Rabbit 0.91629073 2.493205453
#> Sheep 4.01638302 5.164785974
#> Jaguar 4.60517019 5.056245805
#> Chimpanzee 3.95431592 4.443091552
#> Rat -1.27296568 0.641853886
#> Brachiosaurus 11.37366340 5.040194096
#> Mole -2.10373423 1.098612289
#> Pig 5.25749537 5.192956851
# Empty plotting frame spanning the complete data; points are added below.
plot(`log(brain)` ~ `log(body)`, data = Animals, type = "n", xlab = "", ylab = "")
mtext("impPCA robust", side = 3)
# Observed cells (default symbol) ...
with(Animals[!w, ], points(`log(body)`, `log(brain)`))
# ... and the true values of the cells that were set to missing (grey triangles).
with(Animals[w, ], points(`log(body)`, `log(brain)`, col = "grey", pch = 17))
# Robust imputation with residual bootstrap; keep the first imputed data set.
imputed <- impPCA(x = Animals_na, method = "mcd", boot = TRUE)[[1L]]
#>
#> Iterations: 0
# Set the column names explicitly so the backtick lookups below resolve.
colnames(imputed) <- c("log(body)", "log(brain)")
# Imputed values for the cells that were set to missing.
points(imputed$`log(body)`[w], imputed$`log(brain)`[w], col = "red", pch = 20, cex = 1.4)
# Dashed connectors from each true value to its imputed counterpart.
segments(
  x0 = Animals$`log(body)`[w], x1 = imputed$`log(body)`[w],
  y0 = Animals$`log(brain)`[w], y1 = imputed$`log(brain)`[w],
  lty = 2, col = "grey"
)
legend(
  "topleft",
  legend = c("non-missings", "set to missing", "imputed values"),
  pch = c(1, 17, 20), col = c("black", "grey", "red"), cex = 0.7
)
# Imputation error, evaluated on the cells that were set to missing.
truth <- Animals$`log(brain)`[w]
err <- truth - imputed$`log(brain)`[w]
# Mean absolute percentage error over the imputed cells.
mape <- round(100 * mean(abs(err / truth)), 2)
# Variance of the complete (true) variable, used to normalise the RMSE.
s2 <- var(Animals$`log(brain)`)
# Normalised root mean squared error: sqrt(MSE / variance). The errors must be
# squared; the previous version summed abs(error / s2), which is not an RMSE.
nrmse <- round(sqrt(mean(err^2) / s2), 2)
text(x = 8, y = 1.5, labels = paste("MAPE =", mape))
text(x = 8, y = 0.5, labels = paste("NRMSE =", nrmse))