42 Reshaping with tidyr
42.1 Long to wide with pivot_wider()
Using the same example seen in Chapter 26:
dat_long <- data.frame(
Account_ID = c(8001, 8002, 8003, 8004, 8001, 8002, 8003, 8004,
8001, 8002, 8003, 8004, 8001, 8002, 8003, 8004),
Age = c(67.8017038366664, 42.9198507293701, 46.2301756642422,
39.665983196671, 67.8017038366664, 42.9198507293701,
46.2301756642422, 39.665983196671, 67.8017038366664,
42.9198507293701, 46.2301756642422, 39.665983196671,
67.8017038366664, 42.9198507293701, 46.2301756642422,
39.665983196671),
Admission = c("ED", "Planned", "Planned", "ED", "ED", "Planned",
"Planned", "ED", "ED", "Planned", "Planned", "ED", "ED", "Planned",
"Planned", "ED"),
Lab_key = c("RBC", "RBC", "RBC", "RBC", "WBC", "WBC", "WBC", "WBC",
"Hematocrit", "Hematocrit", "Hematocrit", "Hematocrit",
"Hemoglobin", "Hemoglobin", "Hemoglobin", "Hemoglobin"),
Lab_value = c(4.63449321082268, 3.34968550627897, 4.27037213597765,
4.93897736897793, 8374.22887757195, 7612.37380499927,
8759.27855519425, 6972.28096216548, 36.272693147236,
40.5716317809522, 39.9888624177955, 39.8786884058422,
12.6188444991545, 12.1739747363806, 15.1293426442183,
14.8885696185238)
)
dat_long
Account_ID Age Admission Lab_key Lab_value
1 8001 67.80170 ED RBC 4.634493
2 8002 42.91985 Planned RBC 3.349686
3 8003 46.23018 Planned RBC 4.270372
4 8004 39.66598 ED RBC 4.938977
5 8001 67.80170 ED WBC 8374.228878
6 8002 42.91985 Planned WBC 7612.373805
7 8003 46.23018 Planned WBC 8759.278555
8 8004 39.66598 ED WBC 6972.280962
9 8001 67.80170 ED Hematocrit 36.272693
10 8002 42.91985 Planned Hematocrit 40.571632
11 8003 46.23018 Planned Hematocrit 39.988862
12 8004 39.66598 ED Hematocrit 39.878688
13 8001 67.80170 ED Hemoglobin 12.618844
14 8002 42.91985 Planned Hemoglobin 12.173975
15 8003 46.23018 Planned Hemoglobin 15.129343
16 8004 39.66598 ED Hemoglobin 14.888570
dat_long2wide <- pivot_wider(dat_long,
id_cols = c("Account_ID", "Age", "Admission"),
names_from = "Lab_key",
values_from = "Lab_value")
dat_long2wide
# A tibble: 4 × 7
Account_ID Age Admission RBC WBC Hematocrit Hemoglobin
<dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
1 8001 67.8 ED 4.63 8374. 36.3 12.6
2 8002 42.9 Planned 3.35 7612. 40.6 12.2
3 8003 46.2 Planned 4.27 8759. 40.0 15.1
4 8004 39.7 ED 4.94 6972. 39.9 14.9
42.2 Wide to long with pivot_longer()
set.seed(2022)
dat_wide <- data.frame(Account_ID = c(8001, 8002, 8003, 8004),
Age = rnorm(4, mean = 57, sd = 12),
RBC = rnorm(4, mean = 4.8, sd = 0.5),
WBC = rnorm(4, mean = 7250, sd = 1500),
Hematocrit = rnorm(4, mean = 40.2, sd = 4),
Hemoglobin = rnorm(4, mean = 13.6, sd = 1.5),
Admission = sample(c("ED", "Planned"), size = 4, replace = TRUE))
dat_wide
Account_ID Age RBC WBC Hematocrit Hemoglobin Admission
1 8001 67.80170 4.634493 8374.229 36.27269 12.61884 ED
2 8002 42.91985 3.349686 7612.374 40.57163 12.17397 Planned
3 8003 46.23018 4.270372 8759.279 39.98886 15.12934 Planned
4 8004 39.66598 4.938977 6972.281 39.87869 14.88857 Planned
dat_wide2long <- pivot_longer(dat_wide,
cols = 3:6,
names_to = "Lab_key",
values_to = "Lab_value")
dat_wide2long
# A tibble: 16 × 5
Account_ID Age Admission Lab_key Lab_value
<dbl> <dbl> <chr> <chr> <dbl>
1 8001 67.8 ED RBC 4.63
2 8001 67.8 ED WBC 8374.
3 8001 67.8 ED Hematocrit 36.3
4 8001 67.8 ED Hemoglobin 12.6
5 8002 42.9 Planned RBC 3.35
6 8002 42.9 Planned WBC 7612.
7 8002 42.9 Planned Hematocrit 40.6
8 8002 42.9 Planned Hemoglobin 12.2
9 8003 46.2 Planned RBC 4.27
10 8003 46.2 Planned WBC 8759.
11 8003 46.2 Planned Hematocrit 40.0
12 8003 46.2 Planned Hemoglobin 15.1
13 8004 39.7 Planned RBC 4.94
14 8004 39.7 Planned WBC 6972.
15 8004 39.7 Planned Hematocrit 39.9
16 8004 39.7 Planned Hemoglobin 14.9
Note that the “Age” and “Admission” columns were also reshaped correctly.