import polars as pl
24 Joins
= pl.DataFrame({
a "PID": list(range(1, 10)),
"Hospital": ["UCSF", "HUP", "Stanford",
"Stanford", "UCSF", "HUP",
"HUP", "Stanford", "UCSF"],
"Age": [22, 34, 41, 19, 53, 21, 63, 22, 19],
"Sex": [1, 1, 0, 1, 0, 0, 1, 0, 0]
})
= pl.DataFrame({
b "PID": list(range(6, 13)),
"V1": [153, 89, 112, 228, 91, 190, 101],
"Department": ["Neurology", "Radiology",
"Emergency", "Cardiology",
"Surgery", "Neurology", "Psychiatry"]
})
24.1 Inner join
Usage: df.join(df, left_on, right_on, on, how, suffix)
how
argument default to “inner”:
= "PID") a.join(b, on
shape: (4, 6)
PID | Hospital | Age | Sex | V1 | Department |
---|---|---|---|---|---|
i64 | str | i64 | i64 | i64 | str |
6 | "HUP" | 21 | 0 | 153 | "Neurology" |
7 | "HUP" | 63 | 1 | 89 | "Radiology" |
8 | "Stanford" | 22 | 0 | 112 | "Emergency" |
9 | "UCSF" | 19 | 0 | 228 | "Cardiology" |
24.2 Outer join
= "PID", how = "outer") a.join(b, on
shape: (12, 6)
PID | Hospital | Age | Sex | V1 | Department |
---|---|---|---|---|---|
i64 | str | i64 | i64 | i64 | str |
1 | "UCSF" | 22 | 1 | null | null |
2 | "HUP" | 34 | 1 | null | null |
3 | "Stanford" | 41 | 0 | null | null |
4 | "Stanford" | 19 | 1 | null | null |
5 | "UCSF" | 53 | 0 | null | null |
6 | "HUP" | 21 | 0 | 153 | "Neurology" |
7 | "HUP" | 63 | 1 | 89 | "Radiology" |
8 | "Stanford" | 22 | 0 | 112 | "Emergency" |
9 | "UCSF" | 19 | 0 | 228 | "Cardiology" |
12 | null | null | null | 101 | "Psychiatry" |
11 | null | null | null | 190 | "Neurology" |
10 | null | null | null | 91 | "Surgery" |
24.3 Left outer join
= "PID", how = "left") a.join(b, on
shape: (9, 6)
PID | Hospital | Age | Sex | V1 | Department |
---|---|---|---|---|---|
i64 | str | i64 | i64 | i64 | str |
1 | "UCSF" | 22 | 1 | null | null |
2 | "HUP" | 34 | 1 | null | null |
3 | "Stanford" | 41 | 0 | null | null |
4 | "Stanford" | 19 | 1 | null | null |
5 | "UCSF" | 53 | 0 | null | null |
6 | "HUP" | 21 | 0 | 153 | "Neurology" |
7 | "HUP" | 63 | 1 | 89 | "Radiology" |
8 | "Stanford" | 22 | 0 | 112 | "Emergency" |
9 | "UCSF" | 19 | 0 | 228 | "Cardiology" |
24.4 Right outer join
There is no how = "right"
option, so switch places of DataFrames:
= "PID", how = "left") b.join(a, on
shape: (7, 6)
PID | V1 | Department | Hospital | Age | Sex |
---|---|---|---|---|---|
i64 | i64 | str | str | i64 | i64 |
6 | 153 | "Neurology" | "HUP" | 21 | 0 |
7 | 89 | "Radiology" | "HUP" | 63 | 1 |
8 | 112 | "Emergency" | "Stanford" | 22 | 0 |
9 | 228 | "Cardiology" | "UCSF" | 19 | 0 |
10 | 91 | "Surgery" | null | null | null |
11 | 190 | "Neurology" | null | null | null |
12 | 101 | "Psychiatry" | null | null | null |