Skip to contents
library(arrow)
library(data.table)
library(lme4)
library(lifeTableProcessing)

cfg <- yaml::read_yaml("~/lifeTableProcessing.yml")

Some methods of extending a life table to higher open age intervals (such as iterative methods) require an initial set of life table parameters at these older ages. One method of getting this information is by applying regression parameters calculated from a known quality set of life tables with data out to the desired open age already available. Here, we use life tables from HMD to estimate a set of regression parameters that can be used to extend \(q_x\) and \(a_x\) to higher age groups.

Preparing the Data

First, we must load HMD life table data

id_vars <- c("hmd_loc_id", "year", "sex")

hmd_lts <- read_parquet(
  fs::path(cfg$dir_proj, "data/prepped/hmd_lifetables_5x1.parquet"),
  col_select = c(all_of(id_vars), "age_start", "age_end", "mx", "ax", "qx")
)

Then we can subset to the life tables we want to include in this analysis, dropping location-years that should not be used to inform others

hmd_lts <- hmd_lts[
  year >= 1950 &
    !(hmd_loc_id == "TWN" & year < 1980) &
    !(hmd_loc_id == "EST" & year > 2000) &
    !(hmd_loc_id %in% c("UKR", "BLR", "BGR") & year < 1970) &
    !(hmd_loc_id %in% c("LVA", "LTV") & year %between% c(1958, 1970)) &
    !(hmd_loc_id == "IRL" & year %between% c(1949, 1986)) &
    !(hmd_loc_id == "PRT" & year %between% c(1939, 1971)) &
    !(hmd_loc_id == "RUS" & year %between% c(1958, 1970)) &
    !(hmd_loc_id == "SVK" & year %between% c(1949, 1962)) &
    !(hmd_loc_id == "ESP" & year %between% c(1907, 1961))
]

Finally, we create computed variable for use in the different regressions

hmd_lts[, mx_sq := mx^2]
hmd_lts[, qx_sq := qx^2]

hmd_lts[, qx_logit := demUtils::logit(qx)]

hmd_lts[
  j = `:=`(
    qx_logit_diff_to = qx_logit - shift(qx_logit, type = "lag"),
    qx_logit_diff_from = shift(qx_logit, type = "lead") - qx_logit
  ),
  by = id_vars
]

Regressions

Extend \(q_x\) Using Logit Differences

First we get the \(\text{logit}(q_x)\) differences from the prior and to the next age group for age groups starting between 65 and 100:

model_formula <- qx_logit_diff_from ~ qx_logit_diff_to + (1 | hmd_loc_id)

hmd_extension_qx_diff <- hmd_lts[
  age_start %between% c(65, 100) &
    !is.na(qx_logit_diff_to) & 
    !is.na(qx_logit_diff_from),
  j = .(fit = list(lme4::lmer(formula = model_formula, data = .SD))),
  by = .(sex, age_start, age_end)
]

hmd_extension_qx_diff[, `:=`(
  intercept = unlist(lapply(fit, \(f) fixef(f)[["(Intercept)"]])),
  slope = unlist(lapply(fit, \(f) fixef(f)[["qx_logit_diff_to"]])),
  fit = NULL
)]
sex age_start age_end intercept slope
male 65 70 0.4957160 -0.0085085
male 70 75 0.6151890 -0.1566576
male 75 80 0.8199848 -0.4061215
male 80 85 0.6258410 0.0611741
male 85 90 0.2197281 0.7651048
male 90 95 0.3495559 0.5630764
male 95 100 0.1089061 0.8854771
male 100 105 0.0530741 0.8665673
female 65 70 0.5195930 0.1026040
female 70 75 0.6582319 -0.0456473
female 75 80 1.0487815 -0.5668065
female 80 85 0.6038315 0.1792445
female 85 90 0.0783169 0.9446547
female 90 95 0.2523441 0.7727185
female 95 100 -0.0099588 1.0515176
female 100 105 0.0079646 0.9375573

Extend \(a_x\) using \(m_x\)

Here we fit a model:

\[ a_x \sim 1 + m_x + (m_x)^2 \]

for age groups starting between 65 and 105:

hmd_extension_ax_mx <- hmd_lts[
  age_start %between% c(65, 105),
  j = .(fit = list(lm(ax ~ mx + mx_sq, data = .SD))),
  by = .(sex, age_start, age_end)
]

hmd_extension_ax_mx[, `:=`(
  par_con = unlist(lapply(fit, \(f) coef(f)[["(Intercept)"]])),
  par_mx = unlist(lapply(fit, \(f) coef(f)[["mx"]])),
  par_smx = unlist(lapply(fit, \(f) coef(f)[["mx_sq"]])),
  fit = NULL
)]
sex age_start age_end par_con par_mx par_smx
male 65 70 2.655787 0.6748368 -60.2064445
male 70 75 2.713122 -2.5422057 -0.9346173
male 75 80 2.725458 -2.1149003 -5.0620852
male 80 85 2.827550 -3.6335344 3.6484886
male 85 90 2.965212 -4.6709875 5.6457705
male 90 95 2.718517 -2.2618119 0.0653742
male 95 100 2.475736 -1.2530734 -0.8679438
male 100 105 2.468930 -1.6268839 -0.0702455
male 105 110 2.499842 -1.9481037 0.3692467
female 65 70 2.644214 4.9605623 -188.6903274
female 70 75 2.720471 -1.7708118 -3.5943329
female 75 80 2.761720 -2.4564156 -3.1850617
female 80 85 2.833773 -3.4674317 2.7894367
female 85 90 2.949345 -4.6150126 6.0516197
female 90 95 2.810583 -2.7892982 0.8182935
female 95 100 2.508941 -1.0877904 -1.4234516
female 100 105 2.469973 -1.4549882 -0.3732124
female 105 110 2.510196 -1.9088747 0.2926738

Extend \(a_x\) using \(q_x\)

Here we fit a model:

\[ a_x \sim 1 + q_x + (q_x)^2 \]

for age groups starting between 65 and 105:

hmd_extension_ax_qx <- hmd_lts[
  age_start %between% c(65, 105),
  j = .(fit = list(lm(ax ~ qx + qx_sq, data = .SD))),
  by = .(sex, age_start, age_end)
]

hmd_extension_ax_qx[, `:=`(
  par_con = unlist(lapply(fit, \(f) coef(f)[["(Intercept)"]])),
  par_qx = unlist(lapply(fit, \(f) coef(f)[["qx"]])),
  par_sqx = unlist(lapply(fit, \(f) coef(f)[["qx_sq"]])),
  fit = NULL
)]
sex age_start age_end par_con par_qx par_sqx
male 65 70 2.6468478 0.3383540 -3.6882332
male 70 75 2.7166409 -0.5410589 -0.2611817
male 75 80 2.7293043 -0.3966216 -0.6727435
male 80 85 2.8687304 -0.8953252 -0.0751920
male 85 90 3.0153298 -1.0983996 -0.1156262
male 90 95 1.3570536 3.7462535 -3.6245372
male 95 100 -0.0174649 7.2072049 -5.8022013
male 100 105 -0.8555146 9.6571065 -7.5077644
male 105 110 -2.0107408 13.0535022 -9.8509814
female 65 70 2.6309579 1.4170484 -10.7048077
female 70 75 2.7252613 -0.4328476 -0.0668846
female 75 80 2.7672970 -0.5260225 -0.4021927
female 80 85 2.8447469 -0.7368805 -0.2243046
female 85 90 2.9569706 -0.9531129 -0.1697015
female 90 95 2.1848362 1.6193808 -2.2537524
female 95 100 0.0478187 7.1664051 -5.8188904
female 100 105 -0.8859450 9.6975049 -7.4943327
female 105 110 -2.0428305 13.0788411 -9.8299945