Last updated on December 10, 2024 pm
Logistic 回归的假设
- 假设1:因变量唯一,且为有序多分类变量
 - 假设2:存在一个或多个自变量,可为连续、有序多分类或无序分类变量。
 - 假设3:自变量之间无多重共线性。
 - 假设4:模型满足“比例优势”假设。
 
选择自变量
1 2 3 4 5
   | paste0('"',paste(colnames(data$training_set), collapse = '", "'),'"') str(data$training_set$INFARCT_RELATED_VESSEL)  independent_variable = c(     "SEX", "AGE", "HEIGHT", "WEIGHT", "BMI", "BSA", "HR", "SBP", "DBP",  )
 
  | 
单因素回归

数据归一化
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
   | normalize <- function(x, mean_, sd_) {     return ((x - mean_) / sd_) } res = data.frame() for (iv in independent_variable){     res[iv, 'VarName'] = iv     if(is.factor(data$training_set[[iv]])){         res[iv, 'mean'] = 0         res[iv, 'sd'] = 0     }else{         res[iv, 'mean'] = mean(data$training_set[[iv]], na.rm = T)         res[iv, 'sd'] = sd(data$training_set[[iv]], na.rm = T)         data$training_set[[iv]] = normalize(data$training_set[[iv]], res[iv, 'mean'], res[iv, 'sd'])     } } res readr::write_csv(x = res, file = '../../data/normalize_train.csv')
 
  | 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
   | normalize <- function(x, min_, max_) {     return ((x - min_) / (max_ - min_)) } res = data.frame() for (iv in independent_variable){     res[iv, 'VarName'] = iv     if(is.factor(data$training_set[[iv]])){         res[iv, 'min'] = 0         res[iv, 'max'] = 0     }else{         res[iv, 'min'] = min(data$training_set[iv], na.rm = T)         res[iv, 'max'] = max(data$training_set[iv], na.rm = T)         data$training_set[[iv]] = normalize(data$training_set[[iv]], res[iv, 'min'], res[iv, 'max'])     } } res readr::write_csv(x = res, file = '../../data/normalize_train.csv')
 
  | 
Logistic 回归

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
   | res = data.frame() for (iv in independent_variable){     lmf <- formula(paste0("IMH~", iv))     m <- glm(lmf, family=binomial(link="logit"), data=data$training_set)     vns = names(coef(m))     vns = vns[2:length(vns)]      for (vn in vns){         res[vn, 'VarName'] = iv         res[vn, 'VarValue'] = vn         res[vn, 'mean'] = exp(coef(m)[vn])          res[vn, 'lower'] = exp(confint(m)[vn,1])         res[vn, 'upper'] = exp(confint(m)[vn,2])         res[vn, 'Pvalue'] = summary(m)$coefficients[vn, "Pr(>|z|)"]     } } res readr::write_csv(x = res, file = '../../data/test_glm_train.csv')
 
  | 
附加 线性回归
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
   | res = data.frame() for (iv in independent_variable){     lmf <- formula(paste0("HEMO_VOLUME~", iv))     m <- lm(lmf, data$training_set)     vns = names(coef(m))     vns = vns[2:length(vns)]      for (vn in vns){         res[vn, 'VarName'] = iv         res[vn, 'VarValue'] = vn         res[vn, 'mean'] = coef(m)[vn]         res[vn, 'lower'] = confint(m)[vn,1]         res[vn, 'upper'] = confint(m)[vn,2]         res[vn, 'Pvalue'] = summary(m)$coefficients[vn, "Pr(>|t|)"]     } } res readr::write_csv(x = res, file = '../../data/test_lm_train.csv')
 
  | 
多重共线性检测
1 2
   | qr(as.matrix(data$training_set[independent_variable]))$rank == length(independent_variable) kappa(cor(as.matrix(data$training_set[independent_variable])), exact= TRUE) < 100
 
  | 
缺失值插补
多因素回归
绘制森林图
1 2 3
   | df <- readRDS('Logistic.rds') options(repr.plot.width=8, repr.plot.height=6) f_forestplot(df, zero = 1)
 
  | 
【迁移】有序分类Logistic回归
https://hexo.limour.top/ordered-classification-logistic-regression