生长曲线 - Daijiang Li

体重

图中 P1 表示第 1 百分位，P25 表示第 25 百分位，以此类推。把鼠标放在图上可以看到原始数据。

身高

头围

WHO 的儿童生长数据在这里。接下来我们把这些数据读入 R，然后整理一下。

# Read the data
# Six WHO child growth tables are downloaded, one per measurement (weight,
# head circumference, length) and per sex. Every file is parsed with
# read.table() using its first row as the column header.

# Weight
weight.day.girl <- read.table(
  "http://www.who.int/childgrowth/standards/wfa_girls_p_exp.txt",
  header = TRUE
)
weight.day.boy <- read.table(
  "http://www.who.int/childgrowth/standards/wfa_boys_p_exp.txt",
  header = TRUE
)

# Head circumference
head.circ.day.girl <- read.table(
  "http://www.who.int/childgrowth/standards/second_set/hcfa_girls_p_exp.txt",
  header = TRUE
)
head.circ.day.boy <- read.table(
  "http://www.who.int/childgrowth/standards/second_set/hcfa_boys_p_exp.txt",
  header = TRUE
)

# Length
length.day.girl <- read.table(
  "http://www.who.int/childgrowth/standards/lhfa_girls_p_exp.txt",
  header = TRUE
)
length.day.boy <- read.table(
  "http://www.who.int/childgrowth/standards/lhfa_boys_p_exp.txt",
  header = TRUE
)

# Data wrangling
# rbind() on data frames matches columns by name, so the first column of the
# two length tables is renamed to "Age" (presumably the name the other four
# tables already use -- confirm against the downloaded files).
names(length.day.girl)[1] <- "Age"
names(length.day.boy)[1] <- "Age"

# Tag every table with the measurement it holds (type) and the sex (gender).
# type is always added before gender so the column order is identical in all
# six tables.
weight.day.girl$type <- "weight_kg"
weight.day.girl$gender <- "female"

weight.day.boy$type <- "weight_kg"
weight.day.boy$gender <- "male"

head.circ.day.girl$type <- "head.circ_cm"
head.circ.day.girl$gender <- "female"

head.circ.day.boy$type <- "head.circ_cm"
head.circ.day.boy$gender <- "male"

length.day.girl$type <- "length_cm"
length.day.girl$gender <- "female"

length.day.boy$type <- "length_cm"
length.day.boy$gender <- "male"

# Stack the six tables into one data frame and give the age column its
# final name.
wlh.perc <- rbind(
  weight.day.girl, weight.day.boy,
  head.circ.day.girl, head.circ.day.boy,
  length.day.girl, length.day.boy
)
names(wlh.perc)[1] <- "Age_day"

# Print the column names.
# L: Box-Cox power, M: median, S: coefficient of variation
names(wlh.perc)

# 需要的R包
library(reshape2)
library(plyr)
library(dplyr)
library(stringr)

# Drop the L, M and S parameter columns; every remaining non-id column is a
# percentile column.
wlh.perc <- select(wlh.perc, -c(L, M, S))

# Reshape to long format: one row per (Age_day, type, gender, percentile)
# with the measurement in `value`.
# The id columns are given by name instead of by position (1, 17, 18), so the
# reshape keeps working, and cannot silently pick the wrong columns, if the
# number of percentile columns in the WHO tables ever differs.
wlh.perc.long <- melt(
  wlh.perc,
  id.vars = c("Age_day", "type", "gender"),
  variable.name = "percentile"
)

Zoey的数据整理。

# Read Zoey's measurements; cells that read "0 cm" are treated as missing.
# `na.strings` and `FALSE` are spelled out in full instead of relying on
# partial argument matching (`na.string`) and the reassignable shortcut `F`.
zoey <- read.csv("Zoey_growth.csv", na.strings = "0 cm",
                 stringsAsFactors = FALSE)

# Keep only the text before the first comma of Time and parse it as a
# month/day/two-digit-year date. sub() works row by row, so rows with a
# different number of commas cannot break the conversion.
zoey$Time <- as.Date(sub(",.*$", "", zoey$Time), format = "%m/%d/%y")

# Age in days, counted from 2014-09-15 (presumably the birth date).
zoey$day <- as.numeric(zoey$Time - as.Date("2014-09-15"))

# Drop the last three characters (presumably the " cm" unit suffix) and
# convert the remainder to numbers.
zoey$Length <- as.numeric(str_sub(zoey$Length, 1, -4))
zoey$Head.Size <- as.numeric(str_sub(zoey$Head.Size, 1, -4))

# Weight is split into two parts: the leading digits are taken as pounds and
# the digits in front of the trailing " oz" token as ounces.
pod <- as.numeric(str_extract(string = zoey$Weight, pattern = "^[0-9]*"))
ounce <- as.numeric(
  str_extract(
    str_extract(string = zoey$Weight, pattern = "([0-9]*) oz.$"),
    pattern = "^[0-9]*"
  )
)
# Rows without an ounce part contribute zero ounces.
ounce[is.na(ounce)] <- 0

# Convert to kilograms (2.2046 lb per kg, 35.274 oz per kg).
zoey$weight_kg <- pod / 2.2046 + ounce / 35.274

画图

# Plotting setup: load ggplot2 and make the black-and-white theme the default.
library(ggplot2)
theme_set(theme_bw())

# Ages (in days) and percentile curves shown in the plots below.
days <- 0:60
quantile <- c("P1", "P25", "P50", "P75", "P99")

# Length: percentile curves for girls over the selected ages, with Zoey's
# own measurements (rows with missing values removed) drawn as points on top.
height <- wlh.perc.long %>%
  filter(
    percentile %in% quantile,
    Age_day %in% days,
    type == "length_cm",
    gender == "female"
  ) %>%
  ggplot(mapping = aes(x = Age_day, y = value)) +
  geom_line(mapping = aes(color = percentile)) +
  geom_point(
    mapping = aes(x = day, y = Length),
    data = na.omit(zoey[c("Length", "day")])
  )

# Head circumference: percentile curves for girls over the selected ages,
# with Zoey's own measurements (rows with missing values removed) drawn as
# points on top.
headsize <- wlh.perc.long %>%
  filter(
    percentile %in% quantile,
    Age_day %in% days,
    type == "head.circ_cm",
    gender == "female"
  ) %>%
  ggplot(mapping = aes(x = Age_day, y = value)) +
  geom_line(mapping = aes(color = percentile)) +
  geom_point(
    mapping = aes(x = day, y = Head.Size),
    data = na.omit(zoey[c("Head.Size", "day")])
  )

# Weight (kg): percentile curves for girls over the selected ages, with
# Zoey's own measurements (rows with missing values removed) drawn as points
# on top.
weight <- wlh.perc.long %>%
  filter(
    percentile %in% quantile,
    Age_day %in% days,
    type == "weight_kg",
    gender == "female"
  ) %>%
  ggplot(mapping = aes(x = Age_day, y = value)) +
  geom_line(mapping = aes(color = percentile)) +
  geom_point(
    mapping = aes(x = day, y = weight_kg),
    data = na.omit(zoey[c("weight_kg", "day")])
  )

# library(plotly)
# py = plotly()
# py$ggplotly(weight)