library(babynames)
# Preview the rows for names recorded as sex "M" from 1980 onwards.
# Separate conditions passed to filter() are combined with AND.
babynames %>%
  filter(
    year >= 1980,
    sex == "M"
  )
#> # A tibble: 428,872 x 5
#>     year sex   name            n   prop
#>    <dbl> <chr> <chr>       <int>  <dbl>
#>  1  1980 M     Michael     68693 0.0370
#>  2  1980 M     Christopher 49092 0.0265
#>  3  1980 M     Jason       48173 0.0260
#>  4  1980 M     David       41923 0.0226
#>  5  1980 M     James       39325 0.0212
#>  6  1980 M     Matthew     37860 0.0204
#>  7  1980 M     Joshua      36060 0.0194
#>  8  1980 M     John        35279 0.0190
#>  9  1980 M     Robert      34281 0.0185
#> 10  1980 M     Joseph      30202 0.0163
#> # ... with 428,862 more rows
# Aggregate names recorded as sex "M" from 1980 onwards.
# count() with `wt = prop` sums `prop` per name into column `n`, and
# `sort = TRUE` orders the result by descending `n`.
# Only names whose summed `prop` exceeds 0.01 are kept.
name_dist <- babynames %>%
  filter(year >= 1980, sex == "M") %>%
  count(name, wt = prop, sort = TRUE) %>%
  filter(n > 0.01) %>%
  mutate(
    # Position in the sorted result: 1 is the largest `n`.
    Rank = row_number(),
    # Reversed rank, so the largest `n` gets the highest Index.
    Index = rev(Rank)
  )
# Bar chart of summed `prop` (`n`) against Index, with a red vertical line
# marking the median Index value.
name_dist %>%
  ggplot(mapping = aes(x = Index, y = n)) +
  geom_col() +
  geom_vline(
    xintercept = median(name_dist[["Index"]]),
    colour = "red"
  )
# Same aggregation as for all names, restricted to names that either start
# with "L" or contain one of the uppercase vowels E/A/I/U/O immediately
# followed by a lowercase "l". Names with a summed `prop` of 0.0001 or less
# are dropped.
l_names <- babynames %>%
  filter(
    year >= 1980,
    sex == "M",
    str_detect(name, "^L|[EAIUO]l")
  ) %>%
  count(name, wt = prop, sort = TRUE) %>%
  filter(n > 0.0001) %>%
  mutate(
    # Position in the sorted result: 1 is the largest `n`.
    Rank = row_number(),
    # Reversed rank, so the largest `n` gets the highest Index.
    Index = rev(Rank)
  )

# Print the resulting tibble.
l_names
#> # A tibble: 587 x 4
#>    name           n  Rank Index
#>    <chr>      <dbl> <int> <int>
#>  1 Alexander 0.274      1   587
#>  2 Logan     0.152      2   586
#>  3 Elijah    0.127      3   585
#>  4 Luke      0.116      4   584
#>  5 Lucas     0.112      5   583
#>  6 Luis      0.102      6   582
#>  7 Alex      0.0979     7   581
#>  8 Liam      0.0935     8   580
#>  9 Landon    0.0711     9   579
#> 10 Levi      0.0599    10   578
#> # ... with 577 more rows
# Bar chart of summed `prop` (`n`) against Index for the L-style names, with
# one vertical line per highlighted name ("Lev", "Elliott"), coloured by name.
l_names %>%
  ggplot(mapping = aes(x = Index, y = n)) +
  geom_col() +
  geom_vline(
    data = l_names %>% filter(name %in% c("Lev", "Elliott")),
    mapping = aes(xintercept = Index, colour = name)
  )
# Lev
# 🍼💩😴