What’s in a Name?

# Keep only the rows with year >= 1980 and sex == 'M'.
babynames %>%
    filter(year >= 1980, sex == 'M')
# A tibble: 428,872 x 5
    year sex   name            n   prop
   <dbl> <chr> <chr>       <int>  <dbl>
 1  1980 M     Michael     68693 0.0370
 2  1980 M     Christopher 49092 0.0265
 3  1980 M     Jason       48173 0.0260
 4  1980 M     David       41923 0.0226
 5  1980 M     James       39325 0.0212
 6  1980 M     Matthew     37860 0.0204
 7  1980 M     Joshua      36060 0.0194
 8  1980 M     John        35279 0.0190
 9  1980 M     Robert      34281 0.0185
10  1980 M     Joseph      30202 0.0163
# ... with 428,862 more rows

# One row per name: `n` is the sum of `prop` over the filtered rows
# (year >= 1980, sex == 'M'), sorted from largest to smallest.
# Names whose summed `prop` is not above 0.01 are dropped.
# Rank numbers the surviving rows top-down; Index is the same numbering
# reversed, so the top-ranked name gets the largest Index.
name_dist <- babynames %>%
    filter(year >= 1980, sex == 'M') %>%
    count(name, wt = prop, sort = TRUE) %>%
    filter(n > 0.01) %>%
    mutate(
        Rank = row_number(),
        Index = rev(Rank)
    )

# Bar chart of summed `prop` by Index, with a red vertical line at the
# median Index value.
name_dist %>%
    ggplot(aes(x = Index, y = n)) +
    geom_col() +
    geom_vline(xintercept = median(name_dist[['Index']]), color = 'red')

# Same summary as before, restricted to names matching the pattern
# '^L|[EAIUO]l' (starts with "L", or contains one of E/A/I/U/O followed
# by a lowercase "l"). Names are kept when their summed `prop` exceeds
# 0.0001; Rank counts rows top-down and Index is Rank reversed.
l_names <- babynames %>%
    filter(
        year >= 1980,
        sex == 'M',
        str_detect(name, '^L|[EAIUO]l')
    ) %>%
    count(name, wt = prop, sort = TRUE) %>%
    filter(n > 0.0001) %>%
    mutate(
        Rank = row_number(),
        Index = rev(Rank)
    )

l_names
# A tibble: 587 x 4
   name           n  Rank Index
   <chr>      <dbl> <int> <int>
 1 Alexander 0.274      1   587
 2 Logan     0.152      2   586
 3 Elijah    0.127      3   585
 4 Luke      0.116      4   584
 5 Lucas     0.112      5   583
 6 Luis      0.102      6   582
 7 Alex      0.0979     7   581
 8 Liam      0.0935     8   580
 9 Landon    0.0711     9   579
10 Levi      0.0599    10   578
# ... with 577 more rows

  • Elliott
  • Lev

# Bar chart of summed `prop` by Index for the filtered names, with one
# colored vertical line at the Index of each of 'Lev' and 'Elliott'.
l_names %>%
    ggplot(aes(x = Index, y = n)) +
    geom_col() +
    geom_vline(
        mapping = aes(xintercept = Index, color = name),
        data = filter(l_names, name %in% c('Lev', 'Elliott'))
    )

Lev

Baby Arrives

🍼💩😴