Descriptive statistics

Alan Engel
Last update: 2 May 2022

This task produces a table of descriptive statistics modeled on Table 3 in Elbers (2021). Except where noted, the statistics are generated by the R script included below, which writes the table to Output/desc.tex and Output/desc.html.

1985 1990 1995 2000 2005
Sample size (in 1000) 58202 61241 63777 62275 60397
A. Number of occupations
Number of occupations 261 260 261 263 263
Appearing occupations – 1 1 2 1
Disappearing occupations – 2 0 0 1
B. Labor force participation (%)
Female 39 40 40 41 42
C. Distribution of occupational major groups (%)
A SPECIALIST AND TECHNICAL WORKERS 11 12 13 14 14
B ADMINISTRATIVE AND MANAGERIAL WORKERS 4 4 4 3 2
C CLERICAL WORKERS 18 19 19 20 20
D SALES WORKERS 14 14 15 15 15
E SERVICE WORKERS 7 7 8 9 10
F SECURITY WORKERS 1 1 1 2 2
G AGRICULTURE, FORESTRY AND FISHERY WORKERS 9 7 6 5 5
H TRANSPORT AND COMMUNICATION WORKERS 4 4 4 4 4
I MANUFACTURING AND PRODUCTION WORKERS 20 20 18 17 17
I STATIONARY ENGINE AND CONSTRUCTION MACHINERY OPERATORS AND ELECTRICITY WORKERS 2 2 2 2 2
I MINE WORKERS, CONSTRUCTION WORKERS AND LABOURERS 9 9 10 10 10
D. Female labor force by major groups (%)
A SPECIALIST AND TECHNICAL WORKERS 40 41 42 43 47
B ADMINISTRATIVE AND MANAGERIAL WORKERS 8 9 10 11 12
C CLERICAL WORKERS 57 61 62 62 61
D SALES WORKERS 37 37 37 36 38
E SERVICE WORKERS 63 63 64 65 66
F SECURITY WORKERS 3 3 5 5 5
G AGRICULTURE, FORESTRY AND FISHERY WORKERS 46 45 43 43 41
H TRANSPORT AND COMMUNICATION WORKERS 5 5 5 5 4
I MANUFACTURING AND PRODUCTION WORKERS 38 37 35 34 32
I STATIONARY ENGINE AND CONSTRUCTION MACHINERY OPERATORS AND ELECTRICITY WORKERS 1 1 1 1 1
I MINE WORKERS, CONSTRUCTION WORKERS AND LABOURERS 21 23 24 26 27
#' DescriptiveStatistics.R
#' 2021-12-15
#' Revised 2022-5-2

#' Adapted from
#' Elbers, Benjamin. A Method for Studying Differences in Segregation Across Time and Space. 
#' SocArXiv, 21 Dec. 2018, supplementary materials

library("tidyverse")
library("tidylog", warn.conflicts = FALSE)
library("fst")
library("ggthemes")
library("segregation")
library("cowplot")
library("knitr")
library("kableExtra")

#' Input: the data frame `occgenall` must already be in the workspace.
head(occgenall)
#' `hoc` holds the harmonized occupation classes that supply the main group
#' titles used further down (V02 created 2022-4-19).
#' hoc is created in BuildMasterDataset.R.

#' Working copy `d` for the Elbers-style scripts: `w` duplicates `n`, the four
#' occupation code columns become character and `year` becomes integer.
d <- occgenall %>%
    as_tibble() %>%
    mutate(
        w = n,
        OccMain = as.character(OccMain),
        OccSub = as.character(OccSub),
        OccMid = as.character(OccMid),
        OccMinor = as.character(OccMinor),
        year = as.integer(year)
    ) %>%
    # Code 1267 = workers not classifiable by occupation; drop those rows.
    filter(OccMinor != "1267")
head(d)

##########################
###### descriptives ######
##########################

#' One-row panel: per-year sum of `w`, divided by 1000 and rounded, with the
#' years spread into columns and the row label stored in `var`.
samplesize <- d %>%
    filter(!is.na(n)) %>%
    group_by(year) %>%
    summarise(n = round(sum(w) / 1000)) %>%
    mutate(var = "Sample size (in 1000)") %>%
    spread(key = year, value = n)

#' Distinct `OccMinor` codes observed in each year of the data.
occ1985 <- unique(pull(filter(d, year == 1985), OccMinor))
occ1990 <- unique(pull(filter(d, year == 1990), OccMinor))
occ1995 <- unique(pull(filter(d, year == 1995), OccMinor))
occ2000 <- unique(pull(filter(d, year == 2000), OccMinor))
occ2005 <- unique(pull(filter(d, year == 2005), OccMinor))

#' Codes that are present in all five years (left-to-right intersection).
common_occ <- Reduce(
    intersect,
    list(occ1985, occ1990, occ1995, occ2000, occ2005)
)

#' Panel A, built column by column. Each year column holds, in row order:
#'   1. the number of distinct occupation codes in that year,
#'   2. codes present in that year but not in the preceding one,
#'   3. codes present in the preceding year but not in that one.
#' 1985 has no preceding year, so its last two cells are NA.
nocc <- tibble(
    var = c(
        "Number of occupations",
        "Appearing occupations",
        "Disappearing occupations"
    ),
    `1985` = c(length(occ1985), NA, NA),
    `1990` = c(
        length(occ1990),
        length(setdiff(occ1990, occ1985)),
        length(setdiff(occ1985, occ1990))
    ),
    `1995` = c(
        length(occ1995),
        length(setdiff(occ1995, occ1990)),
        length(setdiff(occ1990, occ1995))
    ),
    `2000` = c(
        length(occ2000),
        length(setdiff(occ2000, occ1995)),
        length(setdiff(occ1995, occ2000))
    ),
    `2005` = c(
        length(occ2005),
        length(setdiff(occ2005, occ2000)),
        length(setdiff(occ2000, occ2005))
    )
)

#' Panel B: share of `w` (rounded percent) that belongs to Gender == "FEMALE"
#' in each year; one row, years as columns, label column named `var`.
gender <- d %>%
    filter(!is.na(w)) %>%
    group_by(year, Gender) %>%
    summarise(n = sum(w)) %>%
    # Percentages are taken within a year, across the Gender categories.
    group_by(year) %>%
    mutate(p = round(n / sum(n) * 100)) %>%
    filter(Gender == "FEMALE") %>%
    select(year, var = Gender, p) %>%
    spread(key = year, value = p)

#' Panel C: each (OccMain, OccSub) group's share of the yearly total of `w`
#' (rounded percent), years as columns. Rows are labelled with the OccMain code
#' followed by the group title looked up in `hoc`.
occ <- d %>%
    filter(!is.na(w)) %>%
    group_by(year, OccMain, OccSub) %>%
    summarise(n = sum(w)) %>%
    # Shares are relative to the whole year, not to the major group.
    group_by(year) %>%
    mutate(p = round(n / sum(n) * 100)) %>%
    select(-n) %>%
    spread(key = year, value = p) %>%
    # Only hoc rows with a non-empty MainTitle take part in the merge; the
    # merge keys are the columns shared by both tables (OccMain, OccSub).
    merge(
        hoc[nchar(hoc$MainTitle) > 0, c("OccMain", "OccSub", "MainTitle")]
    ) %>%
    mutate(var = paste(OccMain, MainTitle)) %>%
    select(var, everything(), -OccMain, -OccSub, -MainTitle)

#' Panel D: within each (year, OccMain, OccSub) group, the share of `w`
#' (rounded percent) that belongs to Gender == "FEMALE"; years as columns.
#' Rows are labelled with the OccMain code followed by the title from `hoc`.
occ_gender <- d %>%
    filter(!is.na(w)) %>%
    group_by(year, OccMain, OccSub, Gender) %>%
    summarise(n = sum(w)) %>%
    # Percentages are taken within an occupation group and year.
    group_by(year, OccMain, OccSub) %>%
    mutate(p = round(n / sum(n) * 100)) %>%
    filter(Gender == "FEMALE") %>%
    select(-c(n, Gender)) %>%
    spread(key = year, value = p) %>%
    # Only hoc rows with a non-empty MainTitle take part in the merge; the
    # merge keys are the columns shared by both tables (OccMain, OccSub).
    merge(
        hoc[nchar(hoc$MainTitle) > 0, c("OccMain", "OccSub", "MainTitle")]
    ) %>%
    mutate(var = paste(OccMain, MainTitle)) %>%
    select(var, everything(), -OccMain, -OccSub, -MainTitle)

#' Stack the panels in display order: sample size, occupation counts,
#' female share, occupational distribution, female share by occupation group.
desc <- bind_rows(samplesize, nocc, gender, occ, occ_gender)
# Relabel the gender row by its value rather than by a fixed row number, so
# the right row is renamed even if an upstream panel gains or loses rows.
# `%in%` never yields NA, so missing labels cannot break the assignment.
desc$var[desc$var %in% "FEMALE"] <- "Female"
# The label column is printed without a header.
names(desc)[1] <- ""

#' Render `desc` as a LaTeX table and write it to Output/desc.tex.
# Print NA cells as empty strings. This is a session-wide knitr option and
# also applies to any kable() call made after this point.
options(knitr.kable.NA = "")
# TRUE instead of T: T is an ordinary variable and can be reassigned.
kable(desc, "latex", booktabs = TRUE) %>%
    # NOTE(review): "condensed" looks like a bootstrap (HTML) option rather
    # than a LaTeX one; kept as-is here - confirm and drop if it is unused.
    kable_styling(latex_options = c("striped", "condensed"),
        position = "center") %>%
    row_spec(0, align = "c") %>%
    # pack_rows() is kableExtra's current name for group_rows(); it avoids the
    # name clash with dplyr::group_rows(). The row ranges are positions in
    # `desc` and must be updated if the number of rows per panel changes.
    pack_rows("A. Number of occupations", 2, 4) %>%
    pack_rows("B. Labor force participation (%)", 5, 5) %>%
    pack_rows("C. Distribution of occupational major groups (%)", 6, 16) %>%
    pack_rows("D. Female labor force by major groups (%)", 17, 27) %>%
    cat(file = "Output/desc.tex", sep = "\n")

#' Render `desc` as an HTML table and write it to Output/desc.html.
# `booktabs` is a LaTeX-only argument, so it is not passed for HTML output.
kable(desc, "html") %>%
    # HTML styling is controlled by `bootstrap_options`; `latex_options` has no
    # effect on an HTML table, so the striped/condensed look was never applied.
    kable_styling(bootstrap_options = c("striped", "condensed"),
        position = "center") %>%
    row_spec(0, align = "c") %>%
    # pack_rows() is kableExtra's current name for group_rows(); it avoids the
    # name clash with dplyr::group_rows(). The row ranges are positions in
    # `desc` and must be updated if the number of rows per panel changes.
    pack_rows("A. Number of occupations", 2, 4) %>%
    pack_rows("B. Labor force participation (%)", 5, 5) %>%
    pack_rows("C. Distribution of occupational major groups (%)", 6, 16) %>%
    pack_rows("D. Female labor force by major groups (%)", 17, 27) %>%
    cat(file = "Output/desc.html", sep = "\n")

References

Elbers, Benjamin (2021). A Method for Studying Differences in Segregation Across Time and Space. Sociological Methods & Research. https://doi.org/10.1177/0049124121986204