This task produces a table of descriptive statistics similar to Table 3 in Elbers (2021). Except where noted, the statistics are generated using the R scripts included below.
| | 1985 | 1990 | 1995 | 2000 | 2005 |
---|---|---|---|---|---|
Sample size (in 1000) | 58202 | 61241 | 63777 | 62275 | 60397 |
A. Number of occupations | |||||
Number of occupations | 261 | 260 | 261 | 263 | 263 |
Appearing occupations | | 1 | 1 | 2 | 1 |
Disappearing occupations | | 2 | 0 | 0 | 1 |
B. Labor force participation (%) | |||||
Female | 39 | 40 | 40 | 41 | 42 |
C. Distribution of occupational major groups (%) | |||||
A SPECIALIST AND TECHNICAL WORKERS | 11 | 12 | 13 | 14 | 14 |
B ADMINISTRATIVE AND MANAGERIAL WORKERS | 4 | 4 | 4 | 3 | 2 |
C CLERICAL WORKERS | 18 | 19 | 19 | 20 | 20 |
D SALES WORKERS | 14 | 14 | 15 | 15 | 15 |
E SERVICE WORKERS | 7 | 7 | 8 | 9 | 10 |
F SECURITY WORKERS | 1 | 1 | 1 | 2 | 2 |
G AGRICULTURE, FORESTRY AND FISHERY WORKERS | 9 | 7 | 6 | 5 | 5 |
H TRANSPORT AND COMMUNICATION WORKERS | 4 | 4 | 4 | 4 | 4 |
I MANUFACTURING AND PRODUCTION WORKERS | 20 | 20 | 18 | 17 | 17 |
I STATIONARY ENGINE AND CONSTRUCTION MACHINERY OPERATORS AND ELECTRICITY WORKERS | 2 | 2 | 2 | 2 | 2 |
I MINE WORKERS, CONSTRUCTION WORKERS AND LABOURERS | 9 | 9 | 10 | 10 | 10 |
D. Female labor force by major groups (%) | |||||
A SPECIALIST AND TECHNICAL WORKERS | 40 | 41 | 42 | 43 | 47 |
B ADMINISTRATIVE AND MANAGERIAL WORKERS | 8 | 9 | 10 | 11 | 12 |
C CLERICAL WORKERS | 57 | 61 | 62 | 62 | 61 |
D SALES WORKERS | 37 | 37 | 37 | 36 | 38 |
E SERVICE WORKERS | 63 | 63 | 64 | 65 | 66 |
F SECURITY WORKERS | 3 | 3 | 5 | 5 | 5 |
G AGRICULTURE, FORESTRY AND FISHERY WORKERS | 46 | 45 | 43 | 43 | 41 |
H TRANSPORT AND COMMUNICATION WORKERS | 5 | 5 | 5 | 5 | 4 |
I MANUFACTURING AND PRODUCTION WORKERS | 38 | 37 | 35 | 34 | 32 |
I STATIONARY ENGINE AND CONSTRUCTION MACHINERY OPERATORS AND ELECTRICITY WORKERS | 1 | 1 | 1 | 1 | 1 |
I MINE WORKERS, CONSTRUCTION WORKERS AND LABOURERS | 21 | 23 | 24 | 26 | 27 |
#' DescriptiveStatistics.R
#' 2021-12-15
#' Revised 2022-5-2
#' Adapted from
#' Elbers, Benjamin. A Method for Studying Differences in Segregation Across Time and Space.
#' SocArXiv, 21 Dec. 2018, supplementary materials
library("tidyverse")
library("tidylog", warn.conflicts = FALSE)
library("fst")
library("ggthemes")
library("segregation")
library("cowplot")
library("knitr")
library("kableExtra")
## Input data: `occgenall` must already exist in the session; it is not
## created in this part of the script.
head(occgenall)
#' Harmonized occupation classes (`hoc`) are used further down to look up the
#' main group titles.
# V02 created 2022-4-19
#' hoc is created in BuildMasterDataset.R.
# Working copy `d`, laid out like the data in the Elbers script:
#   * `w` is a copy of the count column `n` and is used as the weight;
#   * the four occupation code columns are converted to character;
#   * `year` is converted to integer.
# Each occupation column is converted exactly once (the original converted
# OccSub twice in the same call).
d <- as_tibble(occgenall) %>%
  mutate(
    w = n,
    OccMain = as.character(OccMain),
    OccSub = as.character(OccSub),
    OccMid = as.character(OccMid),
    OccMinor = as.character(OccMinor),
    year = as.integer(year)
  ) %>%
  # Remove "Workers not classifiable by occupation" (minor code 1267).
  # OccMinor is character at this point, so compare against a string rather
  # than relying on implicit numeric-to-character coercion. Rows whose
  # OccMinor is NA are dropped by filter() as well.
  filter(OccMinor != "1267")
head(d)
##########################
###### descriptives ######
##########################
# Sample size per year: the sum of the weights `w`, divided by 1000 and
# rounded. The result is a one-row wide table with a label column `var`
# followed by one column per year.
# Missing values are filtered on `w`, the column that is actually summed,
# which matches the other summary blocks in this script.
samplesize <- d %>%
  filter(!is.na(w)) %>%
  group_by(year) %>%
  summarize(n = sum(w)) %>%
  mutate(
    var = "Sample size (in 1000)",
    n = round(n / 1000)
  ) %>%
  # names_sort = TRUE keeps the year columns in ascending order, as spread()
  # did.
  pivot_wider(names_from = year, values_from = n, names_sort = TRUE)
# Distinct minor occupation codes (OccMinor) observed in each year.
occ1985 <- d %>% filter(year == 1985) %>% pull(OccMinor) %>% unique()
occ1990 <- d %>% filter(year == 1990) %>% pull(OccMinor) %>% unique()
occ1995 <- d %>% filter(year == 1995) %>% pull(OccMinor) %>% unique()
occ2000 <- d %>% filter(year == 2000) %>% pull(OccMinor) %>% unique()
occ2005 <- d %>% filter(year == 2005) %>% pull(OccMinor) %>% unique()
# Codes that occur in all five years: fold intersect() over the per-year sets,
# left to right.
common_occ <- Reduce(
  intersect,
  list(occ1985, occ1990, occ1995, occ2000, occ2005)
)
# Panel A: occupation counts per year, built one year-column at a time.
# Row 1 is the number of distinct minor codes in that year.
# Row 2 counts codes present in that year but not in the preceding one.
# Row 3 counts codes present in the preceding year but not in that one.
# 1985 is the first year, so it has no preceding year and rows 2-3 are NA.
nocc <- tibble(
  var = c(
    "Number of occupations",
    "Appearing occupations",
    "Disappearing occupations"
  ),
  `1985` = c(
    length(occ1985),
    NA,
    NA
  ),
  `1990` = c(
    length(occ1990),
    length(setdiff(occ1990, occ1985)),
    length(setdiff(occ1985, occ1990))
  ),
  `1995` = c(
    length(occ1995),
    length(setdiff(occ1995, occ1990)),
    length(setdiff(occ1990, occ1995))
  ),
  `2000` = c(
    length(occ2000),
    length(setdiff(occ2000, occ1995)),
    length(setdiff(occ1995, occ2000))
  ),
  `2005` = c(
    length(occ2005),
    length(setdiff(occ2005, occ2000)),
    length(setdiff(occ2000, occ2005))
  )
)
# Panel B: female share of the weighted total in each year, as a rounded
# percentage. The result is a one-row wide table with a label column `var`
# (value "FEMALE") followed by one column per year.
gender <- d %>%
  filter(!is.na(w)) %>%
  group_by(year, Gender) %>%
  summarize(n = sum(w), .groups = "drop") %>%
  # Percentages are taken within each year.
  group_by(year) %>%
  mutate(p = round(n / sum(n) * 100)) %>%
  # Drop the grouping before reshaping so that `year` is an ordinary column
  # when it is spread into column names.
  ungroup() %>%
  filter(Gender == "FEMALE") %>%
  select(-n) %>%
  # names_sort = TRUE keeps the year columns in ascending order, as spread()
  # did.
  pivot_wider(names_from = year, values_from = p, names_sort = TRUE) %>%
  rename(var = Gender)
# Attach the main group title to a wide table keyed by OccMain and OccSub.
#
# `shares` must contain the columns OccMain and OccSub plus one column per
# year. It is merged with the rows of `hoc` that have a non-empty MainTitle,
# using the columns the two tables have in common. The label column `var` is
# then "<OccMain> <MainTitle>", and OccSub and MainTitle are dropped.
# NOTE(review): assumes `hoc` has at most one titled row per
# (OccMain, OccSub) pair; duplicates would duplicate table rows. Confirm
# against BuildMasterDataset.R.
add_major_group_titles <- function(shares) {
  titled <- hoc[nchar(hoc$MainTitle) > 0, c("OccMain", "OccSub", "MainTitle")]
  shares %>%
    merge(titled) %>%
    mutate(OccMain = paste(OccMain, MainTitle, sep = " ")) %>%
    select(-MainTitle, -OccSub) %>%
    rename(var = OccMain)
}

# Panel C: share of each (OccMain, OccSub) group in the year's weighted
# total, as a rounded percentage, with one column per year.
occ <- d %>%
  filter(!is.na(w)) %>%
  group_by(year, OccMain, OccSub) %>%
  summarize(n = sum(w), .groups = "drop") %>%
  # Percentages are taken within each year.
  group_by(year) %>%
  mutate(p = round(n / sum(n) * 100)) %>%
  ungroup() %>%
  select(-n) %>%
  # names_sort = TRUE keeps the year columns in ascending order, as spread()
  # did. Row order is set by the merge() in the helper.
  pivot_wider(names_from = year, values_from = p, names_sort = TRUE) %>%
  add_major_group_titles()

# Panel D: female share within each (year, OccMain, OccSub) cell, as a
# rounded percentage, with one column per year. A cell with no FEMALE row has
# no value for that year and shows up as NA after reshaping.
occ_gender <- d %>%
  filter(!is.na(w)) %>%
  group_by(year, OccMain, OccSub, Gender) %>%
  summarize(n = sum(w), .groups = "drop") %>%
  # Percentages are taken within each year-by-group cell.
  group_by(year, OccMain, OccSub) %>%
  mutate(p = round(n / sum(n) * 100)) %>%
  ungroup() %>%
  filter(Gender == "FEMALE") %>%
  select(-n, -Gender) %>%
  pivot_wider(names_from = year, values_from = p, names_sort = TRUE) %>%
  add_major_group_titles()
# Stack the pieces into the final table: the sample-size row first, then
# panels A (nocc), B (gender), C (occ) and D (occ_gender).
desc <- bind_rows(samplesize, nocc, gender, occ, occ_gender)

# Row ranges of the four panels, derived from the size of each component
# rather than hard-coded. With 1 + 3 + 1 + 11 + 11 rows this gives
# 2-4, 5-5, 6-16 and 17-27.
panel_titles <- c(
  "A. Number of occupations",
  "B. Labor force participation (%)",
  "C. Distribution of occupational major groups (%)",
  "D. Female labor force by major groups (%)"
)
panel_sizes <- c(nrow(nocc), nrow(gender), nrow(occ), nrow(occ_gender))
panel_end <- nrow(samplesize) + cumsum(panel_sizes)
panel_start <- panel_end - panel_sizes + 1

# The gender row carries the raw category value "FEMALE"; show it as "Female".
desc$var[desc$var %in% "FEMALE"] <- "Female"
# The label column gets an empty header in the rendered table.
names(desc)[[1]] <- ""

# Print missing cells as blanks instead of "NA".
options(knitr.kable.NA = "")

# Render `tbl` with kable in the given format, centre the header row and add
# the four panel headings. Arguments in `...` are passed to kable_styling(),
# so each output format can receive its own styling options.
render_desc <- function(tbl, format, ...) {
  styled <- kable(tbl, format, booktabs = TRUE) %>%
    kable_styling(..., position = "center") %>%
    row_spec(0, align = "c")
  for (i in seq_along(panel_titles)) {
    # Skip empty panels: they have no rows to put a heading over.
    if (panel_sizes[[i]] > 0) {
      styled <- pack_rows(
        styled, panel_titles[[i]], panel_start[[i]], panel_end[[i]]
      )
    }
  }
  styled
}

# cat() cannot create a missing directory, so make sure it exists.
dir.create("Output", showWarnings = FALSE, recursive = TRUE)

render_desc(desc, "latex", latex_options = c("striped", "condensed")) %>%
  cat(file = "Output/desc.tex", sep = "\n")

# HTML tables are styled through `bootstrap_options`; `latex_options` is not
# used for HTML output, so the original call left the HTML table unstyled.
render_desc(desc, "html", bootstrap_options = c("striped", "condensed")) %>%
  cat(file = "Output/desc.html", sep = "\n")