In [63]:
import pandas as pd
import re
import matplotlib.pyplot as plt
import seaborn
import numpy as np
In [64]:
plt.rc('figure', figsize=(16, 6))
In [65]:
%matplotlib inline
In [66]:
summary = pd.read_csv("atusdata/atussum_2013/atussum_2013.dat")
summary.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 11385 entries, 0 to 11384
Columns: 413 entries, tucaseid to t500107
dtypes: float64(1), int64(412)
memory usage: 36.0 MB
In [67]:
summary.head()
Out[67]:
tucaseid TUFINLWGT TRYHHCHILD TEAGE TESEX PEEDUCA PTDTRACE PEHSPNON GTMETSTA TELFS ... t181501 t181599 t181601 t181801 t189999 t500101 t500103 t500105 t500106 t500107
0 20130101130004 11899905.662034 12 22 2 40 8 2 1 5 ... 0 0 0 0 0 0 0 0 0 0
1 20130101130112 4447638.009513 1 39 1 43 1 2 1 1 ... 0 0 0 0 0 0 0 0 0 0
2 20130101130123 10377056.507734 -1 47 2 40 1 2 1 4 ... 25 0 0 0 0 0 0 0 0 0
3 20130101130611 7731257.992805 -1 50 2 40 1 1 1 1 ... 0 0 0 0 0 0 0 0 0 0
4 20130101130616 4725269.227067 -1 45 2 40 2 2 1 1 ... 0 0 0 0 0 0 0 0 0 0

5 rows × 413 columns

In [68]:
summary =summary.replace(-1, np.nan)
Setting up some initial criteria from the larger sample:
In [69]:
adults_crit = (summary.TEAGE >= 18)
no_children_crit = (summary.TRCHILDNUM == 0)
men_crit = (summary.TESEX == 1)
has_partner = (summary.TRSPPRES <= 2)
has_job = (summary.TELFS <= 2)
partner_has_job = (summary.TESPEMPNOT == 1)
I want to take a look at how much time men and women are spending on household tasks. I'm looking to compare adults with no children to adults with children. I want to see how time spent on household tasks changes for people when they have children. I also want to analyze the relationship between the sexes - a lot of these tasks are (traditionally, conservatively, etc) "women's work" and I wanted to see whether there is more or less gender equality when there are children in the home.
In [70]:
adults_with_no_children = summary[adults_crit & no_children_crit & has_partner]
adults_with_children = summary[adults_crit & ~no_children_crit & has_partner]
men_with_no_children = summary[adults_crit & no_children_crit & men_crit & has_partner]
men_with_children = summary[adults_crit & ~no_children_crit & men_crit & has_partner]
women_with_no_children = summary[adults_crit & no_children_crit & ~men_crit & has_partner]
women_with_children = summary[adults_crit & ~no_children_crit & ~men_crit & has_partner]
In [71]:
def activity_columns(data, activity_code):
    """For the activity code given, return all columns that fall under that activity."""
    col_prefix = "t{}".format(activity_code)
    return [column for column in data.columns if re.match(col_prefix, column)]
In [72]:
def average_minutes(data, activity_code):
    cols = activity_columns(data, activity_code)
    activity_data = data[cols]
    activity_sums = activity_data.sum(axis=1)
    data = data[['TUFINLWGT']]
    data['minutes'] = activity_sums
    data = data.rename(columns={"TUFINLWGT": "weight"})
    data['weighted_minutes'] = data.weight * data.minutes
    return round((data.weighted_minutes.sum() / data.weight.sum()), 2)
In [73]:
all_adult_with_kids_housework = average_minutes(adults_with_children, "02")
In [74]:
all_adult_no_kids_housework = average_minutes(adults_with_no_children, "02")
In [75]:
def avg_getter(idgroup, codelist):
    """Returns a list of average minutes for each coded activity in the codelist for the idgroup"""
    particulars = []
    for x in codelist:
        particulars.append(average_minutes(idgroup, x))
    return particulars
In [76]:
household_dict = {"0201":"Housework", "0202":"Food & Drink Prep", "0203":"Interior Maintenance", "0204":"Exterior Maintenance",
                  "0205":"Lawn, Garden and Plant Care", "020901":"Financial Management"}
In [77]:
household_list = ["0201", "0202", "0203", "0204", "0205", "020901"]
In [78]:
labels = ["Housework", "Food & Drink Prep", "Interior Maintentance", "Exterior Maintenance",
          "Lawn, Garden and Plant Care", "Financial Management", "Other"]
In [79]:
def household_breakdown(idgroup):
    all_household = average_minutes(idgroup, "02")
    particulars = avg_getter(idgroup, household_list)
    other = all_household - sum(particulars)
    particulars.append(other)
    return particulars
In [80]:
x =household_breakdown(men_with_children)
print(x)
[15.76, 24.940000000000001, 4.5599999999999996, 4.5199999999999996, 12.050000000000001, 1.1000000000000001, 16.270000000000003]
In [81]:
mwc_all_household = average_minutes(men_with_children, "02")
mwc_house = household_breakdown(men_with_children)
In [82]:
wwc_all_household = average_minutes(women_with_children, "02")
wwc_house = household_breakdown(women_with_children)
In [83]:
mnc_all_household = average_minutes(men_with_no_children, "02")
mnc_house = household_breakdown(men_with_no_children)
In [84]:
wnc_all_household = average_minutes(women_with_no_children, "02")
wnc_house = household_breakdown(women_with_no_children)
In [85]:
total_housework = pd.Series([mnc_all_household, mwc_all_household, wnc_all_household, wwc_all_household], index=(
    ["Men w/out Children", "Men w/ Children", "Women w/out Children", "Women w/ Children"]))
Let's start out with a basic look at how much total housework men and women do, with and without children. This encompasses all of the activities under the "02" section of the ATUS summary document - "Household Activities." Clearly, some of our assumptions about the existence of "gender roles" in America are visible. Women without children spend around 1.5 times as much time on household activities as men without children. Women with children spend almost twice as much time on household activities as men with children.
Having children in the house actually results in about a 20% decrease in the time that men spend on household activities. The time that women spend stays almost exactly the same, although we'll see later that the breakdown of which household activities they are engaged in changes significantly.
In [86]:
total_housework.plot(kind="bar", figsize=(15,5))
total_df = pd.DataFrame(total_housework)
total_df.columns = ["Minutes"]
print(total_df)
plt.show()
                      Minutes
Men w/out Children     102.78
Men w/ Children         79.20
Women w/out Children   155.92
Women w/ Children      156.34
In [87]:
ratio_men = mnc_all_household / all_adult_no_kids_housework
ratio_women = wnc_all_household / all_adult_no_kids_housework
ratio_men_kids = mwc_all_household / all_adult_with_kids_housework
ratio_women_kids = wwc_all_household /all_adult_with_kids_housework
In [88]:
ratio_men + ratio_women
Out[88]:
1.9933734011403912
In [89]:
ratio_men_kids + ratio_women_kids
Out[89]:
2.0190296588376482
I wanted to look at how things break down when we take a more detailed look at the household tasks on which people are spending their time. I looked at 6 categories: housework(includes interior cleaning, picking up, laundry), food and drink preparation, interior maintenance, exterior maintenance, lawn and garden care, and financial management. I wanted to make sure I looked at some activities that weren't quite as historically gendered as cooking or cleaning. I also tracked an "other" category that consisted simply of all other time that individuals of each group spent on household activities.
In [89]:
 
In [90]:
mwc = pd.Series(mwc_house, index=labels)
mnc = pd.Series(mnc_house, index=labels)
wwc = pd.Series(wwc_house, index=labels)
wnc = pd.Series(wnc_house, index=labels)

housework_df = pd.DataFrame(mnc, index=labels)

housework_df.columns = ["Men w/out Children"]
housework_df['Men w/ Children'] = mwc
housework_df['Women w/out Children'] = wnc
housework_df['Women w/ Children'] = wwc
housework_df
Out[90]:
Men w/out Children Men w/ Children Women w/out Children Women w/ Children
Housework 14.20 15.76 57.24 66.46
Food & Drink Prep 20.57 24.94 52.13 66.72
Interior Maintentance 8.26 4.56 4.41 2.24
Exterior Maintenance 7.19 4.52 2.88 0.92
Lawn, Garden and Plant Care 26.39 12.05 11.37 5.04
Financial Management 1.93 1.10 3.54 1.08
Other 24.24 16.27 24.35 13.88
In [91]:
housework_df.plot(kind="bar", figsize=(18,10))
plt.show()
In [92]:
housework_df.plot(figsize=(18,6))
plt.show()
The data below provides a really interesting look at how things change when children are added to the equation. For men and women, housework and food/drink prep increase. For women, they increase substantially. In all other categories, they spend less time after they have children. When there are children in the house, people focus on what's absolutely necessary: picking up the house and feeding the family. This dynamic is even more evident in women.
In [93]:
housework_df["Child Diff Men"] = housework_df["Men w/ Children"] - housework_df["Men w/out Children"]
housework_df["Child Diff Women"] = housework_df["Women w/ Children"] - housework_df["Women w/out Children"]
housework_df
Out[93]:
Men w/out Children Men w/ Children Women w/out Children Women w/ Children Child Diff Men Child Diff Women
Housework 14.20 15.76 57.24 66.46 1.56 9.22
Food & Drink Prep 20.57 24.94 52.13 66.72 4.37 14.59
Interior Maintentance 8.26 4.56 4.41 2.24 -3.70 -2.17
Exterior Maintenance 7.19 4.52 2.88 0.92 -2.67 -1.96
Lawn, Garden and Plant Care 26.39 12.05 11.37 5.04 -14.34 -6.33
Financial Management 1.93 1.10 3.54 1.08 -0.83 -2.46
Other 24.24 16.27 24.35 13.88 -7.97 -10.47
In [94]:
housework_df[["Child Diff Men", "Child Diff Women"]].plot(kind="bar", figsize=(15,6))
plt.axhline(y=0, color="black")
plt.show()
In [95]:
women_chart = housework_df[["Women w/out Children", "Women w/ Children", "Child Diff Women"]]
women_chart
Out[95]:
Women w/out Children Women w/ Children Child Diff Women
Housework 57.24 66.46 9.22
Food & Drink Prep 52.13 66.72 14.59
Interior Maintentance 4.41 2.24 -2.17
Exterior Maintenance 2.88 0.92 -1.96
Lawn, Garden and Plant Care 11.37 5.04 -6.33
Financial Management 3.54 1.08 -2.46
Other 24.35 13.88 -10.47
In [96]:
wnc.plot(kind="pie", figsize=(8,8), colors = ['b', 'g', 'white', 'orange', 'grey', 'red', 'tan'], autopct='%.2f')
plt.title("Division of household tasks for women with no children")
plt.show()
#print("")
wwc.plot(kind="pie", figsize=(8,8), colors = ['b', 'g', 'white', 'orange', 'grey', 'red', 'tan'], autopct='%.2f')
plt.title("Division of household tasks for women with children")
plt.show()
Let's even the playing field a bit. Let's look at men and women who both work, and how their share of household tasks changes with children.
In [97]:
working_male = summary[adults_crit & no_children_crit & men_crit & has_partner & has_job & partner_has_job]
working_male_kids = summary[adults_crit & ~no_children_crit & men_crit & has_partner & has_job & partner_has_job]
working_female = summary[adults_crit & no_children_crit & ~men_crit & has_partner & has_job & partner_has_job]
working_female_kids = summary[adults_crit & ~no_children_crit & ~men_crit & has_partner & has_job & partner_has_job]
In [98]:
wm_all_household = average_minutes(working_male, "02")
wm_house = household_breakdown(working_male)
In [99]:
wmk_all_household = average_minutes(working_male_kids, "02")
wmk_house = household_breakdown(working_male_kids)
In [100]:
wf_all_household = average_minutes(working_female, "02")
wf_house = household_breakdown(working_female)
In [101]:
wfk_all_household = average_minutes(working_female_kids, "02")
wfk_house = household_breakdown(working_female_kids)
In [102]:
total_working_housework = pd.Series([wm_all_household, wmk_all_household, wf_all_household, wfk_all_household], index=(
    ["Men w/out Children", "Men w/ Children", "Women w/out Children", "Women w/ Children"]))
In [103]:
total_working_housework.plot(kind="bar", figsize=(15,5))
total_working_df = pd.DataFrame(total_working_housework)
total_working_df.columns = ["Minutes"]
print(total_working_df)
plt.title("Minutes per day spent on household activities in families where both parents are employed")
plt.show()
                      Minutes
Men w/out Children      90.85
Men w/ Children         78.73
Women w/out Children   125.47
Women w/ Children      132.35
We can see that when both partners are working, there's a general decrease across the board in terms of total time spent on housework. The exception is men with children, who contrbute the same amount, whether or not both partners are working.
In [104]:
all_total_df = pd.DataFrame(total_housework)
all_total_df.columns = ["Total Housework"]
all_total_df["Total Housework, Both Working"] = total_working_housework
all_total_df.plot(kind="bar", figsize=(15,5))
plt.show()
In [105]:
mwc = pd.Series(mwc_house, index=labels)
mnc = pd.Series(mnc_house, index=labels)
wwc = pd.Series(wwc_house, index=labels)
wnc = pd.Series(wnc_house, index=labels)

wm = pd.Series(wm_house, index=labels)
wmk = pd.Series(wmk_house, index=labels)
wf = pd.Series(wf_house, index=labels)
wfk = pd.Series(wfk_house, index=labels)
large_housework_df = pd.DataFrame(mnc, index=labels)
large_housework_df.columns = ["Men w/out Children"]
large_housework_df['Men w/ Children'] = mwc
large_housework_df['Both Working, Men w/out Children'] = wmk
large_housework_df['Both Working, Men w/ Children'] = wm
large_housework_df['Women w/out Children'] = wnc
large_housework_df['Women w/ Children'] = wwc
large_housework_df['Both Working, Women w/out Children'] = wf
large_housework_df['Both Working, Women w/ Children'] = wfk

large_housework_df
Out[105]:
Men w/out Children Men w/ Children Both Working, Men w/out Children Both Working, Men w/ Children Women w/out Children Women w/ Children Both Working, Women w/out Children Both Working, Women w/ Children
Housework 14.20 15.76 13.81 12.78 57.24 66.46 48.26 53.76
Food & Drink Prep 20.57 24.94 27.04 17.96 52.13 66.72 39.42 55.63
Interior Maintentance 8.26 4.56 4.42 11.16 4.41 2.24 5.44 2.13
Exterior Maintenance 7.19 4.52 6.33 4.39 2.88 0.92 2.76 1.11
Lawn, Garden and Plant Care 26.39 12.05 10.25 21.19 11.37 5.04 7.02 4.72
Financial Management 1.93 1.10 0.75 2.15 3.54 1.08 1.70 0.86
Other 24.24 16.27 16.13 21.22 24.35 13.88 20.87 14.14
In [106]:
large_housework_df.plot(kind="bar", figsize=(18,10))
plt.show()
In [107]:
large_housework_df.plot(figsize=(18,10))
plt.annotate("A", (.5,57))
plt.annotate("B", (2,12))
plt.annotate("C", (5,6))
plt.show()
Some "interesting" notes: (letters reference annotated points on the above graph) A. It's interesting to see where lines cross on this graph. Here we see how available time is most important for time spent doing housework, but when it comes to food and drink prep, both categories of women with children spend the most amount of time. B. This point of working family men with children is an interesting outlier. The data shows that they spend significantly more time doing interior maintenance than any of the other groups visualized above. C. This is an interesting point. Women without children where both parents aren't working do the most financial management of any of the groups charted above. This is interesting, as women with children do much less of the financial management
I tried to merge in the cpsdata to see how things differed by region. But the data I was getting back didn't make any sense.
In [108]:
cpsdata = pd.read_csv("atusdata/atuscps_2013/atuscps_2013.dat")
In [109]:
#cpsdata.head()
In [110]:
cpsdata = cpsdata.rename(columns={'TUCASEID': 'tucaseid'})
In [111]:
#cpsdata.head()
In [112]:
merged = pd.merge(summary, cpsdata, left_on="tucaseid", right_on="tucaseid")
merged.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 29871 entries, 0 to 29870
Columns: 787 entries, tucaseid to PRDASIAN
dtypes: float64(13), int64(774)
memory usage: 179.6 MB
In [113]:
#merged.head()
In [114]:
xadults_crit = (merged.TEAGE >= 18)
xno_children_crit = (merged.TRCHILDNUM == 0)
xmen_crit = (merged.TESEX == 1)
xhas_partner = (merged.TRSPPRES <= 2)
xhas_job = (merged.TELFS <= 2)
xpartner_has_job = (merged.TESPEMPNOT == 1)
northeast = (merged.GEREG == 1)
midwest = (merged.GEREG == 2)
south = (merged.GEREG == 3)
west = (merged.GEREG == 4)
In [115]:
xworking_male = summary[xadults_crit & xno_children_crit & xmen_crit & xhas_partner & xhas_job & xpartner_has_job]
xworking_male_kids = summary[xadults_crit & ~xno_children_crit & xmen_crit & xhas_partner & xhas_job & xpartner_has_job]
xworking_female = summary[xadults_crit & xno_children_crit & ~xmen_crit & xhas_partner & xhas_job & xpartner_has_job]
xworking_female_kids = summary[xadults_crit & ~xno_children_crit & ~xmen_crit & xhas_partner & xhas_job & xpartner_has_job]
In [116]:
ne_male = average_minutes(xworking_male[northeast], "02")
ne_male_kids = average_minutes(xworking_male_kids[northeast], "02")
ne_female = average_minutes(xworking_female[northeast], "02")
ne_female_kids = average_minutes(xworking_female_kids[northeast], "02")
In [117]:
[ne_male, ne_male_kids, ne_female, ne_female_kids]
Out[117]:
[130.33000000000001, 107.36, 107.26000000000001, 103.59]
In [118]:
mw_male = average_minutes(xworking_male[midwest], "02")
mw_male_kids = average_minutes(xworking_male_kids[midwest], "02")
mw_female = average_minutes(xworking_female[midwest], "02")
mw_female_kids = average_minutes(xworking_female_kids[midwest], "02")
In [119]:
[mw_male, mw_male_kids, mw_female, mw_female_kids]
Out[119]:
[89.030000000000001, 113.70999999999999, 112.31, 126.95999999999999]
In [120]:
s_male = average_minutes(xworking_male[south], "02")
s_male_kids = average_minutes(xworking_male_kids[south], "02")
s_female = average_minutes(xworking_female[south], "02")
s_female_kids = average_minutes(xworking_female_kids[south], "02")
In [121]:
[s_male, s_male_kids, s_female, s_female_kids]
Out[121]:
[119.39, 105.68000000000001, 93.920000000000002, 111.38]
In [122]:
w_male = average_minutes(xworking_male[west], "02")
w_male_kids = average_minutes(xworking_male_kids[west], "02")
w_female = average_minutes(xworking_female[west], "02")
w_female_kids = average_minutes(xworking_female_kids[west], "02")
In [123]:
[w_male, w_male_kids, w_female, w_female_kids]
Out[123]:
[84.370000000000005, 111.59999999999999, 85.799999999999997, 105.77]