import pandas as pd
import re
import matplotlib.pyplot as plt
import seaborn
import numpy as np
# Default size (width, height in inches) for every figure created below.
plt.rc('figure', figsize=(16, 6))
# IPython/Jupyter line magic that renders figures inline.
# NOTE(review): this line is not valid in a plain Python script; presumably
# this file is a notebook export -- confirm how it is meant to be run.
%matplotlib inline
# Load the 2013 activity summary file and show its structure.
summary = pd.read_csv("atusdata/atussum_2013/atussum_2013.dat")
summary.info()
summary.head()

# Treat every -1 in the table as a missing value.
summary = summary.replace(-1, np.nan)

# Row filters: boolean Series aligned with ``summary``.
adults_crit = summary["TEAGE"] >= 18
no_children_crit = summary["TRCHILDNUM"] == 0
men_crit = summary["TESEX"] == 1
has_partner = summary["TRSPPRES"] <= 2
has_job = summary["TELFS"] <= 2
partner_has_job = summary["TESPEMPNOT"] == 1

# Every group below is limited to adults with a partner; the groups differ
# only in whether children are present and in the sex filter.
adults_with_no_children = summary.loc[adults_crit & no_children_crit & has_partner]
adults_with_children = summary.loc[adults_crit & ~no_children_crit & has_partner]

men_with_no_children = summary.loc[
    adults_crit & no_children_crit & men_crit & has_partner
]
men_with_children = summary.loc[
    adults_crit & ~no_children_crit & men_crit & has_partner
]

women_with_no_children = summary.loc[
    adults_crit & no_children_crit & ~men_crit & has_partner
]
women_with_children = summary.loc[
    adults_crit & ~no_children_crit & ~men_crit & has_partner
]
def activity_columns(data, activity_code):
    """Return the columns of *data* that fall under the given activity code.

    A column belongs to the activity when its name starts with ``"t"``
    followed by ``activity_code``; e.g. ``"02"`` selects ``t0201`` and
    ``t020101`` but not ``t0301``.

    Args:
        data: Object exposing a ``columns`` iterable of column names
            (such as a DataFrame).
        activity_code: Activity code prefix, matched literally.

    Returns:
        A list of the matching column names, in ``data.columns`` order.
    """
    col_prefix = "t{}".format(activity_code)
    # Plain prefix comparison: the code is never interpreted as a regular
    # expression, and non-string column names are skipped instead of raising.
    return [
        column
        for column in data.columns
        if isinstance(column, str) and column.startswith(col_prefix)
    ]
def average_minutes(data, activity_code):
    """Return the weighted average minutes spent on an activity.

    The minutes of every column selected by
    ``activity_columns(data, activity_code)`` are summed per row, and the
    per-row totals are averaged using the ``TUFINLWGT`` column as weights.

    Args:
        data: DataFrame holding a ``TUFINLWGT`` column and the activity
            columns. It is only read, never modified.
        activity_code: Activity code prefix passed to ``activity_columns``.

    Returns:
        The weighted mean rounded to two decimals, or NaN when the weights
        sum to zero (for instance when *data* has no rows).
    """
    cols = activity_columns(data, activity_code)
    minutes = data[cols].sum(axis=1)
    weight = data["TUFINLWGT"]

    # Work on the Series directly rather than adding columns to a slice of
    # the caller's frame, which triggers pandas' SettingWithCopy warning.
    total_weight = weight.sum()
    if total_weight == 0:
        # Make the "nothing to average" case explicit instead of relying on
        # a 0/0 division.
        return float("nan")
    return round((weight * minutes).sum() / total_weight, 2)
# Weighted average minutes of household activities (code "02") for all
# partnered adults, with and without children.
all_adult_with_kids_housework, all_adult_no_kids_housework = [
    average_minutes(group, "02")
    for group in (adults_with_children, adults_with_no_children)
]
def avg_getter(idgroup, codelist):
    """Return the average minutes for each activity code in *codelist*.

    Args:
        idgroup: Data for one group, passed unchanged to ``average_minutes``.
        codelist: Iterable of activity codes.

    Returns:
        A new list holding ``average_minutes(idgroup, code)`` for every code,
        in the order the codes were given.
    """
    return [average_minutes(idgroup, code) for code in codelist]
# Household sub-activities that are reported individually, keyed by code.
household_dict = {
    "0201": "Housework",
    "0202": "Food & Drink Prep",
    "0203": "Interior Maintenance",
    "0204": "Exterior Maintenance",
    "0205": "Lawn, Garden and Plant Care",
    "020901": "Financial Management",
}

# Codes in the order they appear in tables and charts.
household_list = ["0201", "0202", "0203", "0204", "0205", "020901"]

# Display labels are derived from the mapping so the two cannot drift apart
# (the hand-written list misspelled "Interior Maintenance"). "Other" names
# the remainder of the household total not covered by the codes above.
labels = [household_dict[code] for code in household_list] + ["Other"]
def household_breakdown(idgroup):
    """Split a group's household minutes into sub-activities plus a remainder.

    Args:
        idgroup: Data for one group, passed to ``average_minutes`` and
            ``avg_getter``.

    Returns:
        A list with the average minutes for each code in ``household_list``
        followed by one final entry: the overall household average (code
        ``"02"``) minus the sum of the listed sub-activities.
    """
    all_household = average_minutes(idgroup, "02")
    particulars = avg_getter(idgroup, household_list)
    # Every term is already rounded to two decimals; round the remainder as
    # well so float subtraction noise (e.g. 5.130000000000003) does not leak
    # into the printed tables and chart labels.
    other = round(all_household - sum(particulars), 2)
    particulars.append(other)
    return particulars
# Spot-check the breakdown for one group before building the tables.
x = household_breakdown(men_with_children)
print(x)

# For each group: the overall household average (code "02") followed by the
# same time split into sub-activities.
mwc_all_household, mwc_house = (
    average_minutes(men_with_children, "02"),
    household_breakdown(men_with_children),
)
wwc_all_household, wwc_house = (
    average_minutes(women_with_children, "02"),
    household_breakdown(women_with_children),
)
mnc_all_household, mnc_house = (
    average_minutes(men_with_no_children, "02"),
    household_breakdown(men_with_no_children),
)
wnc_all_household, wnc_house = (
    average_minutes(women_with_no_children, "02"),
    household_breakdown(women_with_no_children),
)
# Overall household minutes per group, drawn as a bar chart and printed as a
# one-column table.
total_housework = pd.Series(
    [mnc_all_household, mwc_all_household, wnc_all_household, wwc_all_household],
    index=[
        "Men w/out Children",
        "Men w/ Children",
        "Women w/out Children",
        "Women w/ Children",
    ],
)
total_housework.plot(kind="bar", figsize=(15, 5))

total_df = pd.DataFrame(total_housework)
total_df.columns = ["Minutes"]
print(total_df)

plt.show()
# Each sex's household average relative to the all-adult average, first for
# groups without children and then for groups with children.
ratio_men, ratio_women = (
    group_avg / all_adult_no_kids_housework
    for group_avg in (mnc_all_household, wnc_all_household)
)
ratio_men_kids, ratio_women_kids = (
    group_avg / all_adult_with_kids_housework
    for group_avg in (mwc_all_household, wwc_all_household)
)

# Bare expressions: they only produce output when evaluated interactively.
ratio_men + ratio_women
ratio_men_kids + ratio_women_kids
# One labelled Series per group, all sharing the sub-activity labels as index.
mwc, mnc, wwc, wnc = (
    pd.Series(minutes, index=labels)
    for minutes in (mwc_house, mnc_house, wwc_house, wnc_house)
)

# Table of sub-activity minutes with one column per group.
housework_df = mnc.to_frame(name="Men w/out Children")
housework_df["Men w/ Children"] = mwc
housework_df["Women w/out Children"] = wnc
housework_df["Women w/ Children"] = wwc
housework_df

# The same table as grouped bars, then as lines.
housework_df.plot(kind="bar", figsize=(18, 10))
plt.show()

housework_df.plot(figsize=(18, 6))
plt.show()
# Minutes with children minus minutes without children, per sub-activity.
housework_df["Child Diff Men"] = housework_df["Men w/ Children"].sub(
    housework_df["Men w/out Children"]
)
housework_df["Child Diff Women"] = housework_df["Women w/ Children"].sub(
    housework_df["Women w/out Children"]
)
housework_df

# Bar chart of the two difference columns with a reference line at zero.
housework_df.loc[:, ["Child Diff Men", "Child Diff Women"]].plot(
    kind="bar", figsize=(15, 6)
)
plt.axhline(y=0, color="black")
plt.show()
# Women-only view of the table: both groups plus their difference.
women_chart = housework_df.loc[
    :, ["Women w/out Children", "Women w/ Children", "Child Diff Women"]
]
women_chart

# One pie per women's group; both use the same colors and percentage format.
pie_colors = ['b', 'g', 'white', 'orange', 'grey', 'red', 'tan']
for _shares, _title in (
    (wnc, "Division of household tasks for women with no children"),
    (wwc, "Division of household tasks for women with children"),
):
    _shares.plot(kind="pie", figsize=(8, 8), colors=pie_colors, autopct='%.2f')
    plt.title(_title)
    plt.show()
# Same four groups as before, narrowed to rows where both the job filter and
# the partner's job filter hold.
working_male = summary.loc[
    adults_crit & no_children_crit & men_crit & has_partner & has_job & partner_has_job
]
working_male_kids = summary.loc[
    adults_crit & ~no_children_crit & men_crit & has_partner & has_job & partner_has_job
]
working_female = summary.loc[
    adults_crit & no_children_crit & ~men_crit & has_partner & has_job & partner_has_job
]
working_female_kids = summary.loc[
    adults_crit & ~no_children_crit & ~men_crit & has_partner & has_job & partner_has_job
]

# Overall household average and sub-activity breakdown for each group.
wm_all_household, wm_house = (
    average_minutes(working_male, "02"),
    household_breakdown(working_male),
)
wmk_all_household, wmk_house = (
    average_minutes(working_male_kids, "02"),
    household_breakdown(working_male_kids),
)
wf_all_household, wf_house = (
    average_minutes(working_female, "02"),
    household_breakdown(working_female),
)
wfk_all_household, wfk_house = (
    average_minutes(working_female_kids, "02"),
    household_breakdown(working_female_kids),
)
# Overall household minutes for the four dual-earner groups, drawn as bars
# and printed as a one-column table.
total_working_housework = pd.Series(
    [wm_all_household, wmk_all_household, wf_all_household, wfk_all_household],
    index=[
        "Men w/out Children",
        "Men w/ Children",
        "Women w/out Children",
        "Women w/ Children",
    ],
)
total_working_housework.plot(kind="bar", figsize=(15, 5))

total_working_df = pd.DataFrame(total_working_housework)
total_working_df.columns = ["Minutes"]
print(total_working_df)

plt.title("Minutes per day spent on household activities in families where both parents are employed")
plt.show()
# Side-by-side totals: every partnered adult vs. the dual-earner subset.
all_total_df = total_housework.to_frame(name="Total Housework")
all_total_df["Total Housework, Both Working"] = total_working_housework

all_total_df.plot(kind="bar", figsize=(15, 5))
plt.show()
# Sub-activity breakdown per group as labelled Series: the four overall
# groups followed by the four dual-earner groups.
mwc = pd.Series(mwc_house, index=labels)
mnc = pd.Series(mnc_house, index=labels)
wwc = pd.Series(wwc_house, index=labels)
wnc = pd.Series(wnc_house, index=labels)
wm = pd.Series(wm_house, index=labels)
wmk = pd.Series(wmk_house, index=labels)
wf = pd.Series(wf_house, index=labels)
wfk = pd.Series(wfk_house, index=labels)

# Combined table with one column per group.
large_housework_df = pd.DataFrame(mnc, index=labels)
large_housework_df.columns = ["Men w/out Children"]
large_housework_df['Men w/ Children'] = mwc
# wm comes from working_male (no children) and wmk from working_male_kids;
# these two columns previously received each other's data.
large_housework_df['Both Working, Men w/out Children'] = wm
large_housework_df['Both Working, Men w/ Children'] = wmk
large_housework_df['Women w/out Children'] = wnc
large_housework_df['Women w/ Children'] = wwc
large_housework_df['Both Working, Women w/out Children'] = wf
large_housework_df['Both Working, Women w/ Children'] = wfk
large_housework_df

# Grouped bar chart of the combined table.
large_housework_df.plot(kind="bar", figsize=(18, 10))
plt.show()

# Line chart of the same table with letter markers at fixed data coordinates.
large_housework_df.plot(figsize=(18, 10))
plt.annotate("A", (.5, 57))
plt.annotate("B", (2, 12))
plt.annotate("C", (5, 6))
plt.show()
# Load the 2013 CPS file and give its case-id column the lowercase name used
# by ``summary`` so the two tables share a join key.
cpsdata = pd.read_csv("atusdata/atuscps_2013/atuscps_2013.dat").rename(
    columns={'TUCASEID': 'tucaseid'}
)

# Join on the case id (default inner join).
# NOTE(review): if the CPS file holds several rows per tucaseid, each summary
# row is repeated once per match -- confirm whether that is intended.
merged = summary.merge(cpsdata, left_on="tucaseid", right_on="tucaseid")
merged.info()
def _region_average(group, region_crit):
    """Return the household average (code "02") for *group* rows in a region.

    ``region_crit`` is a boolean Series covering all of ``merged``; it is
    narrowed to the rows of *group* before filtering so the mask and the
    frame share exactly the same index.
    """
    return average_minutes(group[region_crit.loc[group.index]], "02")


# Row filters rebuilt against the merged table.
xadults_crit = (merged.TEAGE >= 18)
xno_children_crit = (merged.TRCHILDNUM == 0)
xmen_crit = (merged.TESEX == 1)
xhas_partner = (merged.TRSPPRES <= 2)
xhas_job = (merged.TELFS <= 2)
xpartner_has_job = (merged.TESPEMPNOT == 1)

# Region filters, one per GEREG code.
northeast = (merged.GEREG == 1)
midwest = (merged.GEREG == 2)
south = (merged.GEREG == 3)
west = (merged.GEREG == 4)

# Select from ``merged``: the masks above were computed from it, and the
# region filters below need its rows. Indexing ``summary`` with these masks
# only works if both frames happen to have identical indexes.
xworking_male = merged[xadults_crit & xno_children_crit & xmen_crit & xhas_partner & xhas_job & xpartner_has_job]
xworking_male_kids = merged[xadults_crit & ~xno_children_crit & xmen_crit & xhas_partner & xhas_job & xpartner_has_job]
xworking_female = merged[xadults_crit & xno_children_crit & ~xmen_crit & xhas_partner & xhas_job & xpartner_has_job]
xworking_female_kids = merged[xadults_crit & ~xno_children_crit & ~xmen_crit & xhas_partner & xhas_job & xpartner_has_job]

# Household averages per region. The trailing bare lists only produce output
# when evaluated interactively.
ne_male = _region_average(xworking_male, northeast)
ne_male_kids = _region_average(xworking_male_kids, northeast)
ne_female = _region_average(xworking_female, northeast)
ne_female_kids = _region_average(xworking_female_kids, northeast)
[ne_male, ne_male_kids, ne_female, ne_female_kids]

mw_male = _region_average(xworking_male, midwest)
mw_male_kids = _region_average(xworking_male_kids, midwest)
mw_female = _region_average(xworking_female, midwest)
mw_female_kids = _region_average(xworking_female_kids, midwest)
[mw_male, mw_male_kids, mw_female, mw_female_kids]

s_male = _region_average(xworking_male, south)
s_male_kids = _region_average(xworking_male_kids, south)
s_female = _region_average(xworking_female, south)
s_female_kids = _region_average(xworking_female_kids, south)
[s_male, s_male_kids, s_female, s_female_kids]

w_male = _region_average(xworking_male, west)
w_male_kids = _region_average(xworking_male_kids, west)
w_female = _region_average(xworking_female, west)
w_female_kids = _region_average(xworking_female_kids, west)
[w_male, w_male_kids, w_female, w_female_kids]