import numpy as np
import csv
from pandas import *

# load the survey data from CSV
scf = read_csv('/Users/dmatt/Documents/Wonkblog/scf.csv')

# split the rows by the value of the 'race' column; the codes 1, 2, 3 and 5
# map, in order, onto the four group names on the left-hand side
white, black, hispanic, other = (
    scf[scf['race'] == race_code] for race_code in (1, 2, 3, 5)
)

# subset by education
# 'educ' codes to select within each race group, in output order. The
# original per-code variable names were: dropout, hs, one, two, three,
# college, grad.
educ_codes = (10, 12, 13, 14, 15, 16, 17)

# collate subsets into a list, race-major and education-minor:
# all white subsets first (dropout .. grad), then black, hispanic, other.
# This ordering must stay in step with the column headers written later.
subsets = [group[group['educ'] == educ_code]
           for group in (white, black, hispanic, other)
           for educ_code in educ_codes]

# create list of rows to hold data: one row per age, each starting with the
# age itself followed by one median net worth per entry of `subsets`
rows = []

# iterate through ages and subsets
for age in range(120):

    # start the row with the age, then append one value per subset
    row = [age]

    for frame in subsets:
        # subset to the five-year age range centred on `age`
        # (age-2 through age+2, both ends inclusive)
        in_window = (frame['age'] <= age + 2) & (frame['age'] >= age - 2)
        row.append(frame[in_window]['networth'].median())

    rows.append(row)

# header labels: "Age" followed by one "<race> <education>" label per subset,
# race-major and education-minor
race_labels = ("White", "Black", "Hispanic", "Other")
educ_labels = ("Dropout", "HS", "One College", "Two College",
               "Three College", "College", "Grad")

columns = ["Age"]
for race_label in race_labels:
    for educ_label in educ_labels:
        columns.append(race_label + " " + educ_label)

# open the output file; the `with` block guarantees it is flushed and closed
# once all rows are written. Text mode with newline='' is what the Python 3
# csv module expects (binary 'wb' mode makes csv.writer fail with TypeError).
with open('/Users/dmatt/Documents/Wonkblog/mediannetworth_scf.csv', 'w', newline='') as out_file:
    writer = csv.writer(out_file)

    # write headers
    writer.writerow(columns)

    # write one line per age row
    writer.writerows(rows)
# changed October 10, 2012