Insight into California's Public Schools and Districts Diversity¶

We will use the California Student and school 2009-2018 data to analyze California's Public Schools and Districts Success. Access to database is available via the following link: https://www.cde.ca.gov/ds/sd/sd/#esd

1. California's Public Schools and Districts Diversity¶

we will use the Enrollment Data and English Learner (EL) Data to analyze California's Public Schools and Districts Diversity

#!pip install plotly

import matplotlib.pyplot

import pandas as pd
import numpy as np
import warnings
import plotly.plotly as py
import plotly.graph_objs as go
import plotly as plt
import seaborn as sns
import plotly.figure_factory as ff
import os
import glob
warnings.filterwarnings("ignore")

path = 'C:/Users/AKAYA/Desktop/DataIncubator/2019'
extension = 'txt'
os.chdir(path)
result = [i for i in glob.glob('*.{}'.format(extension))]
print(result)

['EL2009df.txt', 'EL2010df.txt', 'EL2011df.txt', 'EL2012df.txt', 'EL2013df.txt', 'EL2014df.txt', 'EL2015df.txt', 'EL2016df.txt', 'EL2017df.txt', 'EL2018df.txt', 'ELSCA2018.txt', 'Enrl2009dd.txt', 'Enrl2010dd.txt', 'Enrl2011dd.txt', 'Enrl2012dd.txt', 'Enrl2013dd.txt', 'Enrl2014dd.txt', 'Enrl2015dd.txt', 'Enrl2016dd.txt', 'Enrl2017dd.txt', 'Enrl2018dd.txt', 'ESLCA2018.txt', 'expenditures_readme.txt', 'Graduade20162017.txt', 'plot1.txt', 'Plot2.txt', 'web.txt']

#Function to load English Learner (EL) Data
path = 'C:/Users/AKAYA/Desktop/DataIncubator/2019' # use your path
allFiles1 = glob.glob(path + "/*df.txt")
frame = pd.DataFrame()
dfEL = []
for file_ in allFiles1:
    df = pd.read_table(file_,index_col=None, header=0)
    dfEL.append(df)
frame = pd.concat(dfEL)

len(frame)

613107

# Function to check df shape
for i in range (0,len(allFiles1)):
    print(dfEL[i].shape)

(75685, 21)
(58909, 21)
(47748, 21)
(58676, 21)
(58648, 21)
(60775, 21)
(62042, 21)
(63250, 21)
(63714, 21)
(63660, 21)

#quck check to see na/null
for i in range (0,len(allFiles1)):
    print(dfEL[i].isna().sum())

CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
GR_UNGR     0
TOTAL_EL    0
dtype: int64
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
GR_UNGR     0
TOTAL_EL    0
dtype: int64
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
GR_UNGR     0
TOTAL_EL    0
dtype: int64
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
UNGR        0
TOTAL_EL    0
dtype: int64
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
UNGR        0
TOTAL_EL    0
dtype: int64
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
UNGR        0
TOTAL_EL    0
dtype: int64
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
UNGR        0
TOTAL_EL    0
dtype: int64
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
UNGR        0
TOTAL_EL    0
dtype: int64
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
UNGR        0
TOTAL_EL    0
dtype: int64
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
UNGR        0
TOTAL_EL    0
dtype: int64

#Function to load Enrollment Data
path = 'C:/Users/AKAYA/Desktop/DataIncubator/2019' # use your path
allFiles2 = glob.glob(path + "/*dd.txt")
frame = pd.DataFrame()
dfEnrlm = []
for file_ in allFiles2:
    df = pd.read_table(file_,index_col=None, header=0)
    dfEnrlm.append(df)
frame = pd.concat(dfEnrlm)

# Function to check df shape
for i in range (0,len(allFiles2)):
    print(dfEnrlm[i].shape)

(121068, 23)
(124714, 23)
(126082, 23)
(127715, 23)
(127445, 23)
(127130, 23)
(129354, 23)
(129717, 23)
(129813, 23)
(130194, 23)

#quck check to see na/null
for i in range (0,len(allFiles2)):
    print(dfEnrlm[i].isna().sum())

CDS_CODE     0
COUNTY       2
DISTRICT     2
SCHOOL       2
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64

#quck check to see na/null
for i in range (0,len(allFiles2)):
    print(dfEnrlm[i].isna().sum())

CDS_CODE     0
COUNTY       2
DISTRICT     2
SCHOOL       2
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64

Upon close inspection we find that there 2 missing values in The COUNTY, DISTRICT and SCHOOL columns. We will check missing values by looking at other columns to see missing values impact on the data.

dfEnrlm[0][dfEnrlm[0].COUNTY.isna()]

The only two students' COUNTY, DISTRICT and SCHOOL information is missing so we will remove it during county analysis

English Learner Data Analaysis¶

Function to add ESTotal, MSTotal, HSTotal¶

#function to add df the total  number and percentile of EL at Eelementarty, Middle and High School level
for i in range (0,len(allFiles1)):
    dfEL[i]['EStotal'] = dfEL[i].apply(lambda x: x['KDGN'] + x['GR_1']+x['GR_2']+ x['GR_3']+x['GR_4']+x['GR_4'], axis=1)
    dfEL[i]['MStotal'] = dfEL[i].apply(lambda x: x['GR_6']+x['GR_7']+x['GR_8'], axis=1)
    dfEL[i]['HStotal'] = dfEL[i].apply(lambda x: x['GR_9']+x['GR_10']+x['GR_11']+x['GR_12'], axis=1)

#function to add df the total  number of Enrolment at Eelementarty, Middle and High School level
for i in range (0,len(allFiles2)):
    dfEnrlm[i]['EStotal'] = dfEnrlm[i].apply(lambda x: x['KDGN'] + x['GR_1']+x['GR_2']+ x['GR_3']+x['GR_4']+x['GR_4'], axis=1)
    dfEnrlm[i]['MStotal'] = dfEnrlm[i].apply(lambda x: x['GR_6']+x['GR_7']+x['GR_8'], axis=1)
    dfEnrlm[i]['HStotal'] = dfEnrlm[i].apply(lambda x: x['GR_9']+x['GR_10']+x['GR_11']+x['GR_12'], axis=1)

for i in range (0,len(allFiles1)):
    dfEL[i]['ESpercent']= dfEL[i]['EStotal']/dfEnrlm[i]['EStotal'].sum()
    dfEL[i]['MSpercent']=dfEL[i]['MStotal']/dfEnrlm[i]['MStotal'].sum()
    dfEL[i]['HSpercent']=dfEL[i]['HStotal']/dfEnrlm[i]['HStotal'].sum()

Number for EL in California¶

CAES=[]
CAHS=[]
CAMS=[]
TOTAL_EL=[]
for i in range (0,len(allFiles1)):
    CAES.append(dfEL[i]['EStotal'].sum())
    CAMS.append(dfEL[i]['MStotal'].sum())
    CAHS.append(dfEL[i]['HStotal'].sum())
    TOTAL_EL.append(dfEL[i]['TOTAL_EL'].sum())

data = {'CAES':CAES,'CAMS':CAMS,'CAHS':CAHS,'Total CA EL':TOTAL_EL}
dftotal=pd.DataFrame(data)
dftotal['years']=[2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018]
dftotal

trace_1 = go.Scatter(
                x=dftotal.years,
                y=dftotal['CAES'],
                name = "CA EL Elementary Sch.",
                line = dict(color = 'blue'),
                opacity = 1)
trace_2 = go.Scatter(
                x=dftotal.years,
                y=dftotal['CAMS'],
                name = "CA EL Middle Sch.",
                line = dict(color = 'green'),
                opacity = 0.8)
trace_3 = go.Scatter(
                x=dftotal.years,
                y=dftotal['CAHS'],
                name = "CA EL High Sch.",
                line = dict(color = 'red'),
                opacity = 0.8)
trace_4 = go.Scatter(
                x=dftotal.years,
                y=dftotal['Total CA EL'],
                name = "Total CA EL",
                line = dict(color = 'black'),
                opacity = 0.5)
data = [trace_1,trace_2, trace_3, trace_4]

layout = dict(
    title = "Number of EL California",
    xaxis = dict(
        range = ['2009','2018'])
)

fig = dict(data=data, layout=layout)
py.iplot(fig)

CAESrate=[]
CAHSrate=[]
CAMSrate=[]
TOTAL_ELrate=[]
for i in range (0,len(allFiles1)):
    CAESrate.append(dfEL[i]['ESpercent'].sum())
    CAMSrate.append(dfEL[i]['MSpercent'].sum())
    CAHSrate.append(dfEL[i]['HSpercent'].sum())
    TOTAL_ELrate.append(dfEL[i]['TOTAL_EL'].sum()/dfEnrlm[i]['ENR_TOTAL'].sum())
    data = {'CAESrate':CAESrate,'CAMSrate':CAMSrate,'CAHSrate':CAHSrate,'TOTAL_ELrate':TOTAL_ELrate}
dfrate=pd.DataFrame(data)
dfrate['years']=[2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018]
dfrate

trace_1 = go.Scatter(
                x=dfrate.years,
                y=dfrate['CAESrate'],
                name = "CA EL ratet Elementary Sch.",
                line = dict(color = 'blue'),
                opacity = 1)
trace_2 = go.Scatter(
                x=dfrate.years,
                y=dfrate['CAMSrate'],
                name = "CA ELrate Middle Sch.",
                line = dict(color = 'green'),
                opacity = 0.8)
trace_3 = go.Scatter(
                x=dfrate.years,
                y=dfrate['CAHSrate'],
                name = "CA EL rate High Sch.",
                line = dict(color = 'red'),
                opacity = 0.8)
trace_4 = go.Scatter(
                x=dfrate.years,
                y=dfrate['TOTAL_ELrate'],
                name = "CA EL total rate",
                line = dict(color = 'black'),
                opacity = 0.5)
data = [trace_1,trace_2, trace_3, trace_4]

layout = dict(
    title = "EL rate California",
    xaxis = dict(
        range = ['2009','2018'])
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename = "Manually Set Range")

save the data for EL maping¶

dfmap = pd.merge(pd.DataFrame(dfEL[9].groupby(['COUNTY']).sum()['TOTAL_EL']),
                  pd.DataFrame(dfEnrlm[9].groupby(['COUNTY']).sum()['ENR_TOTAL']),how='right', 
                                on='COUNTY')
dfmap=dfmap.fillna(0)

df_map = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/minoritymajority.csv')
df_map=df_map.iloc[:,0:3]
df_map=df_map[df_map.STNAME=='California']
df_map=df_map.reset_index(drop=True)

#dfmap.to_csv('dfmap.csv', index=False)
#df_map.to_csv('df_map.csv', index=False)

EL MAP¶

df=pd.read_csv('formap.csv')
df.head()

df['percent']=df['TOTAL_EL']*100/df['ENR_TOTAL']
df.head()

Percentile = df['percent'].tolist()
fips = df['FIPS'].tolist()

colorscale = ["#c6dbef", "#b3d2e9","#9ecae1","#85bcdb","#3082be","#2171b5","#1361a9","#08519c","#0b4083","#08306b"]

fig = ff.create_choropleth(
    fips=fips, values=Percentile, scope=['CA'],
    binning_endpoints=[5, 10, 15, 20, 25, 30, 35, 40], colorscale=colorscale,
    county_outline={'color': 'blue', 'width': 0.3}, round_legend_values=True,
    legend_title='Percent%', title='Percentage of English learners by County'
)
py.iplot(fig)

EL The Home Language Map¶¶

County=pd.read_csv('Counties.csv')
County.head(3)

#In the Alpine there is EL so we will add Alpine as 0 manually and reindex the df to avoid effect to function
#County=County.drop(County.index[[1]]).reset_index(drop=True)

#Function to found county highest EL language for 2010. We will tart from 2nd one due to Alpine there is no EL  
HighestELLanguage10=['Spanish','Not EL']
for i in range (2,len(County['county_name'])):
    X=dfEL[1][dfEL[1].COUNTY==County['county_name'][i]] .groupby(['LANGUAGE']).sum()['TOTAL_EL'].idxmax()
    HighestELLanguage10.append(X)

    #Function to found county highest EL language for 2018
HighestELLanguage18=['Spanish','Not EL']
for i in range (2,len(County['county_name'])):
    Z=dfEL[9][dfEL[9].COUNTY==County['county_name'][i]] .groupby(['LANGUAGE']).sum()['TOTAL_EL'].idxmax()
    HighestELLanguage18.append(Z)

df=pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/minoritymajority.csv')
df=df[df.STNAME=='California'][['FIPS', 'CTYNAME']]
df['HighestELLanguage10']=HighestELLanguage10
df['HighestELLanguage18']=HighestELLanguage18
df.head()

df['HighestELLanguage10'].drop_duplicates()

2605    Spanish
2606     Not EL
Name: HighestELLanguage10, dtype: object

In the California highest English Learners are speaking as first language is spanish

df['HighestELLanguage18'].drop_duplicates()

2605    Spanish
2606     Not EL
2658      Hmong
Name: HighestELLanguage18, dtype: object

I would like to put EL 2018 in the map to see region so I will asign 0 to Not EL, 1 to Spanish and 2 to Hmog

Code 0="Not EL"
Code 1="Hmong"
Code 2="Spanish"

#One-Hot encoding:The basic strategy is to convert each category value into a column and assign a 1, 2, 3, 4, and 5. This has the benefit of not weighting a value improperly.
OneHot = {'HighestELLanguage18':     {"Spanish": 2, "Not EL": 0, "Hmong":1}}
df.replace(OneHot, inplace=True)
df['HighestELLanguage18'].drop_duplicates()

2605    2
2606    0
2658    1
Name: HighestELLanguage18, dtype: int64

# ETHNIC
#Racial/ethnic designation. This field is coded as follows:

     #Code 0="Not EL"
     #Code 1="Hmong"
     #Code 2="Spanish"

scope = ['California']
values = df['HighestELLanguage18'].tolist()
fips = df['FIPS'].tolist()
colorscale = ['blue','green','red','Cyan','Brown']

fig = ff.create_choropleth(
    fips=fips, values=values, scope=scope,
    colorscale=colorscale, round_legend_values=True,
    simplify_county=0, simplify_state=0,
    county_outline={'color': 'rgb(15, 15, 55)', 'width': 0.5},
    state_outline={'width': 1},
    legend_title='The Home Language code#',
    title='English Learners Home Language (20017-2018)')

py.iplot(fig)

The draw time for this plot will be slow for clients without much RAM.

Function to add EStotal, MStotal and HStotal to Enrolment df¶

#function to add df the total  number of Enrolment at Eelementarty, Middle and High School level
for i in range (0,len(allFiles2)):
    dfEnrlm[i]['EStotal'] = dfEnrlm[i].apply(lambda x: x['KDGN'] + x['GR_1']+x['GR_2']+ x['GR_3']+x['GR_4']+x['GR_4'], axis=1)
    dfEnrlm[i]['MStotal'] = dfEnrlm[i].apply(lambda x: x['GR_6']+x['GR_7']+x['GR_8'], axis=1)
    dfEnrlm[i]['HStotal'] = dfEnrlm[i].apply(lambda x: x['GR_9']+x['GR_10']+x['GR_11']+x['GR_12'], axis=1)

ETHNIC¶

Racial/ethnic designation. This field is coded as follows:

 Code 0 = Not reported
 Code 1 = American Indian or Alaska Native, Not Hispanic
 Code 2 = Asian, Not Hispanic
 Code 3 = Pacific Islander, Not Hispanic
 Code 4 = Filipino, Not Hispanic
 Code 5 = Hispanic or Latino
 Code 6 = African American, not Hispanic
 Code 7 = White, not Hispanic
 Code 9 = Two or More Races, Not Hispanic

dfEnrlm[9].head()

dfEnrlm[9]['ETHNIC'].drop_duplicates()

0     6
1     5
2     9
3     1
4     4
6     7
8     2
38    3
44    0
Name: ETHNIC, dtype: int64

Ethnicity Numbers 2009-2018

Code0=[] 
Code1=[] 
Code2=[] 
Code3=[] 
Code4=[] 
Code5=[] 
Code6=[] 
Code7=[] 
Code9=[]
CodeTotal=[]
for i in range (0,len(allFiles2)):
    Code0.append(dfEnrlm[i][dfEnrlm[i]['ETHNIC']==0]['ENR_TOTAL'].sum())
    Code1.append(dfEnrlm[i][dfEnrlm[i]['ETHNIC']==1]['ENR_TOTAL'].sum())
    Code2.append(dfEnrlm[i][dfEnrlm[i]['ETHNIC']==2]['ENR_TOTAL'].sum())
    Code3.append(dfEnrlm[i][dfEnrlm[i]['ETHNIC']==3]['ENR_TOTAL'].sum())
    Code4.append(dfEnrlm[i][dfEnrlm[i]['ETHNIC']==4]['ENR_TOTAL'].sum())
    Code5.append(dfEnrlm[i][dfEnrlm[i]['ETHNIC']==5]['ENR_TOTAL'].sum())
    Code6.append(dfEnrlm[i][dfEnrlm[i]['ETHNIC']==6]['ENR_TOTAL'].sum())
    Code7.append(dfEnrlm[i][dfEnrlm[i]['ETHNIC']==7]['ENR_TOTAL'].sum())
    Code9.append(dfEnrlm[i][dfEnrlm[i]['ETHNIC']==9]['ENR_TOTAL'].sum())
    CodeTotal.append(dfEnrlm[i]['ENR_TOTAL'].sum())

data={'Not Reoprted':Code0, 'American Indian or Alaska Native': Code1, 'Asian':Code2, 'Pacific Islander':Code3, 
     'Filipino':Code4, 'Hispanic or Latino':Code5, 'African American':Code6, 'White':Code7, 
      'Two or More Races':Code9, 'Total Enrolment':CodeTotal}
dfEthnic=pd.DataFrame(data)
dfEthnic['years']=[2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018]

Total Enrolment by Year

df=dfEthnic

trace_1 = go.Scatter(
                x=df.years,
                y=df['Total Enrolment'],
                name = "Total Enrolment",
                line = dict(color = 'blue'),
                opacity = 1)


data = [trace_1]

layout = dict(
    title = "California Schools Total Enrolment 2009-2018",
    xaxis = dict(
        range = ['2009','2018'])
)

fig = dict(data=data, layout=layout)
py.iplot(fig)

dfEthnic

It can be seen from table total number is diferent from totla numbers of ethicity so some of ethnicity is missing in 2009

df=dfEthnic

trace_1 = go.Scatter(
                x=df.years,
                y=df['Not Reoprted'],
                name = "Not Reoprted",
                line = dict(color = 'blue'),
                opacity = 1)
trace_2 = go.Scatter(
                x=df.years,
                y=df['American Indian or Alaska Native'],
                name = "American Indian or Alaska Native",
                line = dict(color = 'green'),
                opacity = 0.8)
trace_3 = go.Scatter(
                x=df.years,
                y=df['Asian'],
                name = "Asian",
                line = dict(color = 'red'),
                opacity = 0.8)
trace_4 = go.Scatter(
                x=df.years,
                y=df['Pacific Islander'],
                name = "Pacific Islander",
                line = dict(color = 'Cyan'),
                opacity = 0.5)
trace_5 = go.Scatter(
                x=df.years,
                y=df['Filipino'],
                name = "Filipino",
                line = dict(color = 'Brown'),
                opacity = 0.5)
trace_6 = go.Scatter(
                x=df.years,
                y=df['Hispanic or Latino'],
                name = "Hispanic or Latino",
                line = dict(color = 'Magenta'),
                opacity = 0.5)
trace_7 = go.Scatter(
                x=df.years,
                y=df['African American'],
                name = "African American",
                line = dict(color = 'Green'),
                opacity = 0.5)
trace_8 = go.Scatter(
                x=df.years,
                y=df['Two or More Races'],
                name = "Two or More Races",
                line = dict(color = 'black'),
                opacity = 0.5)
trace_9 = go.Scatter(
                x=df.years,
                y=df['White'],
                name = "White",
                line = dict(color = 'gray'),
                opacity = 0.5)

data = [trace_1,trace_2, trace_3, trace_4, trace_5, trace_6, trace_7, trace_8, trace_9]

layout = dict(
    title = "Ethnic Makeup of California Schools 2009-2018",
    xaxis = dict(
        range = ['2009','2018'])
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename = "Manually Set Range")

Ethnic Rate¶

dfEthnic.head(2)

dfEtnicRate=dfEthnic

dfEtnicRate['Not Reoprted']=dfEthnic['Not Reoprted']/dfEthnic['Total Enrolment']
dfEtnicRate['American Indian or Alaska Native']=dfEthnic['American Indian or Alaska Native']/dfEthnic['Total Enrolment']
dfEtnicRate['Asian']=dfEthnic['Asian']/dfEthnic['Total Enrolment']
dfEtnicRate['Pacific Islander']=dfEthnic['Pacific Islander']/dfEthnic['Total Enrolment']
dfEtnicRate['Filipino']=dfEthnic['Filipino']/dfEthnic['Total Enrolment']
dfEtnicRate['Hispanic or Latino']=dfEthnic['Hispanic or Latino']/dfEthnic['Total Enrolment']
dfEtnicRate['African American']=dfEthnic['African American']/dfEthnic['Total Enrolment']
dfEtnicRate['White']=dfEthnic['White']/dfEthnic['Total Enrolment']
dfEtnicRate['Two or More Races']=dfEthnic['Two or More Races']/dfEthnic['Total Enrolment']
dfEtnicRate.iloc[:,0:10].round(3)

trace_1 = go.Scatter(
                x=dfEtnicRate.years,
                y=dfEtnicRate['Not Reoprted'],
                name = "Not Reoprted",
                line = dict(color = 'blue'),
                opacity = 1)
trace_2 = go.Scatter(
                x=dfEtnicRate.years,
                y=dfEtnicRate['American Indian or Alaska Native'],
                name = "American Indian or Alaska Native",
                line = dict(color = 'green'),
                opacity = 0.8)
trace_3 = go.Scatter(
                x=dfEtnicRate.years,
                y=dfEtnicRate['Asian'],
                name = "Asian",
                line = dict(color = 'red'),
                opacity = 0.8)
trace_4 = go.Scatter(
                x=dfEtnicRate.years,
                y=dfEtnicRate['Pacific Islander'],
                name = "Pacific Islander",
                line = dict(color = 'Cyan'),
                opacity = 0.5)
trace_5 = go.Scatter(
                x=dfEtnicRate.years,
                y=dfEtnicRate['Filipino'],
                name = "Filipino",
                line = dict(color = 'Brown'),
                opacity = 0.5)
trace_6 = go.Scatter(
                x=dfEtnicRate.years,
                y=dfEtnicRate['Hispanic or Latino'],
                name = "Hispanic or Latino",
                line = dict(color = 'Magenta'),
                opacity = 0.5)
trace_7 = go.Scatter(
                x=dfEtnicRate.years,
                y=dfEtnicRate['African American'],
                name = "African American",
                line = dict(color = 'Green'),
                opacity = 0.5)
trace_8 = go.Scatter(
                x=dfEtnicRate.years,
                y=dfEtnicRate['Two or More Races'],
                name = "Two or More Races",
                line = dict(color = 'black'),
                opacity = 0.5)
trace_9 = go.Scatter(
                x=dfEtnicRate.years,
                y=dfEtnicRate['White'],
                name = "White",
                line = dict(color = 'gray'),
                opacity = 0.5)

data = [trace_1,trace_2, trace_3, trace_4, trace_5, trace_6, trace_7, trace_8, trace_9]

layout = dict(
    title = "Ethnic Makeup of California Schools 2009-2018",
    xaxis = dict(
        range = ['2009','2018'])
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename = "Manually Set Range")

Pie Chart Ethinicty¶

dfEnrlm[9][dfEnrlm[9]['ETHNIC']==0]['ENR_TOTAL'].sum()

58350

labels = ['Not reported','American Indian or Alaska Native','Asian','Pacific Islander','Filipino', 'Hispanic or Latino', 
          'African American', 'White', 'Two or More Races' ]
values = [dfEnrlm[9][dfEnrlm[9]['ETHNIC']==0]['ENR_TOTAL'].sum(),dfEnrlm[9][dfEnrlm[9]['ETHNIC']==1]['ENR_TOTAL'].sum(),
          dfEnrlm[9][dfEnrlm[9]['ETHNIC']==2]['ENR_TOTAL'].sum(), dfEnrlm[9][dfEnrlm[9]['ETHNIC']==3]['ENR_TOTAL'].sum(),
         dfEnrlm[9][dfEnrlm[9]['ETHNIC']==4]['ENR_TOTAL'].sum(), dfEnrlm[9][dfEnrlm[9]['ETHNIC']==5]['ENR_TOTAL'].sum(),
         dfEnrlm[9][dfEnrlm[9]['ETHNIC']==6]['ENR_TOTAL'].sum(),dfEnrlm[9][dfEnrlm[9]['ETHNIC']==7]['ENR_TOTAL'].sum(),
         dfEnrlm[9][dfEnrlm[9]['ETHNIC']==9]['ENR_TOTAL'].sum()]

trace = go.Pie(labels=labels, values=values)

py.iplot([trace], filename='basic_pie_chart')

labels = ['Not reported','American Indian or Alaska Native','Asian','Pacific Islander','Filipino', 'Hispanic or Latino', 
          'African American', 'White', 'Two or More Races' ]
values = [dfEnrlm[8][dfEnrlm[8]['ETHNIC']==0]['ENR_TOTAL'].sum(),dfEnrlm[8][dfEnrlm[8]['ETHNIC']==1]['ENR_TOTAL'].sum(),
          dfEnrlm[8][dfEnrlm[8]['ETHNIC']==2]['ENR_TOTAL'].sum(), dfEnrlm[8][dfEnrlm[8]['ETHNIC']==3]['ENR_TOTAL'].sum(),
         dfEnrlm[8][dfEnrlm[8]['ETHNIC']==4]['ENR_TOTAL'].sum(), dfEnrlm[8][dfEnrlm[8]['ETHNIC']==5]['ENR_TOTAL'].sum(),
         dfEnrlm[8][dfEnrlm[8]['ETHNIC']==6]['ENR_TOTAL'].sum(),dfEnrlm[8][dfEnrlm[8]['ETHNIC']==7]['ENR_TOTAL'].sum(),
         dfEnrlm[8][dfEnrlm[8]['ETHNIC']==9]['ENR_TOTAL'].sum()]

trace = go.Pie(labels=labels, values=values)

py.iplot([trace], filename='basic_pie_chart')

labels = ['Not reported','American Indian or Alaska Native','Asian','Pacific Islander','Filipino', 'Hispanic or Latino', 
          'African American', 'White', 'Two or More Races' ]
values = [dfEnrlm[7][dfEnrlm[7]['ETHNIC']==0]['ENR_TOTAL'].sum(),dfEnrlm[7][dfEnrlm[7]['ETHNIC']==1]['ENR_TOTAL'].sum(),
          dfEnrlm[7][dfEnrlm[7]['ETHNIC']==2]['ENR_TOTAL'].sum(), dfEnrlm[7][dfEnrlm[7]['ETHNIC']==3]['ENR_TOTAL'].sum(),
         dfEnrlm[7][dfEnrlm[7]['ETHNIC']==4]['ENR_TOTAL'].sum(), dfEnrlm[7][dfEnrlm[7]['ETHNIC']==5]['ENR_TOTAL'].sum(),
         dfEnrlm[7][dfEnrlm[7]['ETHNIC']==6]['ENR_TOTAL'].sum(),dfEnrlm[7][dfEnrlm[7]['ETHNIC']==7]['ENR_TOTAL'].sum(),
         dfEnrlm[7][dfEnrlm[7]['ETHNIC']==9]['ENR_TOTAL'].sum()]

trace = go.Pie(labels=labels, values=values)

py.iplot([trace], filename='basic_pie_chart')

labels = ['Not reported','American Indian or Alaska Native','Asian','Pacific Islander','Filipino', 'Hispanic or Latino', 
          'African American', 'White', 'Two or More Races' ]
values = [dfEnrlm[0][dfEnrlm[0]['ETHNIC']==0]['ENR_TOTAL'].sum(),dfEnrlm[0][dfEnrlm[0]['ETHNIC']==1]['ENR_TOTAL'].sum(),
          dfEnrlm[0][dfEnrlm[0]['ETHNIC']==2]['ENR_TOTAL'].sum(), dfEnrlm[0][dfEnrlm[0]['ETHNIC']==3]['ENR_TOTAL'].sum(),
         dfEnrlm[0][dfEnrlm[0]['ETHNIC']==4]['ENR_TOTAL'].sum(), dfEnrlm[0][dfEnrlm[0]['ETHNIC']==5]['ENR_TOTAL'].sum(),
         dfEnrlm[0][dfEnrlm[0]['ETHNIC']==6]['ENR_TOTAL'].sum(),dfEnrlm[0][dfEnrlm[0]['ETHNIC']==7]['ENR_TOTAL'].sum(),
         dfEnrlm[0][dfEnrlm[0]['ETHNIC']==9]['ENR_TOTAL'].sum()]

trace = go.Pie(labels=labels, values=values)

py.iplot([trace], filename='basic_pie_chart')

Maping to found county highest ethinicty¶

dfEnrlm[9].head(2) #2017-2018

County=pd.read_csv('Counties.csv')
len(County['county_name'])

58

Function to found county highest population ethinicity

#Function to found county highest population ethinicity for 2018     
HighestEthnicCode18=[]
for i in range (0,len(County['county_name'])):
    X=dfEnrlm[9][dfEnrlm[9].COUNTY==County['county_name'][i]] .groupby(['ETHNIC']).sum()['ENR_TOTAL'].idxmax()
    HighestEthnicCode18.append(X)

    #Function to found county highest population ethinicity for 2010
HighestEthnicCode10=[]
for i in range (0,len(County['county_name'])):
    Z=dfEnrlm[9][dfEnrlm[9].COUNTY==County['county_name'][i]] .groupby(['ETHNIC']).sum()['ENR_TOTAL'].idxmax()
    HighestEthnicCode10.append(Z)

df=pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/minoritymajority.csv')
df=df[df.STNAME=='California'][['FIPS', 'CTYNAME']]
df['HighestEthnicCode10']=HighestEthnicCode10
df['HighestEthnicCode18']=HighestEthnicCode18
df.head()

# ETHNIC
#Racial/ethnic designation. This field is coded as follows:

     #Code 0 = Not reported
     #Code 1 = American Indian or Alaska Native, Not Hispanic
     #Code 2 = Asian, Not Hispanic
     #Code 3 = Pacific Islander, Not Hispanic
     #Code 4 = Filipino, Not Hispanic
     #Code 5 = Hispanic or Latino
     #Code 6 = African American, not Hispanic
     #Code 7 = White, not Hispanic
     #Code 9 = Two or More Races, Not HispanicPercentile = df['HighestEthnicCode'].tolist()
scope = ['California']
values10 = df['HighestEthnicCode10'].tolist()
values18 = df['HighestEthnicCode18'].tolist()
fips = df['FIPS'].tolist()

colorscale = ['blue','green','red','Cyan','Brown']

fig10 = ff.create_choropleth(
    fips=fips, values=values10, scope=scope,
    colorscale=colorscale, round_legend_values=True,
    simplify_county=0, simplify_state=0,
    county_outline={'color': 'rgb(15, 15, 55)', 'width': 0.5},
    state_outline={'width': 1},
    legend_title='ethnic group code#',
    title='California Public Schools and Districts largest ethnic group by county (2009-2010)'
)
fig18 = ff.create_choropleth(
    fips=fips, values=values18, scope=scope,
    colorscale=colorscale, round_legend_values=True,
    simplify_county=0, simplify_state=0,
    county_outline={'color': 'rgb(15, 15, 55)', 'width': 0.5},
    state_outline={'width': 1},
    legend_title='ethnic group code#',
    title='California Public Schools and Districts largest ethnic group by county (2017-2018)'
)

py.iplot(fig10)

The draw time for this plot will be slow for clients without much RAM.

py.iplot(fig18)

The draw time for this plot will be slow for clients without much RAM.

	CAES	CAMS	CAHS	Total CA EL	years
0	955848	266557	293145	1513233	2009
1	945867	255342	286947	1468235	2010
2	704185	173223	195992	1057075	2011
3	931982	220549	256625	1387665	2012
4	919107	210510	237281	1346333	2013
5	964919	226959	242450	1413549	2014
6	950774	225014	235112	1392263	2015
7	937759	235064	227301	1373724	2016
8	882237	236686	223588	1332405	2017
9	832366	233573	217974	1271150	2018

	CAESrate	CAMSrate	CAHSrate	TOTAL_ELrate	years
0	0.342225	0.185763	0.145576	0.242039	2009
1	0.338649	0.182196	0.143691	0.237113	2010
2	0.250725	0.123471	0.098305	0.170030	2011
3	0.327751	0.157430	0.129630	0.223062	2012
4	0.321289	0.150347	0.120769	0.216209	2013
5	0.335196	0.162102	0.124186	0.226651	2014
6	0.329897	0.160640	0.120585	0.223279	2015
7	0.325508	0.167003	0.117105	0.220617	2016
8	0.310168	0.166478	0.115292	0.213930	2017
9	0.294748	0.162384	0.112130	0.204351	2018

	FIPS	STNAME	CTYNAME	TOTAL_EL	ENR_TOTAL
0	6001	California	Alameda County	46772	228356
1	6003	California	Alpine County	0	80
2	6005	California	Amador County	107	4147
3	6007	California	Butte County	2138	31760
4	6009	California	Calaveras County	195	5461

	FIPS	STNAME	CTYNAME	TOTAL_EL	ENR_TOTAL	percent
0	6001	California	Alameda County	46772	228356	20.482054
1	6003	California	Alpine County	0	80	0.000000
2	6005	California	Amador County	107	4147	2.580178
3	6007	California	Butte County	2138	31760	6.731738
4	6009	California	Calaveras County	195	5461	3.570775

	county_number	county_name
0	1	Alameda
1	2	Alpine
2	3	Amador

	CDS_CODE	COUNTY	DISTRICT	SCHOOL	ETHNIC	GENDER	KDGN	GR_1	GR_2	GR_3	...	GR_7	GR_8	UNGR_ELM	GR_9	GR_10	GR_11	GR_12	UNGR_SEC	ENR_TOTAL	ADULT
121062	36677100000001	NaN	NaN	NaN	5	M	0	0	0	0	...	0	0	0	0	0	0	1	0	1	0
121067	36678270000001	NaN	NaN	NaN	5	F	1	0	0	0	...	0	0	0	0	0	0	0	0	1	0

	FIPS	CTYNAME	HighestELLanguage10	HighestELLanguage18
2605	6001	Alameda County	Spanish	Spanish
2606	6003	Alpine County	Not EL	Not EL
2607	6005	Amador County	Spanish	Spanish
2608	6007	Butte County	Spanish	Spanish
2609	6009	Calaveras County	Spanish	Spanish

	CDS_CODE	COUNTY	DISTRICT	SCHOOL	ETHNIC	GENDER	KDGN	GR_1	GR_2	GR_3	...	ENR_TOTAL	EStotal	MStotal
0	10621666006266	Fresno	Fresno Unified	Gibson Elementary	6	M	2	2	5	2	...	25	21	9
1	10621666006266	Fresno	Fresno Unified	Gibson Elementary	5	F	18	11	14	12	...	98	77	19
2	10621666006266	Fresno	Fresno Unified	Gibson Elementary	9	F	2	2	2	1	...	13	13	1
3	10621666006266	Fresno	Fresno Unified	Gibson Elementary	1	F	0	0	0	1	...	2	1	1
4	10621666006266	Fresno	Fresno Unified	Gibson Elementary	4	F	1	0	0	0	...	1	1	0

	Not Reoprted	American Indian or Alaska Native	Asian	Pacific Islander	Filipino	Hispanic or Latino	African American	White	Two or More Races	Total Enrolment	years
0	0	46446	526403	39510	168112	3064614	454781	1741664	0	6252031	2009
1	112362	44919	527193	37026	156557	3118662	424462	1674066	96874	6192121	2010
2	67247	43552	529510	35787	159038	3197384	416098	1655598	112788	6217002	2011
3	49556	42539	535829	34944	157640	3236942	406089	1626507	130947	6220993	2012
4	44757	40414	536970	33958	154891	3282105	394695	1589393	149806	6226989	2013
5	39119	38616	542540	32821	151745	3321274	384291	1559113	167153	6236672	2014
6	38809	36755	545720	31513	158224	3344431	373280	1531088	175700	6235520	2015
7	38810	34704	551229	30436	156166	3360562	361752	1500932	192146	6226737	2016
8	46302	33369	559159	29384	153670	3378344	350338	1470499	207170	6228235	2017
9	58350	32500	569744	28920	151650	3376591	340841	1442388	219429	6220413	2018

	Not Reoprted	American Indian or Alaska Native	Asian	Pacific Islander	Filipino	Hispanic or Latino	African American	White	Two or More Races	Total Enrolment
0	0.000	0.007	0.084	0.006	0.027	0.490	0.073	0.279	0.000	6252031
1	0.018	0.007	0.085	0.006	0.025	0.504	0.069	0.270	0.016	6192121
2	0.011	0.007	0.085	0.006	0.026	0.514	0.067	0.266	0.018	6217002
3	0.008	0.007	0.086	0.006	0.025	0.520	0.065	0.261	0.021	6220993
4	0.007	0.006	0.086	0.005	0.025	0.527	0.063	0.255	0.024	6226989
5	0.006	0.006	0.087	0.005	0.024	0.533	0.062	0.250	0.027	6236672
6	0.006	0.006	0.088	0.005	0.025	0.536	0.060	0.246	0.028	6235520
7	0.006	0.006	0.089	0.005	0.025	0.540	0.058	0.241	0.031	6226737
8	0.007	0.005	0.090	0.005	0.025	0.542	0.056	0.236	0.033	6228235
9	0.009	0.005	0.092	0.005	0.024	0.543	0.055	0.232	0.035	6220413