Fork me on GitHub
California_Districts_Data

Insight into California's Public Schools and Districts Diversity

We will use the California Student and school 2009-2018 data to analyze California's Public Schools and Districts Success. Access to database is available via the following link: https://www.cde.ca.gov/ds/sd/sd/#esd

1. California's Public Schools and Districts Diversity

we will use the Enrollment Data and English Learner (EL) Data to analyze California's Public Schools and Districts Diversity

In [1]:
#!pip install plotly 
In [2]:
import matplotlib.pyplot
In [3]:
import pandas as pd
import numpy as np
import warnings
import plotly.plotly as py
import plotly.graph_objs as go
import plotly as plt
import seaborn as sns
import plotly.figure_factory as ff
import os
import glob
warnings.filterwarnings("ignore")
In [4]:
path = 'C:/Users/AKAYA/Desktop/DataIncubator/2019'
extension = 'txt'
os.chdir(path)
result = [i for i in glob.glob('*.{}'.format(extension))]
print(result)
['EL2009df.txt', 'EL2010df.txt', 'EL2011df.txt', 'EL2012df.txt', 'EL2013df.txt', 'EL2014df.txt', 'EL2015df.txt', 'EL2016df.txt', 'EL2017df.txt', 'EL2018df.txt', 'ELSCA2018.txt', 'Enrl2009dd.txt', 'Enrl2010dd.txt', 'Enrl2011dd.txt', 'Enrl2012dd.txt', 'Enrl2013dd.txt', 'Enrl2014dd.txt', 'Enrl2015dd.txt', 'Enrl2016dd.txt', 'Enrl2017dd.txt', 'Enrl2018dd.txt', 'ESLCA2018.txt', 'expenditures_readme.txt', 'Graduade20162017.txt', 'plot1.txt', 'Plot2.txt', 'web.txt']
In [5]:
#Function to load English Learner (EL) Data
path = 'C:/Users/AKAYA/Desktop/DataIncubator/2019' # use your path
allFiles1 = glob.glob(path + "/*df.txt")
frame = pd.DataFrame()
dfEL = []
for file_ in allFiles1:
    df = pd.read_table(file_,index_col=None, header=0)
    dfEL.append(df)
frame = pd.concat(dfEL)
In [6]:
len(frame)
Out[6]:
613107
In [7]:
# Function to check df shape
for i in range (0,len(allFiles1)):
    print(dfEL[i].shape)
(75685, 21)
(58909, 21)
(47748, 21)
(58676, 21)
(58648, 21)
(60775, 21)
(62042, 21)
(63250, 21)
(63714, 21)
(63660, 21)
In [8]:
#quck check to see na/null
for i in range (0,len(allFiles1)):
    print(dfEL[i].isna().sum())
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
GR_UNGR     0
TOTAL_EL    0
dtype: int64
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
GR_UNGR     0
TOTAL_EL    0
dtype: int64
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
GR_UNGR     0
TOTAL_EL    0
dtype: int64
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
UNGR        0
TOTAL_EL    0
dtype: int64
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
UNGR        0
TOTAL_EL    0
dtype: int64
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
UNGR        0
TOTAL_EL    0
dtype: int64
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
UNGR        0
TOTAL_EL    0
dtype: int64
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
UNGR        0
TOTAL_EL    0
dtype: int64
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
UNGR        0
TOTAL_EL    0
dtype: int64
CDS         0
COUNTY      0
DISTRICT    0
SCHOOL      0
LC          0
LANGUAGE    0
KDGN        0
GR_1        0
GR_2        0
GR_3        0
GR_4        0
GR_5        0
GR_6        0
GR_7        0
GR_8        0
GR_9        0
GR_10       0
GR_11       0
GR_12       0
UNGR        0
TOTAL_EL    0
dtype: int64
In [9]:
#Function to load Enrollment Data
path = 'C:/Users/AKAYA/Desktop/DataIncubator/2019' # use your path
allFiles2 = glob.glob(path + "/*dd.txt")
frame = pd.DataFrame()
dfEnrlm = []
for file_ in allFiles2:
    df = pd.read_table(file_,index_col=None, header=0)
    dfEnrlm.append(df)
frame = pd.concat(dfEnrlm)
In [10]:
# Function to check df shape
for i in range (0,len(allFiles2)):
    print(dfEnrlm[i].shape)
(121068, 23)
(124714, 23)
(126082, 23)
(127715, 23)
(127445, 23)
(127130, 23)
(129354, 23)
(129717, 23)
(129813, 23)
(130194, 23)
In [11]:
#quck check to see na/null
for i in range (0,len(allFiles2)):
    print(dfEnrlm[i].isna().sum())
CDS_CODE     0
COUNTY       2
DISTRICT     2
SCHOOL       2
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
In [12]:
#quck check to see na/null
for i in range (0,len(allFiles2)):
    print(dfEnrlm[i].isna().sum())
CDS_CODE     0
COUNTY       2
DISTRICT     2
SCHOOL       2
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64
CDS_CODE     0
COUNTY       0
DISTRICT     0
SCHOOL       0
ETHNIC       0
GENDER       0
KDGN         0
GR_1         0
GR_2         0
GR_3         0
GR_4         0
GR_5         0
GR_6         0
GR_7         0
GR_8         0
UNGR_ELM     0
GR_9         0
GR_10        0
GR_11        0
GR_12        0
UNGR_SEC     0
ENR_TOTAL    0
ADULT        0
dtype: int64

Upon close inspection we find that there 2 missing values in The COUNTY, DISTRICT and SCHOOL columns. We will check missing values by looking at other columns to see missing values impact on the data.

In [13]:
dfEnrlm[0][dfEnrlm[0].COUNTY.isna()]
Out[13]:
CDS_CODE COUNTY DISTRICT SCHOOL ETHNIC GENDER KDGN GR_1 GR_2 GR_3 ... GR_7 GR_8 UNGR_ELM GR_9 GR_10 GR_11 GR_12 UNGR_SEC ENR_TOTAL ADULT
121062 36677100000001 NaN NaN NaN 5 M 0 0 0 0 ... 0 0 0 0 0 0 1 0 1 0
121067 36678270000001 NaN NaN NaN 5 F 1 0 0 0 ... 0 0 0 0 0 0 0 0 1 0

2 rows × 23 columns

The only two students' COUNTY, DISTRICT and SCHOOL information is missing so we will remove it during county analysis

English Learner Data Analaysis

Function to add ESTotal, MSTotal, HSTotal

In [14]:
#function to add df the total  number and percentile of EL at Eelementarty, Middle and High School level
for i in range (0,len(allFiles1)):
    dfEL[i]['EStotal'] = dfEL[i].apply(lambda x: x['KDGN'] + x['GR_1']+x['GR_2']+ x['GR_3']+x['GR_4']+x['GR_4'], axis=1)
    dfEL[i]['MStotal'] = dfEL[i].apply(lambda x: x['GR_6']+x['GR_7']+x['GR_8'], axis=1)
    dfEL[i]['HStotal'] = dfEL[i].apply(lambda x: x['GR_9']+x['GR_10']+x['GR_11']+x['GR_12'], axis=1)
In [15]:
#function to add df the total  number of Enrolment at Eelementarty, Middle and High School level
for i in range (0,len(allFiles2)):
    dfEnrlm[i]['EStotal'] = dfEnrlm[i].apply(lambda x: x['KDGN'] + x['GR_1']+x['GR_2']+ x['GR_3']+x['GR_4']+x['GR_4'], axis=1)
    dfEnrlm[i]['MStotal'] = dfEnrlm[i].apply(lambda x: x['GR_6']+x['GR_7']+x['GR_8'], axis=1)
    dfEnrlm[i]['HStotal'] = dfEnrlm[i].apply(lambda x: x['GR_9']+x['GR_10']+x['GR_11']+x['GR_12'], axis=1)
In [16]:
for i in range (0,len(allFiles1)):
    dfEL[i]['ESpercent']= dfEL[i]['EStotal']/dfEnrlm[i]['EStotal'].sum()
    dfEL[i]['MSpercent']=dfEL[i]['MStotal']/dfEnrlm[i]['MStotal'].sum()
    dfEL[i]['HSpercent']=dfEL[i]['HStotal']/dfEnrlm[i]['HStotal'].sum()

Number for EL in California

In [17]:
CAES=[]
CAHS=[]
CAMS=[]
TOTAL_EL=[]
for i in range (0,len(allFiles1)):
    CAES.append(dfEL[i]['EStotal'].sum())
    CAMS.append(dfEL[i]['MStotal'].sum())
    CAHS.append(dfEL[i]['HStotal'].sum())
    TOTAL_EL.append(dfEL[i]['TOTAL_EL'].sum())
In [18]:
data = {'CAES':CAES,'CAMS':CAMS,'CAHS':CAHS,'Total CA EL':TOTAL_EL}
dftotal=pd.DataFrame(data)
dftotal['years']=[2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018]
dftotal
Out[18]:
CAES CAMS CAHS Total CA EL years
0 955848 266557 293145 1513233 2009
1 945867 255342 286947 1468235 2010
2 704185 173223 195992 1057075 2011
3 931982 220549 256625 1387665 2012
4 919107 210510 237281 1346333 2013
5 964919 226959 242450 1413549 2014
6 950774 225014 235112 1392263 2015
7 937759 235064 227301 1373724 2016
8 882237 236686 223588 1332405 2017
9 832366 233573 217974 1271150 2018
In [19]:
trace_1 = go.Scatter(
                x=dftotal.years,
                y=dftotal['CAES'],
                name = "CA EL Elementary Sch.",
                line = dict(color = 'blue'),
                opacity = 1)
trace_2 = go.Scatter(
                x=dftotal.years,
                y=dftotal['CAMS'],
                name = "CA EL Middle Sch.",
                line = dict(color = 'green'),
                opacity = 0.8)
trace_3 = go.Scatter(
                x=dftotal.years,
                y=dftotal['CAHS'],
                name = "CA EL High Sch.",
                line = dict(color = 'red'),
                opacity = 0.8)
trace_4 = go.Scatter(
                x=dftotal.years,
                y=dftotal['Total CA EL'],
                name = "Total CA EL",
                line = dict(color = 'black'),
                opacity = 0.5)
data = [trace_1,trace_2, trace_3, trace_4]

layout = dict(
    title = "Number of EL California",
    xaxis = dict(
        range = ['2009','2018'])
)

fig = dict(data=data, layout=layout)
py.iplot(fig)
Out[19]:
In [20]:
CAESrate=[]
CAHSrate=[]
CAMSrate=[]
TOTAL_ELrate=[]
for i in range (0,len(allFiles1)):
    CAESrate.append(dfEL[i]['ESpercent'].sum())
    CAMSrate.append(dfEL[i]['MSpercent'].sum())
    CAHSrate.append(dfEL[i]['HSpercent'].sum())
    TOTAL_ELrate.append(dfEL[i]['TOTAL_EL'].sum()/dfEnrlm[i]['ENR_TOTAL'].sum())
    data = {'CAESrate':CAESrate,'CAMSrate':CAMSrate,'CAHSrate':CAHSrate,'TOTAL_ELrate':TOTAL_ELrate}
dfrate=pd.DataFrame(data)
dfrate['years']=[2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018]
dfrate
Out[20]:
CAESrate CAMSrate CAHSrate TOTAL_ELrate years
0 0.342225 0.185763 0.145576 0.242039 2009
1 0.338649 0.182196 0.143691 0.237113 2010
2 0.250725 0.123471 0.098305 0.170030 2011
3 0.327751 0.157430 0.129630 0.223062 2012
4 0.321289 0.150347 0.120769 0.216209 2013
5 0.335196 0.162102 0.124186 0.226651 2014
6 0.329897 0.160640 0.120585 0.223279 2015
7 0.325508 0.167003 0.117105 0.220617 2016
8 0.310168 0.166478 0.115292 0.213930 2017
9 0.294748 0.162384 0.112130 0.204351 2018
In [21]:
trace_1 = go.Scatter(
                x=dfrate.years,
                y=dfrate['CAESrate'],
                name = "CA EL ratet Elementary Sch.",
                line = dict(color = 'blue'),
                opacity = 1)
trace_2 = go.Scatter(
                x=dfrate.years,
                y=dfrate['CAMSrate'],
                name = "CA ELrate Middle Sch.",
                line = dict(color = 'green'),
                opacity = 0.8)
trace_3 = go.Scatter(
                x=dfrate.years,
                y=dfrate['CAHSrate'],
                name = "CA EL rate High Sch.",
                line = dict(color = 'red'),
                opacity = 0.8)
trace_4 = go.Scatter(
                x=dfrate.years,
                y=dfrate['TOTAL_ELrate'],
                name = "CA EL total rate",
                line = dict(color = 'black'),
                opacity = 0.5)
data = [trace_1,trace_2, trace_3, trace_4]

layout = dict(
    title = "EL rate California",
    xaxis = dict(
        range = ['2009','2018'])
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename = "Manually Set Range")
Out[21]:

save the data for EL maping

In [22]:
dfmap = pd.merge(pd.DataFrame(dfEL[9].groupby(['COUNTY']).sum()['TOTAL_EL']),
                  pd.DataFrame(dfEnrlm[9].groupby(['COUNTY']).sum()['ENR_TOTAL']),how='right', 
                                on='COUNTY')
dfmap=dfmap.fillna(0)
In [23]:
df_map = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/minoritymajority.csv')
df_map=df_map.iloc[:,0:3]
df_map=df_map[df_map.STNAME=='California']
df_map=df_map.reset_index(drop=True)
In [24]:
#dfmap.to_csv('dfmap.csv', index=False)
#df_map.to_csv('df_map.csv', index=False)

EL MAP

In [25]:
df=pd.read_csv('formap.csv')
df.head()
Out[25]:
FIPS STNAME CTYNAME TOTAL_EL ENR_TOTAL
0 6001 California Alameda County 46772 228356
1 6003 California Alpine County 0 80
2 6005 California Amador County 107 4147
3 6007 California Butte County 2138 31760
4 6009 California Calaveras County 195 5461
In [26]:
df['percent']=df['TOTAL_EL']*100/df['ENR_TOTAL']
df.head()
Out[26]:
FIPS STNAME CTYNAME TOTAL_EL ENR_TOTAL percent
0 6001 California Alameda County 46772 228356 20.482054
1 6003 California Alpine County 0 80 0.000000
2 6005 California Amador County 107 4147 2.580178
3 6007 California Butte County 2138 31760 6.731738
4 6009 California Calaveras County 195 5461 3.570775
In [27]:
Percentile = df['percent'].tolist()
fips = df['FIPS'].tolist()

colorscale = ["#c6dbef", "#b3d2e9","#9ecae1","#85bcdb","#3082be","#2171b5","#1361a9","#08519c","#0b4083","#08306b"]

fig = ff.create_choropleth(
    fips=fips, values=Percentile, scope=['CA'],
    binning_endpoints=[5, 10, 15, 20, 25, 30, 35, 40], colorscale=colorscale,
    county_outline={'color': 'blue', 'width': 0.3}, round_legend_values=True,
    legend_title='Percent%', title='Percentage of English learners by County'
)
py.iplot(fig)
Out[27]:

EL The Home Language Map¶

In [28]:
County=pd.read_csv('Counties.csv')
County.head(3)
Out[28]:
county_number county_name
0 1 Alameda
1 2 Alpine
2 3 Amador
In [29]:
#In the Alpine there is EL so we will add Alpine as 0 manually and reindex the df to avoid effect to function
#County=County.drop(County.index[[1]]).reset_index(drop=True)
In [30]:
#Function to found county highest EL language for 2010. We will tart from 2nd one due to Alpine there is no EL  
HighestELLanguage10=['Spanish','Not EL']
for i in range (2,len(County['county_name'])):
    X=dfEL[1][dfEL[1].COUNTY==County['county_name'][i]] .groupby(['LANGUAGE']).sum()['TOTAL_EL'].idxmax()
    HighestELLanguage10.append(X)

    #Function to found county highest EL language for 2018
HighestELLanguage18=['Spanish','Not EL']
for i in range (2,len(County['county_name'])):
    Z=dfEL[9][dfEL[9].COUNTY==County['county_name'][i]] .groupby(['LANGUAGE']).sum()['TOTAL_EL'].idxmax()
    HighestELLanguage18.append(Z)
In [31]:
df=pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/minoritymajority.csv')
df=df[df.STNAME=='California'][['FIPS', 'CTYNAME']]
df['HighestELLanguage10']=HighestELLanguage10
df['HighestELLanguage18']=HighestELLanguage18
df.head()
Out[31]:
FIPS CTYNAME HighestELLanguage10 HighestELLanguage18
2605 6001 Alameda County Spanish Spanish
2606 6003 Alpine County Not EL Not EL
2607 6005 Amador County Spanish Spanish
2608 6007 Butte County Spanish Spanish
2609 6009 Calaveras County Spanish Spanish
In [32]:
df['HighestELLanguage10'].drop_duplicates()
Out[32]:
2605    Spanish
2606     Not EL
Name: HighestELLanguage10, dtype: object

In the California highest English Learners are speaking as first language is spanish

In [33]:
df['HighestELLanguage18'].drop_duplicates()
Out[33]:
2605    Spanish
2606     Not EL
2658      Hmong
Name: HighestELLanguage18, dtype: object

I would like to put EL 2018 in the map to see region so I will asign 0 to Not EL, 1 to Spanish and 2 to Hmog

Code 0="Not EL"
Code 1="Hmong"
Code 2="Spanish"
In [34]:
#One-Hot encoding:The basic strategy is to convert each category value into a column and assign a 1, 2, 3, 4, and 5. This has the benefit of not weighting a value improperly.
OneHot = {'HighestELLanguage18':     {"Spanish": 2, "Not EL": 0, "Hmong":1}}
df.replace(OneHot, inplace=True)
df['HighestELLanguage18'].drop_duplicates()
Out[34]:
2605    2
2606    0
2658    1
Name: HighestELLanguage18, dtype: int64
In [35]:
# ETHNIC
#Racial/ethnic designation. This field is coded as follows:

     #Code 0="Not EL"
     #Code 1="Hmong"
     #Code 2="Spanish"

scope = ['California']
values = df['HighestELLanguage18'].tolist()
fips = df['FIPS'].tolist()
colorscale = ['blue','green','red','Cyan','Brown']

fig = ff.create_choropleth(
    fips=fips, values=values, scope=scope,
    colorscale=colorscale, round_legend_values=True,
    simplify_county=0, simplify_state=0,
    county_outline={'color': 'rgb(15, 15, 55)', 'width': 0.5},
    state_outline={'width': 1},
    legend_title='The Home Language code#',
    title='English Learners Home Language (20017-2018)')

py.iplot(fig)
The draw time for this plot will be slow for clients without much RAM.
Out[35]:

Function to add EStotal, MStotal and HStotal to Enrolment df

In [37]:
#function to add df the total  number of Enrolment at Eelementarty, Middle and High School level
for i in range (0,len(allFiles2)):
    dfEnrlm[i]['EStotal'] = dfEnrlm[i].apply(lambda x: x['KDGN'] + x['GR_1']+x['GR_2']+ x['GR_3']+x['GR_4']+x['GR_4'], axis=1)
    dfEnrlm[i]['MStotal'] = dfEnrlm[i].apply(lambda x: x['GR_6']+x['GR_7']+x['GR_8'], axis=1)
    dfEnrlm[i]['HStotal'] = dfEnrlm[i].apply(lambda x: x['GR_9']+x['GR_10']+x['GR_11']+x['GR_12'], axis=1)

ETHNIC

Racial/ethnic designation. This field is coded as follows:

 Code 0 = Not reported
 Code 1 = American Indian or Alaska Native, Not Hispanic
 Code 2 = Asian, Not Hispanic
 Code 3 = Pacific Islander, Not Hispanic
 Code 4 = Filipino, Not Hispanic
 Code 5 = Hispanic or Latino
 Code 6 = African American, not Hispanic
 Code 7 = White, not Hispanic
 Code 9 = Two or More Races, Not Hispanic
In [38]:
dfEnrlm[9].head()
Out[38]:
CDS_CODE COUNTY DISTRICT SCHOOL ETHNIC GENDER KDGN GR_1 GR_2 GR_3 ... GR_9 GR_10 GR_11 GR_12 UNGR_SEC ENR_TOTAL ADULT EStotal MStotal HStotal
0 10621666006266 Fresno Fresno Unified Gibson Elementary 6 M 2 2 5 2 ... 0 0 0 0 0 25 0 21 9 0
1 10621666006266 Fresno Fresno Unified Gibson Elementary 5 F 18 11 14 12 ... 0 0 0 0 0 98 0 77 19 0
2 10621666006266 Fresno Fresno Unified Gibson Elementary 9 F 2 2 2 1 ... 0 0 0 0 0 13 0 13 1 0
3 10621666006266 Fresno Fresno Unified Gibson Elementary 1 F 0 0 0 1 ... 0 0 0 0 0 2 0 1 1 0
4 10621666006266 Fresno Fresno Unified Gibson Elementary 4 F 1 0 0 0 ... 0 0 0 0 0 1 0 1 0 0

5 rows × 26 columns

In [39]:
dfEnrlm[9]['ETHNIC'].drop_duplicates()
Out[39]:
0     6
1     5
2     9
3     1
4     4
6     7
8     2
38    3
44    0
Name: ETHNIC, dtype: int64

Ethnicity Numbers 2009-2018

In [40]:
Code0=[] 
Code1=[] 
Code2=[] 
Code3=[] 
Code4=[] 
Code5=[] 
Code6=[] 
Code7=[] 
Code9=[]
CodeTotal=[]
for i in range (0,len(allFiles2)):
    Code0.append(dfEnrlm[i][dfEnrlm[i]['ETHNIC']==0]['ENR_TOTAL'].sum())
    Code1.append(dfEnrlm[i][dfEnrlm[i]['ETHNIC']==1]['ENR_TOTAL'].sum())
    Code2.append(dfEnrlm[i][dfEnrlm[i]['ETHNIC']==2]['ENR_TOTAL'].sum())
    Code3.append(dfEnrlm[i][dfEnrlm[i]['ETHNIC']==3]['ENR_TOTAL'].sum())
    Code4.append(dfEnrlm[i][dfEnrlm[i]['ETHNIC']==4]['ENR_TOTAL'].sum())
    Code5.append(dfEnrlm[i][dfEnrlm[i]['ETHNIC']==5]['ENR_TOTAL'].sum())
    Code6.append(dfEnrlm[i][dfEnrlm[i]['ETHNIC']==6]['ENR_TOTAL'].sum())
    Code7.append(dfEnrlm[i][dfEnrlm[i]['ETHNIC']==7]['ENR_TOTAL'].sum())
    Code9.append(dfEnrlm[i][dfEnrlm[i]['ETHNIC']==9]['ENR_TOTAL'].sum())
    CodeTotal.append(dfEnrlm[i]['ENR_TOTAL'].sum())
In [41]:
data={'Not Reoprted':Code0, 'American Indian or Alaska Native': Code1, 'Asian':Code2, 'Pacific Islander':Code3, 
     'Filipino':Code4, 'Hispanic or Latino':Code5, 'African American':Code6, 'White':Code7, 
      'Two or More Races':Code9, 'Total Enrolment':CodeTotal}
dfEthnic=pd.DataFrame(data)
dfEthnic['years']=[2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018]

Total Enrolment by Year

In [42]:
df=dfEthnic

trace_1 = go.Scatter(
                x=df.years,
                y=df['Total Enrolment'],
                name = "Total Enrolment",
                line = dict(color = 'blue'),
                opacity = 1)


data = [trace_1]

layout = dict(
    title = "California Schools Total Enrolment 2009-2018",
    xaxis = dict(
        range = ['2009','2018'])
)

fig = dict(data=data, layout=layout)
py.iplot(fig)
Out[42]:
In [44]:
dfEthnic
Out[44]:
Not Reoprted American Indian or Alaska Native Asian Pacific Islander Filipino Hispanic or Latino African American White Two or More Races Total Enrolment years
0 0 46446 526403 39510 168112 3064614 454781 1741664 0 6252031 2009
1 112362 44919 527193 37026 156557 3118662 424462 1674066 96874 6192121 2010
2 67247 43552 529510 35787 159038 3197384 416098 1655598 112788 6217002 2011
3 49556 42539 535829 34944 157640 3236942 406089 1626507 130947 6220993 2012
4 44757 40414 536970 33958 154891 3282105 394695 1589393 149806 6226989 2013
5 39119 38616 542540 32821 151745 3321274 384291 1559113 167153 6236672 2014
6 38809 36755 545720 31513 158224 3344431 373280 1531088 175700 6235520 2015
7 38810 34704 551229 30436 156166 3360562 361752 1500932 192146 6226737 2016
8 46302 33369 559159 29384 153670 3378344 350338 1470499 207170 6228235 2017
9 58350 32500 569744 28920 151650 3376591 340841 1442388 219429 6220413 2018

It can be seen from table total number is diferent from totla numbers of ethicity so some of ethnicity is missing in 2009

In [45]:
df=dfEthnic

trace_1 = go.Scatter(
                x=df.years,
                y=df['Not Reoprted'],
                name = "Not Reoprted",
                line = dict(color = 'blue'),
                opacity = 1)
trace_2 = go.Scatter(
                x=df.years,
                y=df['American Indian or Alaska Native'],
                name = "American Indian or Alaska Native",
                line = dict(color = 'green'),
                opacity = 0.8)
trace_3 = go.Scatter(
                x=df.years,
                y=df['Asian'],
                name = "Asian",
                line = dict(color = 'red'),
                opacity = 0.8)
trace_4 = go.Scatter(
                x=df.years,
                y=df['Pacific Islander'],
                name = "Pacific Islander",
                line = dict(color = 'Cyan'),
                opacity = 0.5)
trace_5 = go.Scatter(
                x=df.years,
                y=df['Filipino'],
                name = "Filipino",
                line = dict(color = 'Brown'),
                opacity = 0.5)
trace_6 = go.Scatter(
                x=df.years,
                y=df['Hispanic or Latino'],
                name = "Hispanic or Latino",
                line = dict(color = 'Magenta'),
                opacity = 0.5)
trace_7 = go.Scatter(
                x=df.years,
                y=df['African American'],
                name = "African American",
                line = dict(color = 'Green'),
                opacity = 0.5)
trace_8 = go.Scatter(
                x=df.years,
                y=df['Two or More Races'],
                name = "Two or More Races",
                line = dict(color = 'black'),
                opacity = 0.5)
trace_9 = go.Scatter(
                x=df.years,
                y=df['White'],
                name = "White",
                line = dict(color = 'gray'),
                opacity = 0.5)

data = [trace_1,trace_2, trace_3, trace_4, trace_5, trace_6, trace_7, trace_8, trace_9]

layout = dict(
    title = "Ethnic Makeup of California Schools 2009-2018",
    xaxis = dict(
        range = ['2009','2018'])
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename = "Manually Set Range")
Out[45]:

Ethnic Rate

In [46]:
dfEthnic.head(2)
Out[46]:
Not Reoprted American Indian or Alaska Native Asian Pacific Islander Filipino Hispanic or Latino African American White Two or More Races Total Enrolment years
0 0 46446 526403 39510 168112 3064614 454781 1741664 0 6252031 2009
1 112362 44919 527193 37026 156557 3118662 424462 1674066 96874 6192121 2010
In [47]:
dfEtnicRate=dfEthnic

dfEtnicRate['Not Reoprted']=dfEthnic['Not Reoprted']/dfEthnic['Total Enrolment']
dfEtnicRate['American Indian or Alaska Native']=dfEthnic['American Indian or Alaska Native']/dfEthnic['Total Enrolment']
dfEtnicRate['Asian']=dfEthnic['Asian']/dfEthnic['Total Enrolment']
dfEtnicRate['Pacific Islander']=dfEthnic['Pacific Islander']/dfEthnic['Total Enrolment']
dfEtnicRate['Filipino']=dfEthnic['Filipino']/dfEthnic['Total Enrolment']
dfEtnicRate['Hispanic or Latino']=dfEthnic['Hispanic or Latino']/dfEthnic['Total Enrolment']
dfEtnicRate['African American']=dfEthnic['African American']/dfEthnic['Total Enrolment']
dfEtnicRate['White']=dfEthnic['White']/dfEthnic['Total Enrolment']
dfEtnicRate['Two or More Races']=dfEthnic['Two or More Races']/dfEthnic['Total Enrolment']
dfEtnicRate.iloc[:,0:10].round(3)
Out[47]:
Not Reoprted American Indian or Alaska Native Asian Pacific Islander Filipino Hispanic or Latino African American White Two or More Races Total Enrolment
0 0.000 0.007 0.084 0.006 0.027 0.490 0.073 0.279 0.000 6252031
1 0.018 0.007 0.085 0.006 0.025 0.504 0.069 0.270 0.016 6192121
2 0.011 0.007 0.085 0.006 0.026 0.514 0.067 0.266 0.018 6217002
3 0.008 0.007 0.086 0.006 0.025 0.520 0.065 0.261 0.021 6220993
4 0.007 0.006 0.086 0.005 0.025 0.527 0.063 0.255 0.024 6226989
5 0.006 0.006 0.087 0.005 0.024 0.533 0.062 0.250 0.027 6236672
6 0.006 0.006 0.088 0.005 0.025 0.536 0.060 0.246 0.028 6235520
7 0.006 0.006 0.089 0.005 0.025 0.540 0.058 0.241 0.031 6226737
8 0.007 0.005 0.090 0.005 0.025 0.542 0.056 0.236 0.033 6228235
9 0.009 0.005 0.092 0.005 0.024 0.543 0.055 0.232 0.035 6220413
In [48]:
trace_1 = go.Scatter(
                x=dfEtnicRate.years,
                y=dfEtnicRate['Not Reoprted'],
                name = "Not Reoprted",
                line = dict(color = 'blue'),
                opacity = 1)
trace_2 = go.Scatter(
                x=dfEtnicRate.years,
                y=dfEtnicRate['American Indian or Alaska Native'],
                name = "American Indian or Alaska Native",
                line = dict(color = 'green'),
                opacity = 0.8)
trace_3 = go.Scatter(
                x=dfEtnicRate.years,
                y=dfEtnicRate['Asian'],
                name = "Asian",
                line = dict(color = 'red'),
                opacity = 0.8)
trace_4 = go.Scatter(
                x=dfEtnicRate.years,
                y=dfEtnicRate['Pacific Islander'],
                name = "Pacific Islander",
                line = dict(color = 'Cyan'),
                opacity = 0.5)
trace_5 = go.Scatter(
                x=dfEtnicRate.years,
                y=dfEtnicRate['Filipino'],
                name = "Filipino",
                line = dict(color = 'Brown'),
                opacity = 0.5)
trace_6 = go.Scatter(
                x=dfEtnicRate.years,
                y=dfEtnicRate['Hispanic or Latino'],
                name = "Hispanic or Latino",
                line = dict(color = 'Magenta'),
                opacity = 0.5)
trace_7 = go.Scatter(
                x=dfEtnicRate.years,
                y=dfEtnicRate['African American'],
                name = "African American",
                line = dict(color = 'Green'),
                opacity = 0.5)
trace_8 = go.Scatter(
                x=dfEtnicRate.years,
                y=dfEtnicRate['Two or More Races'],
                name = "Two or More Races",
                line = dict(color = 'black'),
                opacity = 0.5)
trace_9 = go.Scatter(
                x=dfEtnicRate.years,
                y=dfEtnicRate['White'],
                name = "White",
                line = dict(color = 'gray'),
                opacity = 0.5)

data = [trace_1,trace_2, trace_3, trace_4, trace_5, trace_6, trace_7, trace_8, trace_9]

layout = dict(
    title = "Ethnic Makeup of California Schools 2009-2018",
    xaxis = dict(
        range = ['2009','2018'])
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename = "Manually Set Range")
Out[48]:

Pie Chart Ethinicty

In [49]:
dfEnrlm[9][dfEnrlm[9]['ETHNIC']==0]['ENR_TOTAL'].sum()
Out[49]:
58350
In [50]:
labels = ['Not reported','American Indian or Alaska Native','Asian','Pacific Islander','Filipino', 'Hispanic or Latino', 
          'African American', 'White', 'Two or More Races' ]
values = [dfEnrlm[9][dfEnrlm[9]['ETHNIC']==0]['ENR_TOTAL'].sum(),dfEnrlm[9][dfEnrlm[9]['ETHNIC']==1]['ENR_TOTAL'].sum(),
          dfEnrlm[9][dfEnrlm[9]['ETHNIC']==2]['ENR_TOTAL'].sum(), dfEnrlm[9][dfEnrlm[9]['ETHNIC']==3]['ENR_TOTAL'].sum(),
         dfEnrlm[9][dfEnrlm[9]['ETHNIC']==4]['ENR_TOTAL'].sum(), dfEnrlm[9][dfEnrlm[9]['ETHNIC']==5]['ENR_TOTAL'].sum(),
         dfEnrlm[9][dfEnrlm[9]['ETHNIC']==6]['ENR_TOTAL'].sum(),dfEnrlm[9][dfEnrlm[9]['ETHNIC']==7]['ENR_TOTAL'].sum(),
         dfEnrlm[9][dfEnrlm[9]['ETHNIC']==9]['ENR_TOTAL'].sum()]

trace = go.Pie(labels=labels, values=values)

py.iplot([trace], filename='basic_pie_chart')
Out[50]:
In [51]:
labels = ['Not reported','American Indian or Alaska Native','Asian','Pacific Islander','Filipino', 'Hispanic or Latino', 
          'African American', 'White', 'Two or More Races' ]
values = [dfEnrlm[8][dfEnrlm[8]['ETHNIC']==0]['ENR_TOTAL'].sum(),dfEnrlm[8][dfEnrlm[8]['ETHNIC']==1]['ENR_TOTAL'].sum(),
          dfEnrlm[8][dfEnrlm[8]['ETHNIC']==2]['ENR_TOTAL'].sum(), dfEnrlm[8][dfEnrlm[8]['ETHNIC']==3]['ENR_TOTAL'].sum(),
         dfEnrlm[8][dfEnrlm[8]['ETHNIC']==4]['ENR_TOTAL'].sum(), dfEnrlm[8][dfEnrlm[8]['ETHNIC']==5]['ENR_TOTAL'].sum(),
         dfEnrlm[8][dfEnrlm[8]['ETHNIC']==6]['ENR_TOTAL'].sum(),dfEnrlm[8][dfEnrlm[8]['ETHNIC']==7]['ENR_TOTAL'].sum(),
         dfEnrlm[8][dfEnrlm[8]['ETHNIC']==9]['ENR_TOTAL'].sum()]

trace = go.Pie(labels=labels, values=values)

py.iplot([trace], filename='basic_pie_chart')
Out[51]:
In [52]:
labels = ['Not reported','American Indian or Alaska Native','Asian','Pacific Islander','Filipino', 'Hispanic or Latino', 
          'African American', 'White', 'Two or More Races' ]
values = [dfEnrlm[7][dfEnrlm[7]['ETHNIC']==0]['ENR_TOTAL'].sum(),dfEnrlm[7][dfEnrlm[7]['ETHNIC']==1]['ENR_TOTAL'].sum(),
          dfEnrlm[7][dfEnrlm[7]['ETHNIC']==2]['ENR_TOTAL'].sum(), dfEnrlm[7][dfEnrlm[7]['ETHNIC']==3]['ENR_TOTAL'].sum(),
         dfEnrlm[7][dfEnrlm[7]['ETHNIC']==4]['ENR_TOTAL'].sum(), dfEnrlm[7][dfEnrlm[7]['ETHNIC']==5]['ENR_TOTAL'].sum(),
         dfEnrlm[7][dfEnrlm[7]['ETHNIC']==6]['ENR_TOTAL'].sum(),dfEnrlm[7][dfEnrlm[7]['ETHNIC']==7]['ENR_TOTAL'].sum(),
         dfEnrlm[7][dfEnrlm[7]['ETHNIC']==9]['ENR_TOTAL'].sum()]

trace = go.Pie(labels=labels, values=values)

py.iplot([trace], filename='basic_pie_chart')
Out[52]:
In [53]:
labels = ['Not reported','American Indian or Alaska Native','Asian','Pacific Islander','Filipino', 'Hispanic or Latino', 
          'African American', 'White', 'Two or More Races' ]
values = [dfEnrlm[0][dfEnrlm[0]['ETHNIC']==0]['ENR_TOTAL'].sum(),dfEnrlm[0][dfEnrlm[0]['ETHNIC']==1]['ENR_TOTAL'].sum(),
          dfEnrlm[0][dfEnrlm[0]['ETHNIC']==2]['ENR_TOTAL'].sum(), dfEnrlm[0][dfEnrlm[0]['ETHNIC']==3]['ENR_TOTAL'].sum(),
         dfEnrlm[0][dfEnrlm[0]['ETHNIC']==4]['ENR_TOTAL'].sum(), dfEnrlm[0][dfEnrlm[0]['ETHNIC']==5]['ENR_TOTAL'].sum(),
         dfEnrlm[0][dfEnrlm[0]['ETHNIC']==6]['ENR_TOTAL'].sum(),dfEnrlm[0][dfEnrlm[0]['ETHNIC']==7]['ENR_TOTAL'].sum(),
         dfEnrlm[0][dfEnrlm[0]['ETHNIC']==9]['ENR_TOTAL'].sum()]

trace = go.Pie(labels=labels, values=values)

py.iplot([trace], filename='basic_pie_chart')
Out[53]:

Maping to found county highest ethinicty

In [54]:
dfEnrlm[9].head(2) #2017-2018
Out[54]:
CDS_CODE COUNTY DISTRICT SCHOOL ETHNIC GENDER KDGN GR_1 GR_2 GR_3 ... GR_9 GR_10 GR_11 GR_12 UNGR_SEC ENR_TOTAL ADULT EStotal MStotal HStotal
0 10621666006266 Fresno Fresno Unified Gibson Elementary 6 M 2 2 5 2 ... 0 0 0 0 0 25 0 21 9 0
1 10621666006266 Fresno Fresno Unified Gibson Elementary 5 F 18 11 14 12 ... 0 0 0 0 0 98 0 77 19 0

2 rows × 26 columns

In [55]:
County=pd.read_csv('Counties.csv')
len(County['county_name'])
Out[55]:
58

Function to found county highest population ethinicity

In [56]:
#Function to found county highest population ethinicity for 2018     
HighestEthnicCode18=[]
for i in range (0,len(County['county_name'])):
    X=dfEnrlm[9][dfEnrlm[9].COUNTY==County['county_name'][i]] .groupby(['ETHNIC']).sum()['ENR_TOTAL'].idxmax()
    HighestEthnicCode18.append(X)

    #Function to found county highest population ethinicity for 2010
HighestEthnicCode10=[]
for i in range (0,len(County['county_name'])):
    Z=dfEnrlm[9][dfEnrlm[9].COUNTY==County['county_name'][i]] .groupby(['ETHNIC']).sum()['ENR_TOTAL'].idxmax()
    HighestEthnicCode10.append(Z)
In [57]:
df=pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/minoritymajority.csv')
df=df[df.STNAME=='California'][['FIPS', 'CTYNAME']]
df['HighestEthnicCode10']=HighestEthnicCode10
df['HighestEthnicCode18']=HighestEthnicCode18
df.head()
Out[57]:
FIPS CTYNAME HighestEthnicCode10 HighestEthnicCode18
2605 6001 Alameda County 5 5
2606 6003 Alpine County 1 1
2607 6005 Amador County 7 7
2608 6007 Butte County 7 7
2609 6009 Calaveras County 7 7
In [58]:
# ETHNIC
#Racial/ethnic designation. This field is coded as follows:

     #Code 0 = Not reported
     #Code 1 = American Indian or Alaska Native, Not Hispanic
     #Code 2 = Asian, Not Hispanic
     #Code 3 = Pacific Islander, Not Hispanic
     #Code 4 = Filipino, Not Hispanic
     #Code 5 = Hispanic or Latino
     #Code 6 = African American, not Hispanic
     #Code 7 = White, not Hispanic
     #Code 9 = Two or More Races, Not HispanicPercentile = df['HighestEthnicCode'].tolist()
scope = ['California']
values10 = df['HighestEthnicCode10'].tolist()
values18 = df['HighestEthnicCode18'].tolist()
fips = df['FIPS'].tolist()

colorscale = ['blue','green','red','Cyan','Brown']

fig10 = ff.create_choropleth(
    fips=fips, values=values10, scope=scope,
    colorscale=colorscale, round_legend_values=True,
    simplify_county=0, simplify_state=0,
    county_outline={'color': 'rgb(15, 15, 55)', 'width': 0.5},
    state_outline={'width': 1},
    legend_title='ethnic group code#',
    title='California Public Schools and Districts largest ethnic group by county (2009-2010)'
)
fig18 = ff.create_choropleth(
    fips=fips, values=values18, scope=scope,
    colorscale=colorscale, round_legend_values=True,
    simplify_county=0, simplify_state=0,
    county_outline={'color': 'rgb(15, 15, 55)', 'width': 0.5},
    state_outline={'width': 1},
    legend_title='ethnic group code#',
    title='California Public Schools and Districts largest ethnic group by county (2017-2018)'
)
In [59]:
py.iplot(fig10)
The draw time for this plot will be slow for clients without much RAM.
Out[59]:
In [60]:
py.iplot(fig18)
The draw time for this plot will be slow for clients without much RAM.
Out[60]: