Source code for our project: https://github.com/Chauhanshi/NASA-SpaceApp-Hackathon-Challenge_Integrated-assessment
| #!/usr/bin/env python | |
| # coding: utf-8 | |
| #author: Shivam | |
| # In[1]: | |
| #load libraries | |
| importpandasaspd | |
| importnumpyasnp | |
| importmatplotlib.pyplotasplt | |
| get_ipython().run_line_magic('matplotlib', 'inline') | |
| importseabornassns | |
| # In[2]: | |
| #load datasets | |
| unemp_df=pd.read_csv("Bureau_of_Labor_Statistics_Monthly_Unemployment__current_14_months_.csv") | |
| cvi_df=pd.read_csv("Chmura_CVI_Counties_April_2020.csv") | |
| glob_mob=pd.read_csv("Global_Mobility_Report.csv") | |
| # In[3]: | |
| #check umemployment data | |
| unemp_df.head() | |
| # In[4]: | |
| pd.options.display.max_rows=80 | |
| #check for the missing data | |
| unemp_df.isna().sum() | |
| # In[5]: | |
| #drop the columns | |
| unemp_df.dropna(axis=1,inplace=True) | |
| # In[6]: | |
| #check for the duplicates | |
| unemp_df.duplicated().sum() | |
| # In[7]: | |
| unemp_df.info() | |
| # In[8]: | |
| unemp_df_1=unemp_df.iloc[:,[3,4,5,6,7,8,9,21,34,47,60]] | |
| # In[ ]: | |
| # In[9]: | |
| #check cvi data | |
| cvi_df.head() | |
| # In[10]: | |
| #change the col names of cvi data | |
| cvi_colnames= ['county_cvi','state_cvi','vulnerability_index','rank_cvi','objected_cvi','excel_id_cvi','fips_cvi'] | |
| cvi_df.columns=cvi_colnames | |
| # In[11]: | |
| #test the column names | |
| cvi_df.head(1) | |
| # In[12]: | |
| #check for missing data | |
| cvi_df.isna().sum() | |
| # In[13]: | |
| #check for duplicates | |
| cvi_df.duplicated().sum() | |
| # In[14]: | |
| #check info | |
| cvi_df.info() | |
| # In[ ]: | |
| ## Combine both the data | |
| # In[15]: | |
| #merge on county ans state | |
| df=pd.merge(unemp_df_1,cvi_df,left_on=['NAME','State'],right_on=['county_cvi','state_cvi']) | |
| # In[16]: | |
| df.head(1) | |
| # In[17]: | |
| df.info() | |
| # ALand: Area of Land | |
| # AWater: Area of Water | |
| # NAME: name of county | |
| # State: name of state | |
| # fips: FIPS code | |
| # LAbourForce_01Month: Count of people within the US Labor Force - most current month | |
| # Employed_01Month : Count of Employed Population - most current month | |
| # Unemployement_01Month : Count of Unemployed Population - most current month | |
| # PctUnemployement_01Month : Percent Unemployed - most current month | |
| # CurrentMonth : Month and year - most current month offered by BLS | |
| # county_cvi: county name | |
| # state_name: state name | |
| # vulnerability_index : | |
| # rank_cvi : rank of vulnerability from covid | |
| # objected_cvi: index(we can drop this) | |
| # excel_ID: ed (we can drop this) | |
| # FIPS: FIPS code | |
| # In[18]: | |
| #Check if the merge is successful. Result should be 0 | |
| df.query('county_cvi != NAME ') | |
| # In[19]: | |
| df.drop(columns=['fips','LaborForce_02Month','county_cvi','state_cvi','objected_cvi','excel_id_cvi'],axis=1,inplace=True) | |
| # In[20]: | |
| #save the merged file | |
| df.to_csv("cvi_unemployment_2020.csv",index=False) | |
| # In[21]: | |
| df.info() | |
| # In[22]: | |
| #find the county with maximum land area | |
| df[df.ALAND==max(df.ALAND)] | |
| # In[23]: | |
| #find the county with maximum land area | |
| df[df.AWATER==max(df.AWATER)] | |
| # In[24]: | |
| #counties with highest labour force | |
| index_top10=list(df.LaborForce_01Month.sort_values(ascending=False).index[:5]) | |
| df.iloc[index_top10] | |
| # In[25]: | |
| #counties with highest unemployment rate | |
| index_top10_u=list(df.PctUnemployed_01Month.sort_values(ascending=False).index[:5]) | |
| df.iloc[index_top10_u] | |
| # In[ ]: | |
| # In[ ]: | |
| # In[26]: | |
| #counties with highest vulnerability index | |
| index_top10_v=list(df.vulnerability_index.sort_values(ascending=False).index[:5]) | |
| df.iloc[index_top10_v] | |
| # #### An average Vulnerability Index score is 100, representing the average job loss expected in the United States. Higher scores indicate the degree to which job losses may be greater — an index score of 200, for example, means the rate of job loss can be twice as large as the national average. Conversely, an index score of 50 would mean a possible job loss of half the national average. Regions heavily dependent on tourism with relatively high concentrations of leisure and hospitality jobs, for example, are likely to have high index scores. | |
| # | |
| # #### The Vulnerability Index only measures the impact potential related to the mix of industry employment. The index does not take into account variation due to a region’s rate of virus infection, nor does it factor in local government's policies in reaction to the virus. | |
| # In[ ]: | |
| # Denali Borough county in Alaska seems to have higest unemployment rate in counties with highest CVI | |
| # In[ ]: | |
| # In[31]: | |
| glob_mob.tail() | |
| # In[34]: | |
| us_mob=glob_mob.query('country_region_code == "US"') | |
| us_mob.head() |