Team Updates

#!/usr/bin/env python
# coding: utf-8
#author: Shivam
# In[1]:
#load libraries
importpandasaspd
importnumpyasnp
importmatplotlib.pyplotasplt
get_ipython().run_line_magic('matplotlib', 'inline')
importseabornassns
# In[2]:
#load datasets
unemp_df=pd.read_csv("Bureau_of_Labor_Statistics_Monthly_Unemployment__current_14_months_.csv")
cvi_df=pd.read_csv("Chmura_CVI_Counties_April_2020.csv")
glob_mob=pd.read_csv("Global_Mobility_Report.csv")
# In[3]:
#check umemployment data
unemp_df.head()
# In[4]:
pd.options.display.max_rows=80
#check for the missing data
unemp_df.isna().sum()
# In[5]:
#drop the columns
unemp_df.dropna(axis=1,inplace=True)
# In[6]:
#check for the duplicates
unemp_df.duplicated().sum()
# In[7]:
unemp_df.info()
# In[8]:
unemp_df_1=unemp_df.iloc[:,[3,4,5,6,7,8,9,21,34,47,60]]
# In[ ]:
# In[9]:
#check cvi data
cvi_df.head()
# In[10]:
#change the col names of cvi data
cvi_colnames= ['county_cvi','state_cvi','vulnerability_index','rank_cvi','objected_cvi','excel_id_cvi','fips_cvi']
cvi_df.columns=cvi_colnames
# In[11]:
#test the column names
cvi_df.head(1)
# In[12]:
#check for missing data
cvi_df.isna().sum()
# In[13]:
#check for duplicates
cvi_df.duplicated().sum()
# In[14]:
#check info
cvi_df.info()
# In[ ]:
## Combine both the data
# In[15]:
#merge on county ans state
df=pd.merge(unemp_df_1,cvi_df,left_on=['NAME','State'],right_on=['county_cvi','state_cvi'])
# In[16]:
df.head(1)
# In[17]:
df.info()
# ALand: Area of Land
# AWater: Area of Water
# NAME: name of county
# State: name of state
# fips: FIPS code
# LAbourForce_01Month: Count of people within the US Labor Force - most current month
# Employed_01Month : Count of Employed Population - most current month
# Unemployement_01Month : Count of Unemployed Population - most current month
# PctUnemployement_01Month : Percent Unemployed - most current month
# CurrentMonth : Month and year - most current month offered by BLS
# county_cvi: county name
# state_name: state name
# vulnerability_index :
# rank_cvi : rank of vulnerability from covid
# objected_cvi: index(we can drop this)
# excel_ID: ed (we can drop this)
# FIPS: FIPS code
# In[18]:
#Check if the merge is successful. Result should be 0
df.query('county_cvi != NAME ')
# In[19]:
df.drop(columns=['fips','LaborForce_02Month','county_cvi','state_cvi','objected_cvi','excel_id_cvi'],axis=1,inplace=True)
# In[20]:
#save the merged file
df.to_csv("cvi_unemployment_2020.csv",index=False)
# In[21]:
df.info()
# In[22]:
#find the county with maximum land area
df[df.ALAND==max(df.ALAND)]
# In[23]:
#find the county with maximum land area
df[df.AWATER==max(df.AWATER)]
# In[24]:
#counties with highest labour force
index_top10=list(df.LaborForce_01Month.sort_values(ascending=False).index[:5])
df.iloc[index_top10]
# In[25]:
#counties with highest unemployment rate
index_top10_u=list(df.PctUnemployed_01Month.sort_values(ascending=False).index[:5])
df.iloc[index_top10_u]
# In[ ]:
# In[ ]:
# In[26]:
#counties with highest vulnerability index
index_top10_v=list(df.vulnerability_index.sort_values(ascending=False).index[:5])
df.iloc[index_top10_v]
# #### An average Vulnerability Index score is 100, representing the average job loss expected in the United States. Higher scores indicate the degree to which job losses may be greater — an index score of 200, for example, means the rate of job loss can be twice as large as the national average. Conversely, an index score of 50 would mean a possible job loss of half the national average. Regions heavily dependent on tourism with relatively high concentrations of leisure and hospitality jobs, for example, are likely to have high index scores.
#
# #### The Vulnerability Index only measures the impact potential related to the mix of industry employment. The index does not take into account variation due to a region’s rate of virus infection, nor does it factor in local government's policies in reaction to the virus.
# In[ ]:
# Denali Borough county in Alaska seems to have higest unemployment rate in counties with highest CVI
# In[ ]:
# In[31]:
glob_mob.tail()
# In[34]:
us_mob=glob_mob.query('country_region_code == "US"')
us_mob.head()
chauhan.shiShivam Chauhan