In [1]:
import pandas as pd

Reading the Dataset¶

In [2]:
import os

# Location of the CSV file. Set the EV_DATASET_PATH environment variable to point
# at another copy; the original local path remains the default so existing runs
# keep working unchanged.
DATASET_PATH = os.environ.get("EV_DATASET_PATH", r"C:\Users\Asus\Downloads\dataset.csv")
df = pd.read_csv(DATASET_PATH)

Let's get some data overview¶

In [3]:
# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112634 entries, 0 to 112633
Data columns (total 17 columns):
 #   Column                                             Non-Null Count   Dtype  
---  ------                                             --------------   -----  
 0   VIN (1-10)                                         112634 non-null  object 
 1   County                                             112634 non-null  object 
 2   City                                               112634 non-null  object 
 3   State                                              112634 non-null  object 
 4   Postal Code                                        112634 non-null  int64  
 5   Model Year                                         112634 non-null  int64  
 6   Make                                               112634 non-null  object 
 7   Model                                              112614 non-null  object 
 8   Electric Vehicle Type                              112634 non-null  object 
 9   Clean Alternative Fuel Vehicle (CAFV) Eligibility  112634 non-null  object 
 10  Electric Range                                     112634 non-null  int64  
 11  Base MSRP                                          112634 non-null  int64  
 12  Legislative District                               112348 non-null  float64
 13  DOL Vehicle ID                                     112634 non-null  int64  
 14  Vehicle Location                                   112610 non-null  object 
 15  Electric Utility                                   112191 non-null  object 
 16  2020 Census Tract                                  112634 non-null  int64  
dtypes: float64(1), int64(6), object(10)
memory usage: 14.6+ MB
In [4]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()
Out[4]:
Postal Code Model Year Electric Range Base MSRP Legislative District DOL Vehicle ID 2020 Census Tract
count 112634.000000 112634.000000 112634.000000 112634.000000 112348.000000 1.126340e+05 1.126340e+05
mean 98156.226850 2019.003365 87.812987 1793.439681 29.805604 1.994567e+08 5.296650e+10
std 2648.733064 2.892364 102.334216 10783.753486 14.700545 9.398427e+07 1.699104e+09
min 1730.000000 1997.000000 0.000000 0.000000 1.000000 4.777000e+03 1.101001e+09
25% 98052.000000 2017.000000 0.000000 0.000000 18.000000 1.484142e+08 5.303301e+10
50% 98119.000000 2020.000000 32.000000 0.000000 34.000000 1.923896e+08 5.303303e+10
75% 98370.000000 2022.000000 208.000000 0.000000 43.000000 2.191899e+08 5.305307e+10
max 99701.000000 2023.000000 337.000000 845000.000000 49.000000 4.792548e+08 5.603300e+10
In [5]:
# Number of missing values in each column.
df.isna().sum()
Out[5]:
VIN (1-10)                                             0
County                                                 0
City                                                   0
State                                                  0
Postal Code                                            0
Model Year                                             0
Make                                                   0
Model                                                 20
Electric Vehicle Type                                  0
Clean Alternative Fuel Vehicle (CAFV) Eligibility      0
Electric Range                                         0
Base MSRP                                              0
Legislative District                                 286
DOL Vehicle ID                                         0
Vehicle Location                                      24
Electric Utility                                     443
2020 Census Tract                                      0
dtype: int64
In [6]:
# (rows, columns) of the frame before rows with missing values are dropped.
df.shape
Out[6]:
(112634, 17)

Removing the Null Values¶

In [7]:
# Keep only the rows that have a value in every column.
df = df.dropna()
In [8]:
# (rows, columns) of the frame after rows with missing values were dropped.
df.shape
Out[8]:
(112152, 17)

To install Plotly, run: %pip install plotly¶

In [9]:
import plotly.express as px

Data Visualization¶

Univariate Analysis¶

In [10]:
# Bar chart of the number of vehicles per electric vehicle type.
fig_ev_type = px.bar(
    data_frame=df['Electric Vehicle Type'].value_counts(),
    title='Count of Electric Vehicle Types',
)
fig_ev_type.show()
In [11]:
# Bar chart of the number of vehicles per CAFV eligibility category.
fig_cafv_eligibility = px.bar(
    data_frame=df['Clean Alternative Fuel Vehicle (CAFV) Eligibility'].value_counts(),
    title='Count of CAFV Eligibility',
)
fig_cafv_eligibility.show()
In [12]:
# Histogram of the electric range values, using 30 bins.
fig_range = px.histogram(
    data_frame=df,
    x='Electric Range',
    nbins=30,
    title='Electric Range Distribution',
)
fig_range.show()
In [13]:
# Histogram of Base MSRP with the x-axis restricted to the 0-210000 range.
fig_msrp = px.histogram(
    data_frame=df,
    x='Base MSRP',
    nbins=50,
    title='Base MSRP Distribution',
    range_x=[0, 210000],
)
fig_msrp.show()
In [14]:
# Vehicles per manufacturer, largest first.
company_counts = (
    df['Make']
    .value_counts()
    .reset_index()
    .set_axis(['Company', 'EV Count'], axis=1)
    .sort_values(by='EV Count', ascending=False)
)

fig_bar = px.bar(
    data_frame=company_counts,
    x='Company',
    y='EV Count',
    title='Number of Electric Vehicles by Company',
)
fig_bar.show()
In [15]:
# Vehicles per county, largest first.
county_counts = (
    df['County']
    .value_counts()
    .reset_index()
    .set_axis(['County', 'EV Count'], axis=1)
    .sort_values(by='EV Count', ascending=False)
)

fig_bar = px.bar(
    data_frame=county_counts,
    x='County',
    y='EV Count',
    title='Number of Electric Vehicles by County',
)
fig_bar.show()
In [16]:
# Vehicles per model year, ordered chronologically.
df_counts = (
    df['Model Year']
    .value_counts()
    .reset_index()
    .set_axis(['Model Year', 'Count'], axis=1)
    .sort_values(by='Model Year')
)

axis_labels = {'Model Year': 'Model Year', 'Count': 'Count'}
fig = px.bar(
    data_frame=df_counts,
    x='Model Year',
    y='Count',
    title='Electric Vehicles Count by Model Year',
    labels=axis_labels,
)
fig.show()
In [17]:
# Vehicles per electric vehicle type, largest first.
ev_type_counts = (
    df['Electric Vehicle Type']
    .value_counts()
    .reset_index()
    .set_axis(['Electric Vehicle Type', 'EV Count'], axis=1)
    .sort_values(by='EV Count', ascending=False)
)

fig_bar = px.bar(
    data_frame=ev_type_counts,
    x='Electric Vehicle Type',
    y='EV Count',
    title='Number of Electric Vehicles by Electric Vehicle Type',
)
fig_bar.show()
In [18]:
# Vehicles per electric utility, in value_counts order.
electric_utility_counts = (
    df['Electric Utility']
    .value_counts()
    .reset_index()
    .set_axis(['Electric Utility', 'EV Count'], axis=1)
)

fig_bar = px.bar(
    data_frame=electric_utility_counts,
    x='Electric Utility',
    y='EV Count',
    title='Electric Vehicles Count by Electric Utility',
)
fig_bar.show()

Bivariate Analysis¶

In [19]:
# Scatter plot of Base MSRP against Electric Range, one point per vehicle row.
fig_scatter_ev_range_msrp = px.scatter(
    data_frame=df,
    x='Electric Range',
    y='Base MSRP',
    title='Electric Range vs. Base MSRP',
)
fig_scatter_ev_range_msrp.show()
In [20]:
# Scatter plot of Electric Range against Model Year.
fig_scatter_ev_range_model_year = px.scatter(
    data_frame=df,
    x='Model Year',
    y='Electric Range',
    title='Electric Range vs. Model Year',
)
fig_scatter_ev_range_model_year.show()
In [21]:
# Box plot of Electric Range for each electric vehicle type.
fig_box_ev_type_range = px.box(
    data_frame=df,
    x='Electric Vehicle Type',
    y='Electric Range',
    title='Electric Vehicle Type vs. Electric Range',
)
fig_box_ev_type_range.show()
In [22]:
# Scatter plot of the per-model-year vehicle counts.
fig_scatter_model_year_ev_count = px.scatter(
    data_frame=df['Model Year'].value_counts(),
    title='Number of Electric Vehicles by Model Year',
)
fig_scatter_model_year_ev_count.show()
In [23]:
# Box plot of Base MSRP for each electric vehicle type.
fig_box = px.box(
    data_frame=df,
    x='Electric Vehicle Type',
    y='Base MSRP',
    title='Electric Vehicle Type vs. Base MSRP',
)
fig_box.show()
In [24]:
# Scatter plot of Base MSRP against Model Year.
fig_scatter_model_year = px.scatter(
    data_frame=df,
    x='Model Year',
    y='Base MSRP',
    title='Model Year vs. Base MSRP',
)
fig_scatter_model_year.show()
In [25]:
# Violin plot of Electric Range for each electric vehicle type
# (the variable keeps its "box" name even though the chart is a violin).
fig_box_ev_type = px.violin(
    data_frame=df,
    x='Electric Vehicle Type',
    y='Electric Range',
    title='Electric Vehicle Type vs. Electric Range',
)
fig_box_ev_type.show()
In [26]:
# Box plot of Electric Range per CAFV eligibility category, with shortened axis labels.
cafv_axis_labels = {
    'Clean Alternative Fuel Vehicle (CAFV) Eligibility': 'CAFV Eligibility',
    'Electric Range': 'Electric Range (miles)',
}
fig_box = px.box(
    data_frame=df,
    x='Clean Alternative Fuel Vehicle (CAFV) Eligibility',
    y='Electric Range',
    title='Clean Alternative Fuel Vehicle (CAFV) Eligibility vs. Electric Range',
    labels=cafv_axis_labels,
)
fig_box.show()
In [27]:
# Average Electric Range for every model year, kept as a two-column frame.
mean_electric_range = df.groupby('Model Year', as_index=False)['Electric Range'].mean()

fig_line = px.line(
    data_frame=mean_electric_range,
    x='Model Year',
    y='Electric Range',
    title='Model Year vs. Mean Electric Range',
    labels={'Model Year': 'Model Year', 'Electric Range': 'Mean Electric Range (miles)'},
)
fig_line.show()
In [28]:
# Number of vehicles for each (model year, EV type) pair.
df_counts = (
    df.groupby(['Model Year', 'Electric Vehicle Type'])
    .size()
    .rename('Count')
    .reset_index()
)

# Bars are coloured by EV type so each year's bar is split per type.
fig_bar = px.bar(
    data_frame=df_counts,
    x='Model Year',
    y='Count',
    color='Electric Vehicle Type',
    title='Count of Different Electric Vehicle Types by Model Year',
)
fig_bar.show()

Multivariate Analysis¶

In [29]:
# Pairwise scatter plots of the three numeric columns listed below.
splom_columns = ['Electric Range', 'Base MSRP', 'Model Year']
fig_splof = px.scatter_matrix(
    data_frame=df,
    dimensions=splom_columns,
    title='Scatter Plot Matrix (SPLOM)',
)
fig_splof.show()
In [30]:
# Sunburst chart nested by State, then County, then City.
geo_hierarchy = ['State', 'County', 'City']
fig_sunburst = px.sunburst(
    data_frame=df,
    path=geo_hierarchy,
    title='Sunburst Chart of Electric Vehicles by State, County, and City',
)
fig_sunburst.show()
In [31]:
# 3D scatter of range, price and model year, coloured by EV type.
fig_3d_scatter = px.scatter_3d(
    data_frame=df,
    x='Electric Range',
    y='Base MSRP',
    z='Model Year',
    color='Electric Vehicle Type',
    title='3D Scatter Plot of Electric Range, Base MSRP, and Model Year',
)
fig_3d_scatter.show()
In [32]:
# Encode the EV type as an integer so it can be used as the colour dimension below.
# Types missing from this mapping become NaN in 'EV_Type_Num'.
ev_type_codes = {
    'Plug-in Hybrid Electric Vehicle (PHEV)': 1,
    'Battery Electric Vehicle (BEV)': 2,
    'Hybrid Electric Vehicle (HEV)': 3,
}
df['EV_Type_Num'] = df['Electric Vehicle Type'].map(ev_type_codes)

fig_parallel_coordinates = px.parallel_coordinates(df, color='EV_Type_Num',
                                                   dimensions=['Electric Range', 'Base MSRP', 'Model Year'],
                                                   title='Parallel Coordinates Plot')
# Render the figure, as every other chart cell in this notebook does.
fig_parallel_coordinates.show()
In [ ]:
 

Task - 2¶

In [33]:
# 'Vehicle Location' is expected to look like "POINT (<longitude> <latitude>)".
# Extract both numbers in one vectorised pass instead of two row-wise Python
# lambdas; the pattern also tolerates extra or missing spaces around the
# parentheses. Values that do not match the pattern become NaN.
coordinates = (
    df['Vehicle Location']
    .str.extract(r'\(\s*(\S+)\s+(\S+?)\s*\)')
    .astype(float)
)
df['Longitude'] = coordinates[0]
df['Latitude'] = coordinates[1]

# Number of vehicles registered at each distinct coordinate / postal code / county / state.
location_counts = df.groupby(['Latitude', 'Longitude', 'Postal Code', 'County', "State"]).size().reset_index(name='EV Count')
In [34]:
# Initial map view: centred on the coordinates below at zoom level 3.
map_center = {'lat': 37.0902, 'lon': -95.7129}

# One marker per location; both marker colour and marker size follow 'EV Count'.
fig_scatter_map = px.scatter_mapbox(
    data_frame=location_counts,
    lat='Latitude',
    lon='Longitude',
    color='EV Count',
    size='EV Count',
    mapbox_style='carto-positron',
    zoom=3,
    center=map_center,
    title='Scatter Map of Electric Vehicle Locations',
)
fig_scatter_map.show()

Task - 3¶

To install bar chart race, run: %pip install bar-chart-race¶

In [35]:
import bar_chart_race as bcr
import warnings
In [36]:
warnings.filterwarnings("ignore", category=UserWarning)

# Convert the year to a string on a two-column copy only, so the shared `df`
# keeps its integer 'Model Year' and earlier cells behave the same when re-run.
year_make = df[['Model Year', 'Make']].astype({'Model Year': str})

# Vehicles per (model year, make), reshaped to one row per year and one column per make.
grouped_data = year_make.groupby(['Model Year', 'Make']).size().reset_index(name='Count')
# A make with no vehicles in a given model year has a count of zero, not a
# missing value, so fill the gaps left by the pivot with 0.
pivoted_data = grouped_data.pivot(index='Model Year', columns='Make', values='Count').fillna(0)

# Options shared by both renders so the saved file and the inline output match.
race_options = dict(
    orientation='h',
    sort='desc',
    n_bars=10,
    title='EV Make Count Over the Years',
    filter_column_colors=True,
    period_length=1000,
)

# First render: write the animation to an .mp4 file.
bcr.bar_chart_race(df=pivoted_data, filename='EV_make_racing_bar_plot.mp4', **race_options)

# Second render: no filename, so the returned object is displayed inline in the notebook.
display(bcr.bar_chart_race(df=pivoted_data, **race_options))
Your browser does not support the video tag.
In [ ]: