import pandas as pd


# Load the raw CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific Windows path; it has to
# be adjusted before the notebook can run on another machine.
dataset_path = r"C:\Users\Asus\Downloads\dataset.csv"
df = pd.read_csv(dataset_path)


# Print column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112634 entries, 0 to 112633
Data columns (total 17 columns):
 #   Column                                             Non-Null Count   Dtype  
---  ------                                             --------------   -----  
 0   VIN (1-10)                                         112634 non-null  object 
 1   County                                             112634 non-null  object 
 2   City                                               112634 non-null  object 
 3   State                                              112634 non-null  object 
 4   Postal Code                                        112634 non-null  int64  
 5   Model Year                                         112634 non-null  int64  
 6   Make                                               112634 non-null  object 
 7   Model                                              112614 non-null  object 
 8   Electric Vehicle Type                              112634 non-null  object 
 9   Clean Alternative Fuel Vehicle (CAFV) Eligibility  112634 non-null  object 
 10  Electric Range                                     112634 non-null  int64  
 11  Base MSRP                                          112634 non-null  int64  
 12  Legislative District                               112348 non-null  float64
 13  DOL Vehicle ID                                     112634 non-null  int64  
 14  Vehicle Location                                   112610 non-null  object 
 15  Electric Utility                                   112191 non-null  object 
 16  2020 Census Tract                                  112634 non-null  int64  
dtypes: float64(1), int64(6), object(10)
memory usage: 14.6+ MB


# Summary statistics of the numeric columns.
df.describe()


# Number of missing values per column (isna is the same check as isnull).
df.isna().sum()

VIN (1-10)                                             0
County                                                 0
City                                                   0
State                                                  0
Postal Code                                            0
Model Year                                             0
Make                                                   0
Model                                                 20
Electric Vehicle Type                                  0
Clean Alternative Fuel Vehicle (CAFV) Eligibility      0
Electric Range                                         0
Base MSRP                                              0
Legislative District                                 286
DOL Vehicle ID                                         0
Vehicle Location                                      24
Electric Utility                                     443
2020 Census Tract                                      0
dtype: int64


# (rows, columns) of the frame as loaded, before rows with nulls are dropped.
df.shape

(112634, 17)


# Remove every row that has a missing value in any column, modifying df itself.
df.dropna(axis=0, how='any', inplace=True)


# Shape again, to see how many rows the drop removed.
df.shape

(112152, 17)


import plotly.express as px


# Bar chart of the number of rows per electric vehicle type.
ev_type_tally = df['Electric Vehicle Type'].value_counts()
fig_ev_type = px.bar(ev_type_tally, title='Count of Electric Vehicle Types')
fig_ev_type.show()


# Bar chart of the number of rows per CAFV eligibility status.
cafv_tally = df['Clean Alternative Fuel Vehicle (CAFV) Eligibility'].value_counts()
fig_cafv_eligibility = px.bar(cafv_tally, title='Count of CAFV Eligibility')
fig_cafv_eligibility.show()


# Distribution of the electric range, requesting up to 30 bins.
fig_range = px.histogram(
    df,
    x='Electric Range',
    nbins=30,
    title='Electric Range Distribution',
)
fig_range.show()


# Distribution of the base MSRP; the x axis is limited to 0 - 210000.
fig_msrp = px.histogram(
    df,
    x='Base MSRP',
    nbins=50,
    title='Base MSRP Distribution',
    range_x=[0, 210000],
)
fig_msrp.show()


# Count vehicles per manufacturer as a two-column frame, largest count first.
company_counts = (
    df['Make']
    .value_counts()
    .rename_axis('Company')
    .reset_index(name='EV Count')
    .sort_values(by='EV Count', ascending=False)
)

fig_bar = px.bar(
    company_counts,
    x='Company',
    y='EV Count',
    title='Number of Electric Vehicles by Company',
)
fig_bar.show()


# Count vehicles per county as a two-column frame, largest count first.
county_counts = (
    df['County']
    .value_counts()
    .rename_axis('County')
    .reset_index(name='EV Count')
    .sort_values(by='EV Count', ascending=False)
)


fig_bar = px.bar(
    county_counts,
    x='County',
    y='EV Count',
    title='Number of Electric Vehicles by County',
)
fig_bar.show()


# Count vehicles per model year and order the rows chronologically.
df_counts = (
    df['Model Year']
    .value_counts()
    .rename_axis('Model Year')
    .reset_index(name='Count')
    .sort_values(by='Model Year')
)

fig = px.bar(
    df_counts,
    x='Model Year',
    y='Count',
    title='Electric Vehicles Count by Model Year',
    labels={'Model Year': 'Model Year', 'Count': 'Count'},
)
fig.show()


# Count vehicles per electric vehicle type, largest count first.
ev_type_counts = (
    df['Electric Vehicle Type']
    .value_counts()
    .rename_axis('Electric Vehicle Type')
    .reset_index(name='EV Count')
    .sort_values(by='EV Count', ascending=False)
)

fig_bar = px.bar(
    ev_type_counts,
    x='Electric Vehicle Type',
    y='EV Count',
    title='Number of Electric Vehicles by Electric Vehicle Type',
)
fig_bar.show()


# Count vehicles per electric utility (value_counts already orders by count).
electric_utility_counts = (
    df['Electric Utility']
    .value_counts()
    .rename_axis('Electric Utility')
    .reset_index(name='EV Count')
)

fig_bar = px.bar(
    electric_utility_counts,
    x='Electric Utility',
    y='EV Count',
    title='Electric Vehicles Count by Electric Utility',
)
fig_bar.show()


# Electric range against base MSRP, one point per vehicle.
fig_scatter_ev_range_msrp = px.scatter(
    df,
    x='Electric Range',
    y='Base MSRP',
    title='Electric Range vs. Base MSRP',
)
fig_scatter_ev_range_msrp.show()


# Electric range against model year, one point per vehicle.
fig_scatter_ev_range_model_year = px.scatter(
    df,
    x='Model Year',
    y='Electric Range',
    title='Electric Range vs. Model Year',
)
fig_scatter_ev_range_model_year.show()


# Spread of the electric range within each vehicle type.
fig_box_ev_type_range = px.box(
    df,
    x='Electric Vehicle Type',
    y='Electric Range',
    title='Electric Vehicle Type vs. Electric Range',
)
fig_box_ev_type_range.show()


# Scatter of the per-model-year vehicle counts.
model_year_tally = df['Model Year'].value_counts()
fig_scatter_model_year_ev_count = px.scatter(
    model_year_tally,
    title='Number of Electric Vehicles by Model Year',
)
fig_scatter_model_year_ev_count.show()


# Spread of the base MSRP within each vehicle type.
fig_box = px.box(
    df,
    x='Electric Vehicle Type',
    y='Base MSRP',
    title='Electric Vehicle Type vs. Base MSRP',
)
fig_box.show()


# Base MSRP against model year, one point per vehicle.
fig_scatter_model_year = px.scatter(
    df,
    x='Model Year',
    y='Base MSRP',
    title='Model Year vs. Base MSRP',
)
fig_scatter_model_year.show()


# Violin plot of the electric range per vehicle type (the variable keeps its
# original "box" name even though the figure is a violin).
fig_box_ev_type = px.violin(
    df,
    x='Electric Vehicle Type',
    y='Electric Range',
    title='Electric Vehicle Type vs. Electric Range',
)
fig_box_ev_type.show()


# Spread of the electric range per CAFV eligibility status. The long column
# name is replaced by a shorter axis label through `labels`.
cafv_column = 'Clean Alternative Fuel Vehicle (CAFV) Eligibility'
axis_labels = {
    cafv_column: 'CAFV Eligibility',
    'Electric Range': 'Electric Range (miles)',
}
fig_box = px.box(
    df,
    x=cafv_column,
    y='Electric Range',
    title='Clean Alternative Fuel Vehicle (CAFV) Eligibility vs. Electric Range',
    labels=axis_labels,
)

fig_box.show()


# Average electric range per model year, with 'Model Year' kept as a column.
mean_electric_range = df.groupby('Model Year', as_index=False)['Electric Range'].mean()

fig_line = px.line(
    mean_electric_range,
    x='Model Year',
    y='Electric Range',
    title='Model Year vs. Mean Electric Range',
    labels={
        'Model Year': 'Model Year',
        'Electric Range': 'Mean Electric Range (miles)',
    },
)

fig_line.show()


# Number of vehicles for every (model year, vehicle type) pair.
year_type_sizes = df.groupby(['Model Year', 'Electric Vehicle Type']).size()
df_counts = year_type_sizes.rename('Count').reset_index()

# One bar per model year, coloured by vehicle type.
fig_bar = px.bar(
    df_counts,
    x='Model Year',
    y='Count',
    color='Electric Vehicle Type',
    title='Count of Different Electric Vehicle Types by Model Year',
)

fig_bar.show()


# Pairwise scatter plots of the three numeric columns of interest.
splom_dimensions = ['Electric Range', 'Base MSRP', 'Model Year']
fig_splof = px.scatter_matrix(
    df,
    dimensions=splom_dimensions,
    title='Scatter Plot Matrix (SPLOM)',
)
fig_splof.show()


# Hierarchical view of the rows: state, then county, then city.
location_hierarchy = ['State', 'County', 'City']
fig_sunburst = px.sunburst(
    df,
    path=location_hierarchy,
    title='Sunburst Chart of Electric Vehicles by State, County, and City',
)
fig_sunburst.show()


# Range, price and model year in one 3D scatter, coloured by vehicle type.
fig_3d_scatter = px.scatter_3d(
    df,
    x='Electric Range',
    y='Base MSRP',
    z='Model Year',
    color='Electric Vehicle Type',
    title='3D Scatter Plot of Electric Range, Base MSRP, and Model Year',
)
fig_3d_scatter.show()


# Encode each vehicle type as an integer code in a new 'EV_Type_Num' column,
# which is then used to colour the lines (presumably because the colour
# argument of a parallel-coordinates plot has to be numeric - confirm against
# the plotly docs). Types that are not in the mapping become NaN.
ev_type_codes = {
    'Plug-in Hybrid Electric Vehicle (PHEV)': 1,
    'Battery Electric Vehicle (BEV)': 2,
    'Hybrid Electric Vehicle (HEV)': 3,
}
df['EV_Type_Num'] = df['Electric Vehicle Type'].map(ev_type_codes)

fig_parallel_coordinates = px.parallel_coordinates(
    df,
    color='EV_Type_Num',
    dimensions=['Electric Range', 'Base MSRP', 'Model Year'],
    title='Parallel Coordinates Plot',
)
# The figure was previously built but never rendered; show it like every
# other figure in this notebook.
fig_parallel_coordinates.show()


def _longitude_of(location):
    """Return the second whitespace-separated token, minus its first character, as a float."""
    # presumably values look like 'POINT (<lon> <lat>)' - confirm against the dataset
    lon_token = location.split()[1]
    return float(lon_token[1:])


def _latitude_of(location):
    """Return the third whitespace-separated token, minus its last character, as a float."""
    lat_token = location.split()[2]
    return float(lat_token[:-1])


df['Longitude'] = df['Vehicle Location'].map(_longitude_of)
df['Latitude'] = df['Vehicle Location'].map(_latitude_of)

# Number of vehicles at each distinct (coordinates, postal code, county, state).
location_keys = ['Latitude', 'Longitude', 'Postal Code', 'County', "State"]
location_counts = df.groupby(location_keys).size().reset_index(name='EV Count')


# One marker per location; both colour and marker size encode the vehicle count.
map_center = {'lat': 37.0902, 'lon': -95.7129}
fig_scatter_map = px.scatter_mapbox(
    location_counts,
    lat='Latitude',
    lon='Longitude',
    color='EV Count',
    size='EV Count',
    mapbox_style='carto-positron',
    zoom=3,
    center=map_center,
    title='Scatter Map of Electric Vehicle Locations',
)

fig_scatter_map.show()


import bar_chart_race as bcr
import warnings


# Silence UserWarning messages for the rest of the session.
warnings.filterwarnings("ignore", category=UserWarning)

# The model year becomes the (string) period label of the animation.
# Note that this permanently converts the column in df to str.
df['Model Year'] = df['Model Year'].astype(str)

# Wide table: one row per model year, one column per make, values = vehicle count.
grouped_data = df.groupby(['Model Year', 'Make']).size().reset_index(name='Count')
pivoted_data = grouped_data.pivot(index='Model Year', columns='Make', values='Count')
# A (year, make) pair that never occurs comes out of pivot as NaN, but it
# really means "zero vehicles"; fill it so the animation is driven by actual
# counts instead of gaps.
pivoted_data = pivoted_data.fillna(0)

# Options shared by the saved video and the inline rendering.
race_options = dict(
    orientation='h',
    sort='desc',
    n_bars=10,
    title='EV Make Count Over the Years',
    filter_column_colors=True,
    period_length=1000,
)

# Write the animation to an mp4 file ...
bcr.bar_chart_race(df=pivoted_data, filename='EV_make_racing_bar_plot.mp4', **race_options)

# ... and render it again without a filename to display it in the notebook.
display(bcr.bar_chart_race(df=pivoted_data, **race_options))

	Postal Code	Model Year	Electric Range	Base MSRP	Legislative District	DOL Vehicle ID	2020 Census Tract
count	112634.000000	112634.000000	112634.000000	112634.000000	112348.000000	1.126340e+05	1.126340e+05
mean	98156.226850	2019.003365	87.812987	1793.439681	29.805604	1.994567e+08	5.296650e+10
std	2648.733064	2.892364	102.334216	10783.753486	14.700545	9.398427e+07	1.699104e+09
min	1730.000000	1997.000000	0.000000	0.000000	1.000000	4.777000e+03	1.101001e+09
25%	98052.000000	2017.000000	0.000000	0.000000	18.000000	1.484142e+08	5.303301e+10
50%	98119.000000	2020.000000	32.000000	0.000000	34.000000	1.923896e+08	5.303303e+10
75%	98370.000000	2022.000000	208.000000	0.000000	43.000000	2.191899e+08	5.305307e+10
max	99701.000000	2023.000000	337.000000	845000.000000	49.000000	4.792548e+08	5.603300e+10

Reading the Dataset¶

Let's get some data overview¶

Removing the Null Values¶

To install Plotly use: !pip install plotly¶

Data Visualization¶

Univariate Analysis¶

Bivariate Analysis¶

Multivariate Analysis¶

Task - 2¶

Task - 3¶

To install bar chart race use: !pip install bar-chart-race¶