import pandas as pd
# Load the electric-vehicle registration dataset from a local CSV file.
# NOTE(review): the path is absolute and machine-specific; adjust it when
# running on another machine.
df = pd.read_csv(r"C:\Users\Asus\Downloads\dataset.csv")
# Print column names, non-null counts, dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 112634 entries, 0 to 112633 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 VIN (1-10) 112634 non-null object 1 County 112634 non-null object 2 City 112634 non-null object 3 State 112634 non-null object 4 Postal Code 112634 non-null int64 5 Model Year 112634 non-null int64 6 Make 112634 non-null object 7 Model 112614 non-null object 8 Electric Vehicle Type 112634 non-null object 9 Clean Alternative Fuel Vehicle (CAFV) Eligibility 112634 non-null object 10 Electric Range 112634 non-null int64 11 Base MSRP 112634 non-null int64 12 Legislative District 112348 non-null float64 13 DOL Vehicle ID 112634 non-null int64 14 Vehicle Location 112610 non-null object 15 Electric Utility 112191 non-null object 16 2020 Census Tract 112634 non-null int64 dtypes: float64(1), int64(6), object(10) memory usage: 14.6+ MB
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()
Postal Code | Model Year | Electric Range | Base MSRP | Legislative District | DOL Vehicle ID | 2020 Census Tract | |
---|---|---|---|---|---|---|---|
count | 112634.000000 | 112634.000000 | 112634.000000 | 112634.000000 | 112348.000000 | 1.126340e+05 | 1.126340e+05 |
mean | 98156.226850 | 2019.003365 | 87.812987 | 1793.439681 | 29.805604 | 1.994567e+08 | 5.296650e+10 |
std | 2648.733064 | 2.892364 | 102.334216 | 10783.753486 | 14.700545 | 9.398427e+07 | 1.699104e+09 |
min | 1730.000000 | 1997.000000 | 0.000000 | 0.000000 | 1.000000 | 4.777000e+03 | 1.101001e+09 |
25% | 98052.000000 | 2017.000000 | 0.000000 | 0.000000 | 18.000000 | 1.484142e+08 | 5.303301e+10 |
50% | 98119.000000 | 2020.000000 | 32.000000 | 0.000000 | 34.000000 | 1.923896e+08 | 5.303303e+10 |
75% | 98370.000000 | 2022.000000 | 208.000000 | 0.000000 | 43.000000 | 2.191899e+08 | 5.305307e+10 |
max | 99701.000000 | 2023.000000 | 337.000000 | 845000.000000 | 49.000000 | 4.792548e+08 | 5.603300e+10 |
# Number of missing values in each column.
df.isnull().sum()
VIN (1-10) 0 County 0 City 0 State 0 Postal Code 0 Model Year 0 Make 0 Model 20 Electric Vehicle Type 0 Clean Alternative Fuel Vehicle (CAFV) Eligibility 0 Electric Range 0 Base MSRP 0 Legislative District 286 DOL Vehicle ID 0 Vehicle Location 24 Electric Utility 443 2020 Census Tract 0 dtype: int64
# (rows, columns) of the frame before incomplete rows are removed.
df.shape
(112634, 17)
# Remove, in place, every row that contains at least one missing value.
df.dropna(inplace=True)
# (rows, columns) of the frame after the removal.
df.shape
(112152, 17)
import plotly.express as px
# Bar chart: number of vehicles per electric vehicle type.
fig_ev_type = px.bar(
    data_frame=df['Electric Vehicle Type'].value_counts(),
    title='Count of Electric Vehicle Types',
)
fig_ev_type.show()

# Bar chart: number of vehicles per CAFV eligibility status.
fig_cafv_eligibility = px.bar(
    data_frame=df['Clean Alternative Fuel Vehicle (CAFV) Eligibility'].value_counts(),
    title='Count of CAFV Eligibility',
)
fig_cafv_eligibility.show()
# Histogram of the 'Electric Range' column using 30 bins.
fig_range = px.histogram(
    data_frame=df,
    x='Electric Range',
    title='Electric Range Distribution',
    nbins=30,
)
fig_range.show()

# Histogram of the 'Base MSRP' column using 50 bins; the x-axis is limited
# to the 0-210000 window.
fig_msrp = px.histogram(
    data_frame=df,
    x='Base MSRP',
    title='Base MSRP Distribution',
    nbins=50,
    range_x=[0, 210000],
)
fig_msrp.show()
# Vehicles per manufacturer as a two-column frame, largest count first.
company_counts = (
    df['Make']
    .value_counts()
    .reset_index()
    .set_axis(['Company', 'EV Count'], axis=1)
    .sort_values(by='EV Count', ascending=False)
)
fig_bar = px.bar(
    data_frame=company_counts,
    x='Company',
    y='EV Count',
    title='Number of Electric Vehicles by Company',
)
fig_bar.show()
# Vehicles per county as a two-column frame, largest count first.
county_counts = (
    df['County']
    .value_counts()
    .reset_index()
    .set_axis(['County', 'EV Count'], axis=1)
    .sort_values(by='EV Count', ascending=False)
)
fig_bar = px.bar(
    data_frame=county_counts,
    x='County',
    y='EV Count',
    title='Number of Electric Vehicles by County',
)
fig_bar.show()
# Vehicles per model year, ordered chronologically by model year.
df_counts = (
    df['Model Year']
    .value_counts()
    .reset_index()
    .set_axis(['Model Year', 'Count'], axis=1)
    .sort_values(by='Model Year')
)
fig = px.bar(
    data_frame=df_counts,
    x='Model Year',
    y='Count',
    title='Electric Vehicles Count by Model Year',
    labels={'Model Year': 'Model Year', 'Count': 'Count'},
)
fig.show()
# Vehicles per electric vehicle type as a two-column frame, largest count first.
ev_type_counts = (
    df['Electric Vehicle Type']
    .value_counts()
    .reset_index()
    .set_axis(['Electric Vehicle Type', 'EV Count'], axis=1)
    .sort_values(by='EV Count', ascending=False)
)
fig_bar = px.bar(
    data_frame=ev_type_counts,
    x='Electric Vehicle Type',
    y='EV Count',
    title='Number of Electric Vehicles by Electric Vehicle Type',
)
fig_bar.show()
# Vehicles per electric utility, kept in the order value_counts() returns them.
electric_utility_counts = (
    df['Electric Utility']
    .value_counts()
    .reset_index()
    .set_axis(['Electric Utility', 'EV Count'], axis=1)
)
fig_bar = px.bar(
    data_frame=electric_utility_counts,
    x='Electric Utility',
    y='EV Count',
    title='Electric Vehicles Count by Electric Utility',
)
fig_bar.show()
# Scatter plot: 'Electric Range' on x against 'Base MSRP' on y, one point per row.
fig_scatter_ev_range_msrp = px.scatter(
    data_frame=df,
    x='Electric Range',
    y='Base MSRP',
    title='Electric Range vs. Base MSRP',
)
fig_scatter_ev_range_msrp.show()

# Scatter plot: 'Model Year' on x against 'Electric Range' on y, one point per row.
fig_scatter_ev_range_model_year = px.scatter(
    data_frame=df,
    x='Model Year',
    y='Electric Range',
    title='Electric Range vs. Model Year',
)
fig_scatter_ev_range_model_year.show()
# Box plot: distribution of 'Electric Range' within each electric vehicle type.
fig_box_ev_type_range = px.box(
    data_frame=df,
    x='Electric Vehicle Type',
    y='Electric Range',
    title='Electric Vehicle Type vs. Electric Range',
)
fig_box_ev_type_range.show()

# Scatter plot built directly from the per-model-year counts Series.
fig_scatter_model_year_ev_count = px.scatter(
    data_frame=df['Model Year'].value_counts(),
    title='Number of Electric Vehicles by Model Year',
)
fig_scatter_model_year_ev_count.show()
# Box plot: distribution of 'Base MSRP' within each electric vehicle type.
fig_box = px.box(
    data_frame=df,
    x='Electric Vehicle Type',
    y='Base MSRP',
    title='Electric Vehicle Type vs. Base MSRP',
)
fig_box.show()

# Scatter plot: 'Model Year' on x against 'Base MSRP' on y, one point per row.
fig_scatter_model_year = px.scatter(
    data_frame=df,
    x='Model Year',
    y='Base MSRP',
    title='Model Year vs. Base MSRP',
)
fig_scatter_model_year.show()
# Violin plot: distribution of 'Electric Range' within each electric vehicle type.
fig_box_ev_type = px.violin(
    data_frame=df,
    x='Electric Vehicle Type',
    y='Electric Range',
    title='Electric Vehicle Type vs. Electric Range',
)
fig_box_ev_type.show()

# Box plot: 'Electric Range' within each CAFV eligibility status, with
# shortened axis labels.
fig_box = px.box(
    data_frame=df,
    x='Clean Alternative Fuel Vehicle (CAFV) Eligibility',
    y='Electric Range',
    labels={
        'Clean Alternative Fuel Vehicle (CAFV) Eligibility': 'CAFV Eligibility',
        'Electric Range': 'Electric Range (miles)',
    },
    title='Clean Alternative Fuel Vehicle (CAFV) Eligibility vs. Electric Range',
)
fig_box.show()
# Mean 'Electric Range' per model year, with 'Model Year' kept as a regular column.
mean_electric_range = df.groupby('Model Year', as_index=False)['Electric Range'].mean()
fig_line = px.line(
    data_frame=mean_electric_range,
    x='Model Year',
    y='Electric Range',
    labels={
        'Model Year': 'Model Year',
        'Electric Range': 'Mean Electric Range (miles)',
    },
    title='Model Year vs. Mean Electric Range',
)
fig_line.show()
# Row count for every (model year, electric vehicle type) pair present in the data.
df_counts = (
    df.groupby(['Model Year', 'Electric Vehicle Type'])
    .size()
    .rename('Count')
    .reset_index()
)
fig_bar = px.bar(
    data_frame=df_counts,
    x='Model Year',
    y='Count',
    color='Electric Vehicle Type',
    title='Count of Different Electric Vehicle Types by Model Year',
)
fig_bar.show()
# Pairwise scatter plots of electric range, base MSRP and model year.
fig_splof = px.scatter_matrix(
    data_frame=df,
    dimensions=['Electric Range', 'Base MSRP', 'Model Year'],
    title='Scatter Plot Matrix (SPLOM)',
)
fig_splof.show()
# Sunburst chart with the hierarchy State -> County -> City.
fig_sunburst = px.sunburst(
    data_frame=df,
    path=['State', 'County', 'City'],
    title='Sunburst Chart of Electric Vehicles by State, County, and City',
)
fig_sunburst.show()
# 3D scatter of range (x), MSRP (y) and model year (z), colored by vehicle type.
fig_3d_scatter = px.scatter_3d(
    data_frame=df,
    x='Electric Range',
    y='Base MSRP',
    z='Model Year',
    color='Electric Vehicle Type',
    title='3D Scatter Plot of Electric Range, Base MSRP, and Model Year',
)
fig_3d_scatter.show()
# Map each electric vehicle type label to an integer code; the code is stored
# in a new 'EV_Type_Num' column and used as the line color below. Labels that
# are not in the mapping become NaN.
df['EV_Type_Num'] = df['Electric Vehicle Type'].map({
    'Plug-in Hybrid Electric Vehicle (PHEV)': 1,
    'Battery Electric Vehicle (BEV)': 2,
    'Hybrid Electric Vehicle (HEV)': 3,
})

# Parallel-coordinates plot over range, MSRP and model year, colored by the
# numeric vehicle-type code.
fig_parallel_coordinates = px.parallel_coordinates(
    df,
    color='EV_Type_Num',
    dimensions=['Electric Range', 'Base MSRP', 'Model Year'],
    title='Parallel Coordinates Plot',
)
# The figure was previously built but never rendered; show it like every
# other figure in this file.
fig_parallel_coordinates.show()
# Split each 'Vehicle Location' string on whitespace: the second token, minus
# its first character, is parsed as the longitude; the third token, minus its
# last character, is parsed as the latitude.
df['Longitude'] = df['Vehicle Location'].map(lambda point: float(point.split()[1][1:]))
df['Latitude'] = df['Vehicle Location'].map(lambda point: float(point.split()[2][:-1]))

# Number of rows for each distinct (latitude, longitude, postal code, county, state).
location_counts = (
    df.groupby(['Latitude', 'Longitude', 'Postal Code', 'County', "State"])
    .size()
    .rename('EV Count')
    .reset_index()
)

# One marker per location; both marker size and color encode the vehicle count.
fig_scatter_map = px.scatter_mapbox(
    data_frame=location_counts,
    lat='Latitude',
    lon='Longitude',
    size='EV Count',
    color='EV Count',
    center={'lat': 37.0902, 'lon': -95.7129},
    zoom=3,
    mapbox_style='carto-positron',
    title='Scatter Map of Electric Vehicle Locations',
)
fig_scatter_map.show()
import bar_chart_race as bcr
import warnings
# Suppress UserWarning messages for the rest of the session.
warnings.filterwarnings("ignore", category=UserWarning)

# NOTE: this converts 'Model Year' to strings on the shared DataFrame, so any
# code that runs afterwards sees string years rather than integers.
df['Model Year'] = df['Model Year'].astype(str)

# Long table: one row per (model year, make) pair that occurs in the data.
grouped_data = df.groupby(['Model Year', 'Make']).size().reset_index(name='Count')

# Wide table: one row per model year, one column per make. A make with no
# vehicles in a given model year has no row in grouped_data and would pivot to
# NaN; that is a genuine count of zero, so fill it explicitly instead of
# handing missing values to bar_chart_race.
pivoted_data = grouped_data.pivot(
    index='Model Year', columns='Make', values='Count'
).fillna(0)

# Render the animation to an MP4 file.
bcr.bar_chart_race(df=pivoted_data, filename='EV_make_racing_bar_plot.mp4',
                   orientation='h', sort='desc', n_bars=10,
                   title='EV Make Count Over the Years', filter_column_colors=True, period_length=1000)

# Render the same animation again without a filename and display the returned
# object inline. `display` is not imported in the visible code; it is
# presumably the notebook-provided built-in.
display(bcr.bar_chart_race(df=pivoted_data, orientation='h',
                           sort='desc', n_bars=10, title='EV Make Count Over the Years',
                           filter_column_colors=True, period_length=1000))