Formula One Data Exploration - Part I
Overview Drivers and Constructors
- Load data from csv
- Sample and describe data
- Joining data from different sources
- Plot Map with Circuits
- Drivers with more Wins
- Constructors ChampionShip by Season
- Evolution of Constructors
using DataFrames
using CSV
using Plots
using Statistics
using StatsPlots
using Shapefile
# using ZipFile
# for f in r.files
# println("Filename: $(f.name)")
# open(f.name, "w") do io
# write(io, read(f))
# end
# end
# close(r)
shp_countries = Shapefile.shapes(Shapefile.Table("./ne_110m_admin_0_countries.shp"));
df_drivers = CSV.read("drivers.csv", DataFrame);
df_circuits = CSV.read("circuits.csv", DataFrame);
df_results = CSV.read("results.csv", DataFrame);
df_races = CSV.read("races.csv", DataFrame);
df_constructors = CSV.read("constructors.csv", DataFrame);
first(df_drivers, 5) #show first 5
df_drivers |> describe
first(df_circuits, 5) #show first 5
df_circuits |> describe
df_circuits[:,4:end]
first(df_results, 5) #show first 5
df_results |> describe
first(df_races, 5) #show first 5
df_races |> describe
first(df_constructors, 5) #show first 5
df_constructors |> describe
df_resul = innerjoin(df_results, df_drivers, on = :driverId, makeunique=true);
df_resulcons = innerjoin(df_resul, df_races, on = :raceId, makeunique=true);
df_resulrac = innerjoin(df_resulcons, df_constructors, on = :constructorId, makeunique=true);
first(df_resul, 5)
for nam in names(df_resul)
println(nam)
end
lat = df_circuits[!,"lat"];
lng = df_circuits[!,"lng"];
gr()
plot(shp_countries, alpha=0.21)
scatter!(lng, lat, ms = 3, c = "red", type="heatmap")
xlabel!("latitude")
ylabel!("longitude")
title!("Formula One Circuits")
wins(pos) = pos == "1"
df_numWins = filter(:position => wins, df_resul)
first(df_numWins,5)
df_numWins |> names;
gdf = groupby(df_numWins, :driverId);
dfw = combine(gdf, nrow => :WinsCount);
df_drivers_winners = innerjoin(dfw, df_drivers, on = :driverId, makeunique=true);
sort!(df_drivers_winners, :WinsCount, rev=true);
bar( df_drivers_winners[:,:driverRef], df_drivers_winners[:,:WinsCount], rotation=45, xticks = :all,size=(1200,400) )
Very hard to see with this long tail, so we will limit to top 25 drivers
df_drivers_winners25 = first(df_drivers_winners,25); #show first 25
plotlyjs()
bar( df_drivers_winners25[:,:driverRef], df_drivers_winners25[:,:WinsCount],
rotation=45, xticks = :all,size=(800,400),
title="Top 25 Formula One Drivers with the Most Wins")
df_numWins = filter(:year => year -> year == 2013, df_resulrac);
gdf = groupby(df_numWins, :constructorRef);
dfc = combine(gdf, :points => sum)
sort!(dfc, :points_sum, rev=true);
plotlyjs()
bar( dfc[:,:constructorRef], dfc[:,:points_sum], rotation=45, xticks = :all,size=(800,400),
title="Constructor Championship Ranking")
construcRanking(y) = filter(:year => year -> year == y, df_resulrac);
construcRanking(2020)
One thing I would like to see is the evolution of constructors over the years:
gdfy = groupby(df_resulrac, [:constructorRef, :year]);
dfcy = combine(gdfy, :points => sum)
filter(:year => year -> year == 2008, dfcy)
f(ref, yr) = ref == "mclaren" && yr == 2008;
filter([:constructorRef, :year] => f, dfcy)
filter([:constructorRef, :year] => f, dfcy)
getConstructPointsYr(constrRef, yr) = filter([:constructorRef, :year] => (x,y) -> x == constrRef && y >= yr, dfcy)
mc = getConstructPointsYr("mclaren", 2008);
ferr = getConstructPointsYr("ferrari", 2008);
williams = getConstructPointsYr("williams", 2008);
mercedes = getConstructPointsYr("mercedes", 2008);
redbull = getConstructPointsYr("red_bull", 2008);
plot(ferr[:, :year], ferr[:, :points_sum], marker=:o, color="red",
title="Constructors Points since 2008", grid=true, size=(800,400),
rotation=45, xticks = 2008:2022, legend = true, label="Ferrari")
plot!(mc[:, :year], mc[:, :points_sum], marker=:^, color="blue", label = "McLaren")
plot!(williams[:, :year], williams[:, :points_sum], marker=:diamond, color="green", label = "Williams")
plot!(mercedes[:, :year], mercedes[:, :points_sum], marker=:square, color="orange", label = "Mercedes")
plot!(redbull[:, :year], redbull[:, :points_sum], marker=:cross, color="purple", label = "Red Bull")
That is it for now. Next post we will analyse more driver and lap data.