MLS Expected Points

Author

Trevor S.

Motivation

Note that his project is a work in progress. I will update as I have the time. I am creating an RShiny app which will allow users to view whichever of this data they want (for any league with expected goal data on FBRef for that season).

Setup

Code
library(tidyr)
library(tidyverse)
library(gt)
library(rvest)
library(dplyr)
library(scales)
library(reactable)
library(janitor)

Code

Code
n <- 10000 # number of times to simulate each match
set.seed(2025) # for reproducibility

get_data <- function(year){
  url <- paste0("https://fbref.com/en/comps/22/", year, "/schedule/",
                year, "-Major-League-Soccer-Scores-and-Fixtures")
  html <- read_html(url)
  html_table <- html |> 
    html_element("table") |> 
    html_table() 
  
  head(html_table)
  
  data <- html_table |> 
    janitor::clean_names() |> 
    filter(date != "Date" & score != "") |>
    separate_wider_regex(score, patterns = c(home_score = "[0-9]+", "–", 
                                      away_score = "[0-9]+")) |>
      select("date", "home", "home_xg" = "x_g", "home_score", 
             "away_score", "away_xg" = "x_g_2", "away") |> 
      mutate("home_pts" = ifelse(home_score > away_score, 3, 
                               ifelse(home_score == away_score, 1, 0)), 
           "away_pts" = ifelse(away_score > home_score, 3, 
                               ifelse(away_score == home_score, 1, 0))) |>
    type_convert()
  data
}

xpts_table <- function(data){
  home_xpts <- rep(NA, times = nrow(data))
  away_xpts <- rep(NA, times = nrow(data))
  for(i in 1:nrow(data)){
    home_sim <- rpois(n, data$home_xg[i])
    away_sim <- rpois(n, data$away_xg[i])
    home_xpts[i] <- (3 * sum(home_sim > away_sim) + sum(home_sim == away_sim))/n
    away_xpts[i] <- (3 * sum(away_sim > home_sim) + sum(away_sim == home_sim))/n
  }
  
  # combine table of results with simulated results
  data <- cbind(data, home_xpts, away_xpts) 
  
  xpts_table <- data |>
    pivot_longer(cols = c("home", "away"), values_to = "team") |> 
    mutate("pts" = ifelse(name == "home", home_pts, away_pts),
           "scored" = ifelse(name == "home", home_score, away_score), 
           "against" = ifelse(name == "home", away_score, home_score), 
           "xpts" = ifelse(name == "home", home_xpts, away_xpts), 
           "xg_for" = ifelse(name == "home", home_xg, away_xg), 
           "xg_ag" = ifelse(name == "home", away_xg, home_xg),
           "pts_over_exp" = pts - xpts) |>
    group_by(team) |> 
    summarize(gp = n(), across(pts:xg_ag, \(x) round(mean(x), 3)), 
              pts_over_exp = round(sum(pts_over_exp), 3)) |> 
  arrange(desc(xpts))
  xpts_table
}

Expected Points Table

Code
# use reactable to allow for interactive sorting
mls_2025 <- get_data(2025)
mls_xpts_2025 <- xpts_table(mls_2025)
reactable(mls_xpts_2025, pagination = FALSE)

Whitecaps Matches

Code
set.seed(2025)
n <- 10000

team_stats <- function(team, data){
  team_matches <- data |> 
    filter(home == team | away == team) |> 
    mutate(opponent = ifelse(home == team, str_c("vs ", away), 
                             str_c("@ ", home)), 
         team_gf = ifelse(home == team, home_score, away_score),
         team_ga = ifelse(home == team, away_score, home_score),
         team_xgf = ifelse(home == team, home_xg, away_xg),
         team_xga = ifelse(home == team, away_xg, home_xg),
         res = ifelse(team_gf > team_ga, "W", 
                      ifelse(team_gf == team_ga, "D", "L")), 
         result = str_c(team_gf, "-", team_ga, " ", res)) |> 
    select(date, opponent, result, team_xgf, team_xga)

  team_pwin <- rep(NA, times = nrow(team_matches))
  team_pdraw <- rep(NA, times = nrow(team_matches))
  team_xpts <- rep(NA, times = nrow(team_matches))
  

  for(i in 1:nrow(team_matches)){
    team_sim <- rpois(n, team_matches$team_xgf[i])
    opp_sim <- rpois(n, team_matches$team_xga[i])
    team_pwin[i] <- label_percent(accuracy = 0.01)(round(sum(team_sim > opp_sim) / n, 4))
    team_pdraw[i] <- label_percent(accuracy = 0.01)(round(sum(team_sim == opp_sim) / n, 4))
    team_xpts[i] <- round((3*sum(team_sim > opp_sim) + 
                          sum(team_sim == opp_sim))/n, 3)
  }
  team_matches <- cbind(team_matches, team_pwin, team_pdraw, team_xpts)

  team_matches |> 
    select("Date" = date, "Opponent" = opponent, "Result" = result, 
         "xGF" = team_xgf, "xGA" = team_xga, "xPts" = team_xpts, 
         "P Win" = team_pwin, "P Draw" = team_pdraw) 
}


vwfc_stats <- team_stats("Vancouver W'caps", mls_2025)
gt(vwfc_stats)
Date Opponent Result xGF xGA xPts P Win P Draw
2025-02-23 @ Portland Timbers 4-1 W 3.0 0.7 2.590 82.42% 11.70%
2025-03-02 vs LA Galaxy 2-1 W 1.9 0.6 2.260 68.66% 19.98%
2025-03-08 vs CF Montréal 2-0 W 1.6 1.3 1.560 43.76% 24.76%
2025-03-15 @ FC Dallas 1-0 W 0.8 0.2 1.865 47.89% 42.85%
2025-03-22 vs Chicago Fire 1-3 L 1.2 2.2 0.813 20.28% 20.42%
2025-03-29 @ Toronto FC 0-0 D 1.3 0.7 1.824 51.52% 27.84%
2025-04-05 vs Colorado Rapids 2-0 W 2.0 0.3 2.532 78.99% 16.22%
2025-04-12 vs Austin 5-1 W 4.5 0.7 2.861 94.07% 3.92%
2025-04-19 @ St. Louis 0-0 D 1.1 1.0 1.424 37.32% 30.40%
2025-04-27 @ Minnesota Utd 3-1 W 1.2 1.1 1.428 38.12% 28.42%
2025-05-03 vs Real Salt Lake 2-1 W 2.9 1.6 2.093 64.28% 16.47%
2025-05-11 vs LAFC 2-2 D 0.8 1.8 0.698 15.65% 22.84%
2025-05-17 @ Austin 0-0 D 0.4 1.4 0.578 10.31% 26.88%
2025-05-24 @ Real Salt Lake 3-2 W 3.0 1.0 2.465 77.76% 13.25%
2025-05-28 vs Minnesota Utd 0-0 D 1.4 0.2 2.332 69.00% 26.22%
2025-06-08 vs Seattle Sounders 3-0 W 1.9 0.6 2.282 69.34% 20.22%
2025-06-14 @ Columbus Crew 1-2 L 0.7 1.2 0.933 20.69% 31.23%
2025-06-25 vs San Diego FC 3-5 L 1.1 1.5 1.098 28.03% 25.70%
2025-06-29 @ LAFC 1-0 W 0.7 0.9 1.154 26.55% 35.73%
2025-07-04 @ LA Galaxy 0-3 L 0.5 1.4 0.703 14.19% 27.73%
2025-07-12 @ Colorado Rapids 0-3 L 1.2 2.5 0.666 16.34% 17.62%
2025-07-16 @ Houston Dynamo 3-0 W 2.0 0.8 2.171 65.51% 20.61%
2025-07-19 @ San Diego FC 1-1 D 2.0 1.1 1.990 59.07% 21.74%
2025-07-26 vs Sporting KC 3-0 W 2.5 1.6 1.903 57.11% 18.97%

Plot by Match

Code
weekly_plot <- function(team_stats){
  match = 1:nrow(team_stats)
  team_plot <- cbind(team_stats, match)
  
  substrRight <- function(x, n){
    substr(x, nchar(x)-n+1, nchar(x))
  }
  
  
  plot(x = match, y = vwfc_stats$xPts, 
       bg = ifelse(substrRight(vwfc_stats$Result, 1) == "W", "darkgreen", 
                   ifelse(substrRight(vwfc_stats$Result, 1) == "D", "yellow", 
                          "red")), pch = 21)
  abline(h = mean(team_stats$xPts), col = "blue", lwd = 2)
  abline(h = sum(mls_xpts_2025$xpts * mls_xpts_2025$gp)/
           sum(mls_xpts_2025$gp), col = "red", lwd = 2) 
}

weekly_plot(vwfc_stats)