Download and Preprocess StatsBomb and SkillCorner Data

This script downloads StatsBomb event and match data, matches it with SkillCorner tracking data, and processes the combined data.

Dependencies

  • openstarlab_preprocessing

Usage

from preprocessing import SAR_data

#down_load_statsbomb_data function
def download_statsbomb_data(creds, save_dir,competition_id=11, season_id=281):

    os.makedirs(save_dir, exist_ok=True)

    def convert_df_in_dict(d):
        for key, value in d.items():
            if isinstance(value, pd.DataFrame):
                d[key] = value.to_dict(orient='records')
            elif isinstance(value, dict):
                convert_df_in_dict(value)
        return d

    # Get Statsbomb matches data
    matches = sb.matches(competition_id=competition_id, season_id=season_id, creds=creds)
    matches["competition_id"] = competition_id
    matches["season_id"] = season_id
    #moev the competition_id and season_id to the first column
    cols = matches.columns.tolist()
    cols = cols[-2:] + cols[:-2]
    matches = matches[cols]
    #save the matches to csv
    matches.to_csv(os.path.join(save_dir, "matches.csv"), index=False)

    # Get Statsbomb lineups and events
    os.makedirs(os.path.join(save_dir, "lineups"), exist_ok=True)
    os.makedirs(os.path.join(save_dir, "events"), exist_ok=True)
    for match_id in tqdm(matches["match_id"].unique()):
        lineups = sb.lineups(match_id=match_id, creds=creds)
        events = sb.events(match_id=match_id, include_360_metrics=True, creds=creds)
        events.to_csv(os.path.join(save_dir, "events", f"{match_id}.csv"), index=False)
        #save the lineups as json and with row changes
        lineups = convert_df_in_dict(lineups)
        with open(os.path.join(save_dir, "lineups", f"{match_id}.json"), "w") as f:
            json.dump(lineups, f, indent=4)

if __name__ == "__main__":
    #Statsbomb API
    creds = {"user": "input your Statsbomb api user name here", "passwd": "input your Statsbomb api password here"}
    #Statsbomb event data saving dir
    save_dir = "/statsbomb"
    #path to the skillcorner tracking data
    tracking_path="/skillcorner/tracking"
    #path to the skillcorner match data
    match_path="/skillcorner/match"

    download_statsbomb_data(creds, save_dir)


    #Match the statsbomb and skillcorner (one file)
    data_path = save_dir+'/events'
    state_def = 'PVS'
    match_id = "1120811"
    config_path = '/path/to/preprocess_config.json'
    statsbomb_skillcorner_match_id = '/path/to/statsbomb_skillcorner_match_id.json'

    #Load and preprocess single match data
    Soccer_SAR_data(
        data_provider='statsbomb_skillcorner',
        state_def=state_def,
        data_path=data_path,
        match_id=match_id, # match_id for skillcorner
        config_path=config_path,
        statsbomb_skillcorner_match_id=statsbomb_skillcorner_match_id,
        preprocess_method='SAR'
    ).preprocess_method()

    #Match the statsbomb and skillcorner (multiple files)
    config_path = '/path/to/preprocess_config.json'
    statsbomb_skillcorner_match_id = '/path/to/statsbomb_skillcorner_match_id.json'

    #Load and preprocess multiple matches data
    Soccer_SAR_data(
        data_provider='statsbomb_skillcorner',
        state_def=state_def,
        data_path=data_path,
        config_path=config_path,
        statsbomb_skillcorner_match_id=statsbomb_skillcorner_match_id,
        max_workers=2,
        preprocess_method='SAR'
    ).preprocess_method()

    print("---------------done-----------------")