sio
sio.Rmd
splits.io data
Load runs and runners
The splitsio module returns its own object types; here I convert the runs to a list of dictionaries that I can save locally, so I don’t have to call the API every time I knit the HTML.
# this chunk is not evaluated
# to minimise api calls
import json
import pprint

# get 100% Category Super Metroid game data
sio_cat = splitsio.Category.from_id("279", historic=True)
type(sio_cat)

# extract runners from category
sio_runners = sio_cat.runners()
type(sio_runners)
sio_runners[0]
sio_runners[-1]

# extract run from category
sio_runs = sio_cat.runs()
type(sio_runs)
sio_runs[0]
sio_runs[-1]

# convert the run objects to dictionaries
sio_run_dicts = [run.to_dict() for run in sio_runs]

# each element is a dictionary with keys
sio_run_dicts[0].keys()
# get id
sio_run_dicts[0]['id']
# get realtime
sio_run_dicts[0]['realtime_duration_ms']

# raw dump of every run, then a pretty-printed look at the first three
print(sio_run_dicts)
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(sio_run_dicts[0:3])

# attributes of the category object itself
sio_cat.__dict__
# NOTE(review): assumes the category object exposes keys(); only __dict__ is
# demonstrated above - confirm against the splitsio package before running.
for key in sio_cat.keys():
    print(key.__dict__)

# save object locally
# (the file holds the list of run dictionaries, despite the sio_cat name)
file_path = "data-raw/sio_cat.json"
with open(file_path, "w") as outfile:
    json.dump(sio_run_dicts, outfile, indent=4)
Run dataframe
Objective: to wrangle a data frame with run data
sio_run_df | description | splitsio |
---|---|---|
run_id | splits.io id | id |
date | timestamp of run upload | ? one of 'created_at', 'parsed_at', 'updated_at'; we’ll use 'updated_at' for now, but should check the splitsio docs for which is correct |
run_time | total time of run in s or ms | realtime_duration_ms |
rank | “historical” if from previous record (nb only applies to speedrun.com); otherwise rank as int, list column | ? |
src_player_id | speedrun.com player id | srdc_id |
Try with list of dictionaries
# player data
pd.json_normalize(sio_run_dicts[0]['runners'])['id'][0]
#> '29900'
sio_run_player_index = 10 # len(sio_run_dicts)
# one runners table per run, each row tagged with the id of the run it came from
run_runners_list = [
    pd.json_normalize(run['runners']).assign(run_id=run['id'])
    for run in sio_run_dicts
]
run_runners_concat = pd.concat(run_runners_list)
# keep only the identifying columns and give them player_* names
run_runners = (
    run_runners_concat
    .loc[:, ['name', 'run_id', 'id']]
    .rename(columns={'name': 'player_name', 'id': 'player_id'})
)
sio_run_dicts[0].keys()
#> dict_keys(['id', 'srdc_id', 'realtime_duration_ms', 'realtime_sum_of_best_ms', 'gametime_duration_ms', 'gametime_sum_of_best_ms', 'default_timing', 'program', 'attempts', 'image_url', 'parsed_at', 'created_at', 'updated_at', 'video_url', 'game', 'category', 'runners', 'segments', 'histories'])
# create a dataframe
# output column name -> key to read from each run dictionary
run_columns = {
    'run_id': 'id',
    't_ms': 'realtime_duration_ms',
    'date': 'updated_at',
    'image_url': 'image_url',
    'video_url': 'video_url',
}
sio_runs_df = pd.DataFrame({
    column: [run[key] for run in sio_run_dicts]
    for column, key in run_columns.items()
})
sio_runs_df.head()
#> run_id t_ms ... image_url video_url
#> 0 awdz 4473112 ... https://i.imgur.com/zSvYVbK.png None
#> 1 awcd 6516886 ... None None
#> 2 avqg 4965711 ... None None
#> 3 avgw 6885726 ... None None
#> 4 av92 4858865 ... None None
#>
#> [5 rows x 5 columns]
# need to merge with run runners
run_runners.head()
#> player_name run_id player_id
#> 0 CScottyW awdz 29900
#> 0 juniorr300 awcd 89051
#> 0 Hatz avqg 91587
#> 0 anatomecha avgw 89646
#> 0 eholden av92 37661
# left join: keep every run, adding the runner columns that share its run_id
run_runners_df = pd.merge(
    sio_runs_df, run_runners, how='left', on="run_id"
).reset_index(drop=True)
# run and runner data
run_runners_df.head()
#> run_id t_ms date ... video_url player_name player_id
#> 0 awdz 4473112 2023-06-27T04:24:02.434Z ... None CScottyW 29900
#> 1 awcd 6516886 2023-06-26T20:12:45.152Z ... None juniorr300 89051
#> 2 avqg 4965711 2023-06-21T20:55:31.169Z ... None Hatz 91587
#> 3 avgw 6885726 2023-06-19T19:13:58.050Z ... None anatomecha 89646
#> 4 av92 4858865 2023-06-17T23:19:14.463Z ... None eholden 37661
#>
#> [5 rows x 7 columns]
create segments dataframe
segment_df | description |
---|---|
run_id | unique identifier of run |
segment_id | unique identifier of segment |
game_event | description of split |
t_s | time in seconds, measured to millisecond precision |
# first level of segments: each run stores its segments as a list
type(sio_run_dicts[0]['segments'])
#> <class 'list'>
# each entry of that list is a dictionary describing one segment
type(sio_run_dicts[0]['segments'][0])
#> <class 'dict'>
# inspect all segments of the first run
sio_run_dicts[0]['segments']
# a single segment
#> [{'id': 'ff4b6370-f7aa-4d46-a9d2-90b1c286b050', 'name': 'Taco Tank', 'display_name': 'Taco Tank', 'segment_number': 0, 'realtime_start_ms': 0, 'realtime_duration_ms': 285053, 'realtime_end_ms': 285053, 'realtime_shortest_duration_ms': 282335, 'realtime_gold': False, 'realtime_skipped': False, 'realtime_reduced': False, 'gametime_start_ms': 0, 'gametime_duration_ms': 0, 'gametime_end_ms': 0, 'gametime_shortest_duration_ms': None, 'gametime_gold': False, 'gametime_skipped': True, 'gametime_reduced': False, 'histories': None}, {'id': 'e34dcf39-1394-4152-a11e-ee1b77e94754', 'name': 'X-Ray', 'display_name': 'X-Ray', 'segment_number': 1, 'realtime_start_ms': 285053, 'realtime_duration_ms': 362295, 'realtime_end_ms': 647348, 'realtime_shortest_duration_ms': 356183, 'realtime_gold': False, 'realtime_skipped': False, 'realtime_reduced': False, 'gametime_start_ms': 0, 'gametime_duration_ms': 0, 'gametime_end_ms': 0, 'gametime_shortest_duration_ms': None, 'gametime_gold': False, 'gametime_skipped': True, 'gametime_reduced': False, 'histories': None}, {'id': '2248d379-b4b8-45d7-b7ae-fba99580035e', 'name': 'Grapple', 'display_name': 'Grapple', 'segment_number': 2, 'realtime_start_ms': 647348, 'realtime_duration_ms': 346896, 'realtime_end_ms': 994244, 'realtime_shortest_duration_ms': 341495, 'realtime_gold': False, 'realtime_skipped': False, 'realtime_reduced': False, 'gametime_start_ms': 0, 'gametime_duration_ms': 0, 'gametime_end_ms': 0, 'gametime_shortest_duration_ms': None, 'gametime_gold': False, 'gametime_skipped': True, 'gametime_reduced': False, 'histories': None}, {'id': '63fdf981-7762-4f42-998e-af5c81fad0de', 'name': 'Phan (f/m)', 'display_name': 'Phan (f/m)', 'segment_number': 3, 'realtime_start_ms': 994244, 'realtime_duration_ms': 298478, 'realtime_end_ms': 1292722, 'realtime_shortest_duration_ms': 289674, 'realtime_gold': False, 'realtime_skipped': False, 'realtime_reduced': False, 'gametime_start_ms': 0, 'gametime_duration_ms': 0, 'gametime_end_ms': 0, 
'gametime_shortest_duration_ms': None, 'gametime_gold': False, 'gametime_skipped': True, 'gametime_reduced': False, 'histories': None}, {'id': 'd31efcb8-a420-4d7f-aa11-4688b4a6741f', 'name': '100 Fishing Quests', 'display_name': '100 Fishing Quests', 'segment_number': 4, 'realtime_start_ms': 1292722, 'realtime_duration_ms': 316337, 'realtime_end_ms': 1609059, 'realtime_shortest_duration_ms': 316279, 'realtime_gold': False, 'realtime_skipped': False, 'realtime_reduced': False, 'gametime_start_ms': 0, 'gametime_duration_ms': 0, 'gametime_end_ms': 0, 'gametime_shortest_duration_ms': None, 'gametime_gold': False, 'gametime_skipped': True, 'gametime_reduced': False, 'histories': None}, {'id': '3202c9ab-05f6-49fa-9e85-5accf5bd2dfd', 'name': 'Exit Tourian', 'display_name': 'Exit Tourian', 'segment_number': 5, 'realtime_start_ms': 1609059, 'realtime_duration_ms': 392693, 'realtime_end_ms': 2001752, 'realtime_shortest_duration_ms': 375931, 'realtime_gold': False, 'realtime_skipped': False, 'realtime_reduced': False, 'gametime_start_ms': 0, 'gametime_duration_ms': 0, 'gametime_end_ms': 0, 'gametime_shortest_duration_ms': None, 'gametime_gold': False, 'gametime_skipped': True, 'gametime_reduced': False, 'histories': None}, {'id': '28c363db-ad09-47b5-9dc6-fa12f96eb699', 'name': 'Space Jump', 'display_name': 'Space Jump', 'segment_number': 6, 'realtime_start_ms': 2001752, 'realtime_duration_ms': 407696, 'realtime_end_ms': 2409448, 'realtime_shortest_duration_ms': 394762, 'realtime_gold': False, 'realtime_skipped': False, 'realtime_reduced': False, 'gametime_start_ms': 0, 'gametime_duration_ms': 0, 'gametime_end_ms': 0, 'gametime_shortest_duration_ms': None, 'gametime_gold': False, 'gametime_skipped': True, 'gametime_reduced': False, 'histories': None}, {'id': 'ddf8222e-414d-4aa9-9b3d-e5d1954765d6', 'name': 'Spring Ball', 'display_name': 'Spring Ball', 'segment_number': 7, 'realtime_start_ms': 2409448, 'realtime_duration_ms': 246277, 'realtime_end_ms': 2655725, 
'realtime_shortest_duration_ms': 245998, 'realtime_gold': False, 'realtime_skipped': False, 'realtime_reduced': False, 'gametime_start_ms': 0, 'gametime_duration_ms': 0, 'gametime_end_ms': 0, 'gametime_shortest_duration_ms': None, 'gametime_gold': False, 'gametime_skipped': True, 'gametime_reduced': False, 'histories': None}, {'id': '9a98d829-f21c-43db-9b5a-5b455428c653', 'name': 'Ice', 'display_name': 'Ice', 'segment_number': 8, 'realtime_start_ms': 2655725, 'realtime_duration_ms': 392564, 'realtime_end_ms': 3048289, 'realtime_shortest_duration_ms': 389927, 'realtime_gold': False, 'realtime_skipped': False, 'realtime_reduced': False, 'gametime_start_ms': 0, 'gametime_duration_ms': 0, 'gametime_end_ms': 0, 'gametime_shortest_duration_ms': None, 'gametime_gold': False, 'gametime_skipped': True, 'gametime_reduced': False, 'histories': None}, {'id': 'a05ee907-b9d1-407f-9466-59fc48c123bf', 'name': 'The Riddler', 'display_name': 'The Riddler', 'segment_number': 9, 'realtime_start_ms': 3048289, 'realtime_duration_ms': 197030, 'realtime_end_ms': 3245319, 'realtime_shortest_duration_ms': 182305, 'realtime_gold': False, 'realtime_skipped': False, 'realtime_reduced': False, 'gametime_start_ms': 0, 'gametime_duration_ms': 0, 'gametime_end_ms': 0, 'gametime_shortest_duration_ms': None, 'gametime_gold': False, 'gametime_skipped': True, 'gametime_reduced': False, 'histories': None}, {'id': '1f4d582d-7713-4476-bdc9-cfd717f7eddc', 'name': 'Grapple (Reprise)', 'display_name': 'Grapple (Reprise)', 'segment_number': 10, 'realtime_start_ms': 3245319, 'realtime_duration_ms': 387838, 'realtime_end_ms': 3633157, 'realtime_shortest_duration_ms': 383449, 'realtime_gold': False, 'realtime_skipped': False, 'realtime_reduced': False, 'gametime_start_ms': 0, 'gametime_duration_ms': 0, 'gametime_end_ms': 0, 'gametime_shortest_duration_ms': None, 'gametime_gold': False, 'gametime_skipped': True, 'gametime_reduced': False, 'histories': None}, {'id': '80f0e660-0058-4b06-9d62-e75ec9a57b08', 'name': 
'Make a Bet', 'display_name': 'Make a Bet', 'segment_number': 11, 'realtime_start_ms': 3633157, 'realtime_duration_ms': 433039, 'realtime_end_ms': 4066196, 'realtime_shortest_duration_ms': 424170, 'realtime_gold': False, 'realtime_skipped': False, 'realtime_reduced': False, 'gametime_start_ms': 0, 'gametime_duration_ms': 0, 'gametime_end_ms': 0, 'gametime_shortest_duration_ms': None, 'gametime_gold': False, 'gametime_skipped': True, 'gametime_reduced': False, 'histories': None}, {'id': '8139d4e1-2d1b-4237-a528-440ba5c93002', 'name': 'Michael', 'display_name': 'Michael', 'segment_number': 12, 'realtime_start_ms': 4066196, 'realtime_duration_ms': 406916, 'realtime_end_ms': 4473112, 'realtime_shortest_duration_ms': 396203, 'realtime_gold': False, 'realtime_skipped': False, 'realtime_reduced': False, 'gametime_start_ms': 0, 'gametime_duration_ms': 0, 'gametime_end_ms': 0, 'gametime_shortest_duration_ms': None, 'gametime_gold': False, 'gametime_skipped': True, 'gametime_reduced': False, 'histories': None}]
sio_run_dicts[0]['segments'][0].keys()
# convert to df
# I think this is a dictionary of single-element entries, so can be flattened
# how to efficiently check this?
#> dict_keys(['id', 'name', 'display_name', 'segment_number', 'realtime_start_ms', 'realtime_duration_ms', 'realtime_end_ms', 'realtime_shortest_duration_ms', 'realtime_gold', 'realtime_skipped', 'realtime_reduced', 'gametime_start_ms', 'gametime_duration_ms', 'gametime_end_ms', 'gametime_shortest_duration_ms', 'gametime_gold', 'gametime_skipped', 'gametime_reduced', 'histories'])
pd.DataFrame(sio_run_dicts[0]['segments']).head()
# List of dataframes of segments for each run
#> id ... histories
#> 0 ff4b6370-f7aa-4d46-a9d2-90b1c286b050 ... None
#> 1 e34dcf39-1394-4152-a11e-ee1b77e94754 ... None
#> 2 2248d379-b4b8-45d7-b7ae-fba99580035e ... None
#> 3 63fdf981-7762-4f42-998e-af5c81fad0de ... None
#> 4 d31efcb8-a420-4d7f-aa11-4688b4a6741f ... None
#>
#> [5 rows x 19 columns]
# one segments table per run, each row tagged with the id of its run
segments_list = [
    pd.DataFrame(run['segments']).assign(run_id=run['id'])
    for run in sio_run_dicts
]
# stack them into a single table with a fresh 0..n-1 row index
segment_raw = pd.concat(segments_list, ignore_index=True)
segment_raw.columns
#> Index(['id', 'name', 'display_name', 'segment_number', 'realtime_start_ms',
#> 'realtime_duration_ms', 'realtime_end_ms',
#> 'realtime_shortest_duration_ms', 'realtime_gold', 'realtime_skipped',
#> 'realtime_reduced', 'gametime_start_ms', 'gametime_duration_ms',
#> 'gametime_end_ms', 'gametime_shortest_duration_ms', 'gametime_gold',
#> 'gametime_skipped', 'gametime_reduced', 'histories', 'run_id'],
#> dtype='object')
segment_raw.shape
#> (18458, 20)
segment_raw.head()
#> id name ... histories run_id
#> 0 ff4b6370-f7aa-4d46-a9d2-90b1c286b050 Taco Tank ... None awdz
#> 1 e34dcf39-1394-4152-a11e-ee1b77e94754 X-Ray ... None awdz
#> 2 2248d379-b4b8-45d7-b7ae-fba99580035e Grapple ... None awdz
#> 3 63fdf981-7762-4f42-998e-af5c81fad0de Phan (f/m) ... None awdz
#> 4 d31efcb8-a420-4d7f-aa11-4688b4a6741f 100 Fishing Quests ... None awdz
#>
#> [5 rows x 20 columns]
segment_raw.columns
#> Index(['id', 'name', 'display_name', 'segment_number', 'realtime_start_ms',
#> 'realtime_duration_ms', 'realtime_end_ms',
#> 'realtime_shortest_duration_ms', 'realtime_gold', 'realtime_skipped',
#> 'realtime_reduced', 'gametime_start_ms', 'gametime_duration_ms',
#> 'gametime_end_ms', 'gametime_shortest_duration_ms', 'gametime_gold',
#> 'gametime_skipped', 'gametime_reduced', 'histories', 'run_id'],
#> dtype='object')
sio_runs_df.columns
#> Index(['run_id', 't_ms', 'date', 'image_url', 'video_url'], dtype='object')
# Attach the run-level columns (t_ms, date, image_url, video_url) to every
# segment row. An earlier draft of this merge was commented out, which left
# sio_segments_df undefined and made the three inspection calls below raise
# NameError. That draft also renamed 'display_name' to 'player_name'; it is
# dropped because display_name holds the split label (e.g. 'Taco Tank'), not a
# runner, and DataFrame.rename only renames columns when given columns=...
sio_segments_df = segment_raw.merge(sio_runs_df, on="run_id", how="left")
sio_segments_df.shape
sio_segments_df.columns
sio_segments_df.head()
# tidy it up in R before writing it
sio_segments <- py$segment_raw
# run_runners_df (not run_runners) is the table that carries t_ms, date,
# image_url and video_url, which the rename()/select() below rely on
sio_runs <- py$run_runners_df

# one row per segment, with the run total time and the runner attached
sio_seg_runs <- left_join(
  sio_segments %>% distinct(),
  sio_runs %>%
    distinct() %>%
    rename(total_t_ms = t_ms) %>%
    select(-video_url, -image_url, -date),
  by = "run_id"
)
colnames(sio_seg_runs)

# list any (run_id, segment id) pairs that occur more than once after the join
sio_seg_runs %>%
  count(run_id, id) %>%
  arrange(desc(n)) %>%
  filter(n > 1)

# Build from the joined table: player_name and player_id only exist there.
# The segment `name` column is a split label, not a runner, so it is no longer
# renamed to player_name. The stray closing parenthesis that made this chunk
# fail to parse has been removed.
sio_splits_df <-
  sio_seg_runs %>%
  select(
    player_name,
    game_event = display_name,
    segment_number,
    realtime_start_ms,
    segment_id = id,
    run_id,
    player_id,
    everything()
  ) %>%
  select(
    -contains("gametime"),
    -contains("reduced"),
    -contains("skipped"),
    -histories
  ) %>%
  mutate(player_id = as.character(player_id))
head(sio_splits_df)
# write data - this chunk is not evaluated on knit
# Saves sio_splits_df through usethis::use_data(); overwrite = TRUE replaces
# any copy that was saved previously.
usethis::use_data(sio_splits_df, overwrite = TRUE)