misc

Unnamed repository; edit this file 'description' to name the repository.
git clone git://git.alexwennerberg.com/misc
Log | Files | Refs | README | LICENSE

commit 86b2527c940d5905910b331830823499ef4565c8
parent b490366ccfdf93f62a2d33742e7add7ad391526c
Author: alex wennerberg <alex@alexwennerberg.com>
Date:   Sat, 24 Jun 2023 11:22:21 -0400

3cm data pull script

Diffstat:
A 3cm/getdata.py | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 94 insertions(+), 0 deletions(-)

# File: 3cm/getdata.py
#
# Set the GOOG_KEY env variable to a valid API key.
# All rounds are available at
# https://sites.google.com/view/3cb-metashape/pairings-results/past-results?authuser=0
# Depends on requests and the stdlib.
# Writes to 3cm-data.csv.

import csv
import io
import os
import re

# Seconds to wait on each HTTP request before giving up.
HTTP_TIMEOUT = 30

# Cards/decks seen in all rounds analyzed so far; updated by analyze_round().
played_decks = set()
played_cards = set()

# Every spelling variant of this card ("old growth dryad", "old-growth dryad",
# "old growth-dryads", ...) is normalized with one idempotent pattern.  A plain
# substring replace of "old-growth dryad" would also hit the already-correct
# "old-growth dryads" and turn it into "old-growth dryadss".
_DRYADS_RE = re.compile(r"old[ -]growth[ -]dryads?")
_DRYADS = "old-growth dryads"

# Literal typo -> correction, applied in order as substring replacements.
# Keys must not be substrings of their own replacement (see _DRYADS_RE above).
_CARD_FIXES = {
    "chancelor of the tangle (sic)": "chancellor of the tangle",
    "karakaʂ": "karakas",
    "emrakul the": "emrakul, the",
    "aeon's torn": "aeons torn",
    "chronomatonton (the 1 cost 1/1 that taps to get bigger)": "chronomaton",
    "â": "'",
    "burning inqiury": "burning inquiry",
    "cenn's tactition": "cenn's tactician",
    "icatian shore": "icatian store",
    "lions eye diamond": "lion's eye diamond",
    "deaths shadow": "death's shadow",
    "that one wurm that makes the three 5/5s when it dies i have done too many scryfall searches today sorry": "worldspine wurm",
}


def file_to_csv(fileid):
    """Export the Google Drive file *fileid* as CSV and return the text.

    The API key is read from the GOOG_KEY environment variable.

    Raises:
        requests.HTTPError: if the export request does not succeed, so an
            error payload is never mistaken for round data.
    """
    # Imported here so the parsing helpers stay importable without requests.
    import requests

    params = {"mimeType": "text/csv"}
    headers = {"x-goog-api-key": os.environ.get("GOOG_KEY")}
    res = requests.get(
        f"https://www.googleapis.com/drive/v3/files/{fileid}/export",
        params=params,
        headers=headers,
        timeout=HTTP_TIMEOUT,
    )
    res.raise_for_status()
    # NOTE(review): res.text is decoded with whatever encoding requests infers
    # from the response headers; if the export carries no charset this may be
    # the source of the "â" mojibake patched in clean() -- confirm.
    return res.text


# scrape site to list rounds
def get_round_fileids():
    """Return the spreadsheet ids linked from the past-results page.

    Ids are returned in page order, followed by the hard-coded id of the
    last round.

    Raises:
        requests.HTTPError: if the page cannot be fetched.
    """
    import requests

    res = requests.get(
        "https://sites.google.com/view/3cb-metashape/pairings-results/past-results",
        timeout=HTTP_TIMEOUT,
    )
    res.raise_for_status()
    sheetre = re.compile(r"spreadsheets/d/(.*?)/")
    ids = sheetre.findall(res.text)
    # NOTE(review): findall keeps duplicates; if the page links a sheet twice
    # that round would be analyzed twice -- verify against the live page.
    ids.append("1dV6pP5GCxsu-N8OpStnow5iPrf5jni5FqGgpgnIZiiE")  # last round
    return ids


# fixing some data issues
def clean(card):
    """Return *card* with known misspellings replaced by the canonical name.

    *card* is expected to be already stripped and lower-cased by the caller.
    The function is idempotent: cleaning an already-clean name is a no-op.
    """
    # y'all struggle with this one
    card = _DRYADS_RE.sub(_DRYADS, card)
    for wrong, right in _CARD_FIXES.items():
        card = card.replace(wrong, right)
    return card


def analyze_round(n, roundcsv):
    """Parse one round's CSV text and return a list of per-player dicts.

    Args:
        n: round number stored in each record's "round" field.
        roundcsv: CSV text; column 0 is the player name, column 1 holds the
            deck as three newline-separated card names.

    Returns:
        One dict per deck row with keys "player", "round", "card_1".."card_3"
        (alphabetical after normalization, not submission order), "new_cards"
        (";"-joined cards not seen in earlier rounds) and, only when the deck
        was not seen in an earlier round, "is_new_deck": True.

    Side effects:
        Adds this round's decks and cards to the module-level played_decks /
        played_cards sets after the whole round has been read, so "new" is
        always relative to previous rounds only.
    """
    out = []
    round_cards = set()
    round_decks = set()
    for row in csv.reader(io.StringIO(roundcsv), delimiter=","):
        if len(row) < 2:
            continue  # blank line or single-column row
        # Normalize first, then sort, so the deck key does not depend on the
        # capitalization or spelling used in the submission.
        deck = sorted(clean(c.strip().lower()) for c in row[1].split("\n"))
        if len(deck) != 3:
            continue  # isn't a deck row
        deck_key = "|".join(deck)
        round_decks.add(deck_key)
        round_cards.update(deck)
        udata = {
            "player": row[0],
            "round": n,
            "card_1": deck[0],
            "card_2": deck[1],
            "card_3": deck[2],
            "new_cards": ";".join(c for c in deck if c not in played_cards),
        }
        if deck_key not in played_decks:
            udata["is_new_deck"] = True
        out.append(udata)
    played_decks.update(round_decks)
    played_cards.update(round_cards)
    return out


def main():
    """Fetch every round and write the combined analysis to 3cm-data.csv."""
    fieldnames = ["round", "player", "card_1", "card_2", "card_3", "is_new_deck", "new_cards"]
    # newline="" is what the csv module expects for files it writes;
    # utf-8 keeps non-ASCII card names writable regardless of locale.
    with open("3cm-data.csv", "w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=fieldnames)
        w.writeheader()
        for n, fileid in enumerate(get_round_fileids(), start=1):
            print(f"analyzing round {n}")
            w.writerows(analyze_round(n, file_to_csv(fileid)))


if __name__ == "__main__":
    main()