commit 86b2527c940d5905910b331830823499ef4565c8
parent b490366ccfdf93f62a2d33742e7add7ad391526c
Author: alex wennerberg <alex@alexwennerberg.com>
Date: Sat, 24 Jun 2023 11:22:21 -0400
3cm data pull script
Diffstat:
A | 3cm/getdata.py | | | 94 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
1 file changed, 94 insertions(+), 0 deletions(-)
diff --git a/3cm/getdata.py b/3cm/getdata.py
@@ -0,0 +1,94 @@
+# set GOOG_KEY env variable to a valid api key
+# all rounds available at https://sites.google.com/view/3cb-metashape/pairings-results/past-results?authuser=0
+# depends on requests and stdlib
+# writes to 3cm-data.csv
+
+import requests, os, csv, re, io
+
def file_to_csv(fileid):
    """Export one Google Drive file as CSV text.

    Issues a GET against the Drive v3 ``files/{fileid}/export`` endpoint with
    ``mimeType=text/csv``, sending the API key from the ``GOOG_KEY``
    environment variable in the ``x-goog-api-key`` header.

    Args:
        fileid: Drive file id of the spreadsheet to export.

    Returns:
        The response body as text.

    Raises:
        RuntimeError: if ``GOOG_KEY`` is unset or empty.
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    api_key = os.environ.get("GOOG_KEY")
    if not api_key:
        # Fail up front with a clear message instead of sending a keyless
        # request and discovering the problem from the response.
        raise RuntimeError(
            "GOOG_KEY environment variable must be set to a valid API key"
        )
    res = requests.get(
        f"https://www.googleapis.com/drive/v3/files/{fileid}/export",
        params={"mimeType": "text/csv"},
        headers={"x-goog-api-key": api_key},
        timeout=30,  # don't hang forever on a stalled connection
    )
    # An error response is not the spreadsheet; surface it here rather than
    # letting the caller parse it as CSV and quietly find no deck rows.
    res.raise_for_status()
    return res.text
+
def get_round_fileids():
    """Scrape the past-results page and list the spreadsheet id of each round.

    Every ``spreadsheets/d/<id>/`` occurrence in the page HTML contributes one
    id, in page order; the id of the last round is hard-coded and appended at
    the end.

    Returns:
        A list of spreadsheet ids with repeats removed (first occurrence
        wins), so a sheet that is referenced more than once is not treated as
        several rounds by the caller.

    Raises:
        requests.HTTPError: if the page request returns a 4xx/5xx status.
    """
    res = requests.get(
        "https://sites.google.com/view/3cb-metashape/pairings-results/past-results",
        timeout=30,
    )
    res.raise_for_status()
    sheetre = re.compile(r"spreadsheets/d/(.*?)/")
    ids = sheetre.findall(res.text)
    ids.append("1dV6pP5GCxsu-N8OpStnow5iPrf5jni5FqGgpgnIZiiE")  # last round
    # dict.fromkeys drops repeats while keeping first-seen order.
    return list(dict.fromkeys(ids))
+
# Decks ("|"-joined card names) and individual cards seen in rounds analyzed
# so far; analyze_round() reads both sets and adds each round's entries.
played_decks, played_cards = set(), set()
+
# fixing some data issues

# y'all struggle with this one: any space/hyphen spelling, singular or plural.
# A single pattern that also matches the correct name keeps the fix
# idempotent; plain substring replacement of "old-growth dryad" would turn the
# already-correct "old-growth dryads" into "old-growth dryadss".
_OLD_GROWTH_DRYADS_RE = re.compile(r"old[- ]growth[- ]dryads?")

# Remaining substring corrections, applied in this order.
_CARD_FIXES = {
    "chancelor of the tangle (sic)": "chancellor of the tangle",
    "karakaʂ": "karakas",
    "emrakul the": "emrakul, the",
    "aeon's torn": "aeons torn",
    "chronomatonton (the 1 cost 1/1 that taps to get bigger)": "chronomaton",
    "â": "'",
    "burning inqiury": "burning inquiry",
    "cenn's tactition": "cenn's tactician",
    "icatian shore": "icatian store",
    "lions eye diamond": "lion's eye diamond",
    "deaths shadow": "death's shadow",
    "that one wurm that makes the three 5/5s when it dies i have done too many scryfall searches today sorry": "worldspine wurm",
}


def clean(card):
    """Return *card* with known misspellings replaced by the canonical name.

    Matching is by substring and is case-sensitive; the keys are all
    lower-case, so the caller is expected to pass a lower-cased name.
    Names without a known problem are returned unchanged.

    Args:
        card: a card name as submitted.

    Returns:
        The corrected card name.
    """
    card = _OLD_GROWTH_DRYADS_RE.sub("old-growth dryads", card)
    for wrong, right in _CARD_FIXES.items():
        card = card.replace(wrong, right)
    return card
+
def analyze_round(n, roundcsv):
    """Turn one round's CSV export into output rows, one per submitted deck.

    Column 0 of each CSV row is taken as the player name and column 1 as the
    deck, one card per line. Rows whose deck cell does not hold exactly three
    lines are skipped, as are rows with fewer than two columns.

    Card names are stripped, lower-cased and passed through ``clean`` BEFORE
    sorting, so the same three cards always give the same ordering and the
    same "|"-joined deck key regardless of how they were typed.

    "New" is judged against the module-level ``played_cards`` /
    ``played_decks`` sets, which hold earlier rounds only; this round's cards
    and decks are added to them after all rows are processed, so two players
    bringing the same unseen card in one round both get it listed.

    Args:
        n: round number to record on every row.
        roundcsv: the round's spreadsheet as CSV text.

    Returns:
        A list of dicts with keys ``player``, ``round``, ``card_1``..``card_3``,
        ``new_cards`` (";"-joined) and, only for decks not seen in an earlier
        round, ``is_new_deck`` set to True.
    """
    rows_out = []
    round_cards = set()
    round_decks = set()
    for row in csv.reader(io.StringIO(roundcsv), delimiter=","):
        if len(row) < 2:
            continue  # blank line or single-column row: no deck cell to read
        deck = sorted(clean(line.strip().lower()) for line in row[1].split("\n"))
        if len(deck) != 3:
            continue  # isn't a deck row
        deck_key = "|".join(deck)
        round_decks.add(deck_key)
        round_cards.update(deck)
        udata = {
            "player": row[0],
            "round": n,
            "card_1": deck[0],  # sorted, not based on submission
            "card_2": deck[1],
            "card_3": deck[2],
            "new_cards": ";".join(c for c in deck if c not in played_cards),
        }
        if deck_key not in played_decks:
            udata["is_new_deck"] = True
        rows_out.append(udata)
    # Merge only after the whole round is read (see docstring).
    played_decks.update(round_decks)
    played_cards.update(round_cards)
    return rows_out
+
def main():
    """Fetch every round and write the per-deck analysis to ``3cm-data.csv``.

    Rounds are numbered from 1 in the order ``get_round_fileids`` returns
    them. Each round's sheet is downloaded with ``file_to_csv``, analyzed with
    ``analyze_round`` and its rows appended to the output file, which is
    created (or overwritten) in the current working directory.
    """
    fieldnames = ["round", "player", "card_1", "card_2", "card_3", "is_new_deck", "new_cards"]
    # newline="" lets the csv writer control line endings itself, as the csv
    # module documentation requires; an explicit encoding keeps non-ASCII
    # card names writable whatever the platform default is.
    with open("3cm-data.csv", "w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=fieldnames)
        w.writeheader()
        for n, fileid in enumerate(get_round_fileids(), start=1):
            print(f"analyzing round {n}")
            w.writerows(analyze_round(n, file_to_csv(fileid)))


if __name__ == "__main__":
    main()
+