commit b0ddf3f6269a94a45bb196cfe974d1b0cbb42e17
Author: alex wennerberg <alex@alexwennerberg.com>
Date: Sun, 29 Oct 2023 10:28:27 -0400
Initial Commit
Diffstat:
4 files changed, 177 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,3 @@
+*.json
+allcards.txt
+*.db
diff --git a/README b/README
@@ -0,0 +1,16 @@
+3 card blind data analysis
+==========================
+
+Utilities for pulling data from 3cb google sheets
+
+Uses sqlite db
+
+Pull all cards from mtgjson
+
+https://mtgjson.com/downloads/all-files/
+
+wget https://mtgjson.com/api/v5/AtomicCards.json
+cat AtomicCards.json | jq -r '.data | keys[]' > allcards.txt
+
+After installing imports:
+python3 getdata.py
diff --git a/getdata.py b/getdata.py
@@ -0,0 +1,155 @@
+# set GOOG_KEY env variable to a valid api key
+# all rounds available at https://sites.google.com/view/3cb-metashape/pairings-results/past-results?authuser=0
+# depends on requests and stdlib
+# writes to 3cmdata.csv
+
+import requests, os, csv, re, io, sqlite3, titlecase, string
+from Levenshtein import distance
+
+con = sqlite3.connect("3cb.db")
+allcards = set(open('allcards.txt','r').read().splitlines())
+
+def file_to_csv_reader(fileid, final):
+ params = {"mimeType": "text/csv"}
+ headers = {"x-goog-api-key": os.environ.get("GOOG_KEY")}
+ out = []
+ res = requests.get(f"https://www.googleapis.com/drive/v3/files/{fileid}/export",
+ params=params,
+ headers=headers)
+ if res.status_code != 200:
+ print(res.text)
+ os.exit(1)
+ f = io.StringIO(res.text)
+ reader = csv.reader(f, delimiter=',')
+ return reader
+
+# scrape site to list rounds
+def get_round_fileids():
+ allrounds = requests.get("https://sites.google.com/view/3cb-metashape/pairings-results/past-results")
+ sheetre = re.compile("spreadsheets/d/(.*?)/")
+ curr_round = requests.get("https://sites.google.com/view/3cb-metashape/pairings-results")
+ ids = sheetre.findall(allrounds.text + curr_round.text)
+ return ids
+
+# fixing some data issues
+def best_guess(card):
+ m = 6
+ outcard = ""
+ for c in allcards:
+ if c.startswith(card):
+ return c
+ d = distance(card, c, score_cutoff=5)
+ if d < m:
+ m = d
+ outcard = c
+ if outcard:
+ return outcard
+ return card
+
+
+def replacement(card):
+ m = {
+ "Annex Chan": "Chancellor of the Annex",
+ "Annex-Chan": "Chancellor of the Annex",
+ "Forge-Chan": "Chancellor of the Forge",
+ "Basic Plains": "Plains",
+ "Urborg Tomb Yawgy": "Urborg, Tomb of Yawgmoth",
+ "Urborg (The One That Makes All Lands Swamps)": "Urborg, Tomb of Yawgmoth",
+ "Chronomatonton (The 1 Cost 1/1 That Taps to Get Bigger)": "Chronomaton",
+ "That One Wurm That Makes the Three 5/5s When It Dies I Have Done Too Many Scryfall Searches Today Sorry": "TBD",
+ "Chancelor of the Tangle (Sic)": "Chancellor of the Tangle",
+ "Karaka (Its Listed as Unbanned but the Form Wont Let Me Submit It Idk Its My First Time Lol)": "Karakas",
+ "Restore Balanse (I Have to Misspell This Cuz the Regex Is Buggy Lmao)": "Restore Balance",
+ "Elspeth Suns Champion Wooooooo": "Elspeth, Sun's Champion",
+ "Plains (ONS 333)": "Plains",
+ "Swamp (XLN 270)": "Swamp",
+ "Ulamog the Infinite Gyre (Borderless) (Foil)": "Ulamog, the Infinite Gyre",
+ "Dark Ritual (BTD 21)": "Dark Ritual",
+ "Swamp (BRB 18)": "Swamp",
+ "Monty Python and the Holy Grail Black Knight (Oathsworn Knight)": "Oathsworn Knight",
+ "Red Sun's Zenith (Again) ": "Red Sun's Zenith",
+ "Dwarven Hold (Again)": "Dwarven Hold",
+ }
+ if card in m:
+ return m[card]
+ return card
+
+def clean_card(card_name):
+ clean = titlecase.titlecase(card_name.strip())
+ clean = ''.join(filter(lambda x: x in string.printable, clean))
+ clean = clean.replace("’", "'")
+ clean = replacement(clean)
+ if clean not in allcards:
+ return best_guess(clean)
+ return clean
+
+# not really normalized well
+schema = ["""
+create table if not exists round (
+ id integer primary key,
+ fileid text not null
+);
+""",
+"""
+create table if not exists card (
+ round integer not null,
+ player varchar not null,
+ name text
+);""","""
+create table if not exists match (
+ round integer,
+ group_name text,
+ player text,
+ opp_player text,
+ score integer
+);"""]
+
+def save_group(cur, rreader, n):
+ group = ""
+ if n == 6: # One group, TODO
+ raise StopIteration
+
+ while len(group) != 1:
+ head = next(rreader)
+ group = head[0].partition(" ")[2]
+ players = head[2:-1]
+ for _ in players:
+ row = next(rreader)
+ player = row[0]
+ cards = row[1].split("\n")
+ if len(cards) != 3:
+ continue
+ for card in cards:
+ cur.execute("insert into card values (?,?,?);", (n, player, clean_card(card)))
+ for o, s in enumerate(row[2:-1]):
+ cur.execute("insert into match values (?, ?, ?, ?, ?);",
+ (n, group, player,players[o],s));
+
+def save_round(n, fileid):
+ rreader = file_to_csv_reader(fileid, False)
+ cur = con.cursor()
+ while 1: # wonky
+ try:
+ save_group(cur, rreader, n)
+ except StopIteration:
+ break
+ cur.execute("insert into round values (?, ?);", (n, fileid));
+ con.commit()
+
+# cache
+def fileids_in_db():
+ return con.cursor().execute("select fileid from round").fetchall()
+
+def main():
+ cache = fileids_in_db()
+ for n, file in enumerate(get_round_fileids()):
+ n = n +len(cache)
+ if file not in cache:
+ print(f"analyzing round {n+1}")
+ save_round(n+1, file)
+
+if __name__ == "__main__":
+ for table in schema:
+ con.execute(table)
+ main()
+
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,3 @@
+requests
+titlecase
+openpyxl