3cb-data

Unnamed repository; edit this file 'description' to name the repository.
git clone git://git.alexwennerberg.com/3cb-data.git
Log | Files | Refs | README | LICENSE

commit b0ddf3f6269a94a45bb196cfe974d1b0cbb42e17
Author: alex wennerberg <alex@alexwennerberg.com>
Date:   Sun, 29 Oct 2023 10:28:27 -0400

Initial Commit

Diffstat:
A.gitignore | 3+++
AREADME | 16++++++++++++++++
Agetdata.py | 155+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Arequirements.txt | 3+++
4 files changed, 177 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,3 @@ +*.json +allcards.txt +*.db diff --git a/README b/README @@ -0,0 +1,16 @@ +3 card blind data analysis +========================== + +Utilities for pulling data from 3cb google sheets + +Uses sqlite db + +Pull all cards from mtgjson + +https://mtgjson.com/downloads/all-files/ + +wget https://mtgjson.com/api/v5/AtomicCards.json +cat AtomicCards.json | jq -r '.data | keys[]' > allcards.txt + +After installing imports: +python3 getdata.py diff --git a/getdata.py b/getdata.py @@ -0,0 +1,155 @@ +# set GOOG_KEY env variable to a valid api key +# all rounds available at https://sites.google.com/view/3cb-metashape/pairings-results/past-results?authuser=0 +# depends on requests and stdlib +# writes to 3cmdata.csv + +import requests, os, csv, re, io, sqlite3, titlecase, string +from Levenshtein import distance + +con = sqlite3.connect("3cb.db") +allcards = set(open('allcards.txt','r').read().splitlines()) + +def file_to_csv_reader(fileid, final): + params = {"mimeType": "text/csv"} + headers = {"x-goog-api-key": os.environ.get("GOOG_KEY")} + out = [] + res = requests.get(f"https://www.googleapis.com/drive/v3/files/{fileid}/export", + params=params, + headers=headers) + if res.status_code != 200: + print(res.text) + os.exit(1) + f = io.StringIO(res.text) + reader = csv.reader(f, delimiter=',') + return reader + +# scrape site to list rounds +def get_round_fileids(): + allrounds = requests.get("https://sites.google.com/view/3cb-metashape/pairings-results/past-results") + sheetre = re.compile("spreadsheets/d/(.*?)/") + curr_round = requests.get("https://sites.google.com/view/3cb-metashape/pairings-results") + ids = sheetre.findall(allrounds.text + curr_round.text) + return ids + +# fixing some data issues +def best_guess(card): + m = 6 + outcard = "" + for c in allcards: + if c.startswith(card): + return c + d = distance(card, c, score_cutoff=5) + if d < m: + m = d + outcard = c + if outcard: + return outcard + return card + + +def replacement(card): + m = { + "Annex Chan": "Chancellor of the Annex", + "Annex-Chan": "Chancellor of the Annex", + "Forge-Chan": "Chancellor of the Forge", + "Basic Plains": "Plains", + "Urborg Tomb Yawgy": "Urborg, Tomb of Yawgmoth", + "Urborg (The One That Makes All Lands Swamps)": "Urborg, Tomb of Yawgmoth", + "Chronomatonton (The 1 Cost 1/1 That Taps to Get Bigger)": "Chronomaton", + "That One Wurm That Makes the Three 5/5s When It Dies I Have Done Too Many Scryfall Searches Today Sorry": "TBD", + "Chancelor of the Tangle (Sic)": "Chancellor of the Tangle", + "Karaka (Its Listed as Unbanned but the Form Wont Let Me Submit It Idk Its My First Time Lol)": "Karakas", + "Restore Balanse (I Have to Misspell This Cuz the Regex Is Buggy Lmao)": "Restore Balance", + "Elspeth Suns Champion Wooooooo": "Elspeth, Sun's Champion", + "Plains (ONS 333)": "Plains", + "Swamp (XLN 270)": "Swamp", + "Ulamog the Infinite Gyre (Borderless) (Foil)": "Ulamog, the Infinite Gyre", + "Dark Ritual (BTD 21)": "Dark Ritual", + "Swamp (BRB 18)": "Swamp", + "Monty Python and the Holy Grail Black Knight (Oathsworn Knight)": "Oathsworn Knight", + "Red Sun's Zenith (Again) ": "Red Sun's Zenith", + "Dwarven Hold (Again)": "Dwarven Hold", + } + if card in m: + return m[card] + return card + +def clean_card(card_name): + clean = titlecase.titlecase(card_name.strip()) + clean = ''.join(filter(lambda x: x in string.printable, clean)) + clean = clean.replace("’", "'") + clean = replacement(clean) + if clean not in allcards: + return best_guess(clean) + return clean + +# not really normalized well +schema = [""" +create table if not exists round ( + id integer primary key, + fileid text not null +); +""", +""" +create table if not exists card ( + round integer not null, + player varchar not null, + name text +);""",""" +create table if not exists match ( + round integer, + group_name text, + player text, + opp_player text, + score integer +);"""] + +def save_group(cur, rreader, n): + group = "" + if n == 6: # One group, TODO + raise StopIteration + + while len(group) != 1: + head = next(rreader) + group = head[0].partition(" ")[2] + players = head[2:-1] + for _ in players: + row = next(rreader) + player = row[0] + cards = row[1].split("\n") + if len(cards) != 3: + continue + for card in cards: + cur.execute("insert into card values (?,?,?);", (n, player, clean_card(card))) + for o, s in enumerate(row[2:-1]): + cur.execute("insert into match values (?, ?, ?, ?, ?);", + (n, group, player,players[o],s)); + +def save_round(n, fileid): + rreader = file_to_csv_reader(fileid, False) + cur = con.cursor() + while 1: # wonky + try: + save_group(cur, rreader, n) + except StopIteration: + break + cur.execute("insert into round values (?, ?);", (n, fileid)); + con.commit() + +# cache +def fileids_in_db(): + return con.cursor().execute("select fileid from round").fetchall() + +def main(): + cache = fileids_in_db() + for n, file in enumerate(get_round_fileids()): + n = n +len(cache) + if file not in cache: + print(f"analyzing round {n+1}") + save_round(n+1, file) + +if __name__ == "__main__": + for table in schema: + con.execute(table) + main() + diff --git a/requirements.txt b/requirements.txt @@ -0,0 +1,3 @@ +requests +titlecase +openpyxl