3cb-data

Unnamed repository; edit this file 'description' to name the repository.
git clone git://git.alexwennerberg.com/3cb-data.git
Log | Files | Refs | README | LICENSE

getdata.py (10977B)


      1 # set GOOG_KEY env variable to a valid api key
      2 # all rounds available at https://sites.google.com/view/3cb-metashape/pairings-results/past-results?authuser=0
      3 import requests, os, re, io, sqlite3, titlecase, string, openpyxl, json
      4 from datetime import datetime, timedelta
      5 from Levenshtein import distance
      6 from bs4 import BeautifulSoup
      7 
      8 con = sqlite3.connect("3cb.db")
      9 
     10 def getall(query): 
     11     return [a[0] for a in con.cursor().execute(query).fetchall()]
     12 
     13 allcards=getall("select name from mtg")
     14 
     15 def run_rounds():
     16     file_ids_in_db = getall("select fileid from round")
     17     for n, file in enumerate(get_round_fileids()):
     18         if file in file_ids_in_db:
     19             continue
     20         if file != "1LGHvTrQz2zhBjz1PhlW61ZYmwRWJyWkEHj-png7ZKdw": # misc file
     21             print(f"analyzing round {n+1}...")
     22             save_round(n+1, file)
     23 
     24 # scrape site to list rounds
     25 def get_round_fileids():
     26     allrounds = requests.get("https://sites.google.com/view/3cb-metashape/pairings-results/past-results")
     27     sheetre = re.compile("spreadsheets/d/(.*?)/")
     28     curr_round = requests.get("https://sites.google.com/view/3cb-metashape/pairings-results")
     29     ids = sheetre.findall(allrounds.text + curr_round.text)
     30     return ids
     31 
     32 def file_to_wb(fileid):
     33     params = {"mimeType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"}
     34     headers = {"x-goog-api-key": os.environ.get("GOOG_KEY")}
     35     out = []
     36     res = requests.get(f"https://www.googleapis.com/drive/v3/files/{fileid}/export", 
     37                    params=params,
     38                    headers=headers)
     39     if res.status_code != 200:
     40         print(res.text)
     41         os.exit(1)
     42     return openpyxl.load_workbook(io.BytesIO(res.content), data_only=True)
     43 
     44 def get_file_created_date(fileid):
     45     headers = {"x-goog-api-key": os.environ.get("GOOG_KEY")}
     46     params = {"fields": "createdTime"}
     47     res = requests.get(f"https://www.googleapis.com/drive/v3/files/{fileid}", 
     48                        params=params,
     49                        headers=headers)
     50     if res.status_code != 200:
     51         print(res.text)
     52         return None
     53     data = res.json()
     54     if "createdTime" in data:
     55         # Parse ISO format and return YYYY-MM-DD
     56         created_time = datetime.fromisoformat(data["createdTime"].replace('Z', '+00:00'))
     57         return created_time.strftime('%Y-%m-%d')
     58     return None
     59 
     60 def update_bans():
     61     banlist_page = requests.get("https://sites.google.com/view/4cb-metashape/dynamic-banlist")
     62     soup = BeautifulSoup(banlist_page.text, 'html.parser')
     63     
     64     # Find all list items containing banned cards
     65     cards = []
     66     for li in soup.find_all('li'):
     67         card_name = li.get_text(strip=True)
     68         if card_name and card_name not in ["", " "]:
     69             cards.append(card_name)
     70     
     71     cur = con.cursor()
     72     cur.execute("drop table if exists ban")
     73     cur.execute("create table ban (name text)")
     74     for card in cards:
     75         if card:
     76             cur.execute("insert into ban values (?)", (clean_card(card),))
     77     con.commit()
     78 
     79 def load_schema():
     80     with open('schema.sql', 'r') as f:
     81         schema_sql = f.read()
     82     con.executescript(schema_sql)
     83 
     84 def save_sheet(cur, sheet, n, final):
     85     sheetrows = list(sheet.rows)
     86     rows = [[cell.value for cell in row if cell.value != None] for row in sheetrows]
     87     group = ""
     88     i = 0
     89     while i < len(rows):
     90         row = rows[i]
     91         i += 1
     92         if len(row) < 3:
     93             continue
     94         group = row[0].partition(" ")[2]
     95         if final:
     96             group = "final"
     97         try:
     98             start = row.index("VS") + 1
     99             end = row.index("Score")
    100             if sheetrows[0][0].value == None: # missing text in upper left cell
    101                 start += 1
    102                 end += 1
    103         except ValueError:
    104             start = 2
    105             end = len(row)
    106         players = row[start:end]
    107         for _ in range(len(players)):
    108             row = rows[i]
    109             i += 1
    110             player = row[0]
    111             if len(player) > 50 and player[0] == "B":
    112                 player = "Beefman" # messy unicode
    113             if player == "IGNORE WAS MISTAKE":
    114                 continue
    115             cards = row[1].split("\n")
    116             if not final: #already done
    117                 cur.execute("insert into deck values (?,?,?,?,?);", (n, player, clean_card(cards[0]), clean_card(cards[1]), clean_card(cards[2])))
    118             for o, s in enumerate(row[start:end]):
    119                 if s == "?" or s == "":
    120                     s = 0
    121                 cur.execute("insert into match values (?, ?, ?, ?, ?);", 
    122                             (n, group, player,players[o],int(s)));
    123 
    124 def save_round(n, fileid):
    125     wb = file_to_wb(fileid)
    126     try:
    127         groups = wb["Groups"]
    128     except KeyError:
    129         groups = wb["Group"]
    130     
    131     # Get creation date from Google Drive API
    132     date_str = get_file_created_date(fileid)
    133     
    134     cur = con.cursor()
    135     save_sheet(cur, groups, n, False)
    136     if len(wb._sheets) > 1:
    137         final = wb["Final Group"]
    138         save_sheet(cur, final, n, True)
    139     cur.execute("insert into round values (?, ?, ?);", (n, fileid, date_str));
    140     con.commit()
    141     
    142 # fixing some data issues
    143 def best_guess(card):
    144     m = 6
    145     outcard = ""
    146     for c in allcards:
    147         if c.startswith(card):
    148             return c
    149         d = distance(card, c, score_cutoff=5)
    150         if d < m:
    151             m = d
    152             outcard = c
    153     if outcard:
    154         print(f"replacing {card} to {outcard}")
    155         return outcard
    156     return card
    157 
    158 
    159 replacement = {
    160         "You Guessed It, Magus of the Moooon": "Magus of the Moon",
    161         "Yera and Oski, Weaver and Guide": "Yera and Oski, Weaver and Guide",
    162         "Yera and Oski, Weaver and Guide": "Arachne, Psionic Weaver",
    163         "Witch-Blessed Meadow": "Witch's Cottage", # Wrong
    164         "Urborg Tomb Yawgy": "Urborg, Tomb of Yawgmoth",
    165         "Urborg (The One That Makes All Lands Swamps)": "Urborg, Tomb of Yawgmoth",
    166         "Ulamog the Infinite Gyre (Borderless) (Foil)": "Ulamog, the Infinite Gyre",
    167         "That One Wurm That Makes the Three 5/5s When It Dies I Have Done Too Many Scryfall Searches Today Sorry": "TBD",
    168         "Thallid Oh Yeah": "Thallid",
    169         "Tabernacle at Penrall Vale": "The Tabernacle at Pendrell Value",
    170         "Swamp (XLN 270)": "Swamp",
    171         "Swamp (DOM #258)": "Swamp",
    172         "Swamp (BRB 18)": "Swamp",
    173         "Swamp (8ED #339)": "Swamp",
    174         "Specter's Shriek (Plz Let It Get Banned This Time)": "Specter's Shriek",
    175         "Speaker of the Heavens!?!?!?!": "Speaker of the Heavens",
    176         "Simispiriguide": "Simian Spirit Guide",
    177         "Signaling Roar": "Riling Dawnbreaker // Signaling Roar",
    178         "Senu, the Keen-Eyed": "Senu, Keen-Eyed Protector",
    179         "Restore Balanse (I Have to Misspell This Cuz the Regex Is Buggy Lmao)": "Restore Balance",
    180         "Red Sun's Zenith (Again)": "Red Sun's Zenith",
    181         "Plains (ONS 333)": "Plains",
    182         "Phelps, Exuberant Swimmer (Phelia)": "Phelia, Exuberant Shepherd",
    183         "Phelia, Tail-Wagging Shepherd": "Phelia, Exuberant Shepherd",
    184         "Nesumi Shortfang": "Nezumi Shortfang // Stabwhisker the Odious",
    185         "Mox Pearl (The One Without Metalcraft)": "Mox Pearl",
    186         "Monty Python and the Holy Grail Black Knight (Oathsworn Knight)": "Oathsworn Knight",
    187         "Miku, Divine Diva": "Elspeth Tirel",
    188         "Mayor of Avarbruck": "Mayor of Avabruck // Howlpack Alpha",
    189         "Magus of the Mooooooooon": "Magus of the Moon",
    190         "Lion's Eye Diamond                                Cheatyface": "Lion's Eye Diamond",
    191         "Kytheon Hero of Akros": "Kytheon, Hero of Akros // Gideon, Battle-Forged",
    192         "Karaka (Its Listed as Unbanned but the Form Wont Let Me Submit It Idk Its My First Time Lol)": "Karakas",
    193         "Island [KLD #395]": "Island",
    194         "Island (ONE #273)": "Island",
    195         "Inky Mothy Nexy": "Inkmoth Nexus",
    196         "Ink Mothy Nexy": "Inkmoth Nexus",
    197         "Gavel of the Righteous, for I Am Stubborn and Naively Hopeful": "Gavel of the Righteous",
    198         "Gargadon (Neither Greater Nor Lesser)": "Gargadon",
    199         "Forge-Chan": "Chancellor of the Forge",
    200         "Forest!!!!!!": "Forest",
    201         "Forest 🌲": "Forest",
    202         "Forest [LCI #402]": "Forest",
    203         "Forest (Tempest #348)": "Forest",
    204         "Fizik, Etherium Mechanic": "Iron Spider, Stark Upgrade",
    205         "Fizik, Etherium Mechanic": "Fizik, Etherium Mechanic",
    206         "Filigree Sylex": "The Filigree Sylex",
    207         "Elspeth Suns Champion Wooooooo": "Elspeth, Sun's Champion",
    208         "Dwarven Hold (Again)": "Dwarven Hold",
    209         "Dreams of Oil and Steel": "Dreams of Steel and Oil",
    210         "Dark Ritual (STA #89)": "Dark Ritual",
    211         "Dark Ritual (BTD 21)": "Dark Ritual",
    212         "Chronomatonton (The 1 Cost 1/1 That Taps to Get Bigger)": "Chronomaton",
    213         "Chaplain of Arms": "Chaplain of Alms // Chapel Shieldgeist",
    214         "Chancelor of the Tangle (Sic)": "Chancellor of the Tangle",
    215         "Bottomless Depths": "Bottomless Vault", # wrong?
    216         "Boseiju, Who Destroys Target Artifact, Enchantment, or Nonbasic Land (Who Endures)": "Boseiju, Who Endures",
    217         "Bayou - Not Legal": "Bayou",
    218         "Basic Plains": "Plains",
    219         "Azorius Guildgate  Anniversary": "Azorius Guildgate",
    220         "Azorius Chancery  First": "Azorius Chancery",
    221         "Annex-Chan": "Chancellor of the Annex",
    222         "Annex Chan": "Chancellor of the Annex",
    223     }
    224 
    225 def clean_card(card_name):
    226     clean = titlecase.titlecase(card_name.strip())
    227     clean = ''.join(filter(lambda x: x in string.printable, clean))
    228     clean = clean.replace("’", "'")
    229     clean = replacement.get(clean) or clean 
    230     if clean not in allcards:
    231         return best_guess(clean)
    232     return clean
    233 
    234 cache_file = "AtomicCards.json"
    235 # update the db with all valid mtg cards
    236 def update_mtg_db():
    237     updated = None
    238     if os.path.exists(cache_file):
    239         updated = datetime.now() - datetime.fromtimestamp(os.path.getmtime(cache_file))
    240     if (not updated) or updated > timedelta(days=30):
    241         print("Updating AtomicCards (~100MB)")
    242         cards = requests.get("https://mtgjson.com/api/v5/AtomicCards.json")
    243         cards_data = cards.json()
    244         with open(cache_file, 'w') as f:
    245             json.dump(cards_data, f)
    246     return
    247 
    248 def populate_mtg_table():
    249     with open(cache_file, 'r') as f:
    250         cards_data = json.load(f)
    251     cur = con.cursor()
    252     cur.execute("delete from mtg")
    253     print("Populating mtg table...")
    254     count = 0
    255     for card_name, card_variants in cards_data.get('data', {}).items():
    256         if card_variants:
    257             card = card_variants[0]
    258             name = card.get('name', card_name)
    259             cost = card.get('manaCost', '')
    260             card_type = " ".join(card.get('types', ''))
    261             cur.execute("insert into mtg values (?, ?, ?)", (name, cost, card_type))
    262     con.commit()
    263     return
    264 
    265 if __name__ == "__main__":
    266     load_schema()
    267     update_mtg_db()
    268     populate_mtg_table()
    269     update_bans()
    270     run_rounds()
    271