getdata.py (10977B)
1 # set GOOG_KEY env variable to a valid api key 2 # all rounds available at https://sites.google.com/view/3cb-metashape/pairings-results/past-results?authuser=0 3 import requests, os, re, io, sqlite3, titlecase, string, openpyxl, json 4 from datetime import datetime, timedelta 5 from Levenshtein import distance 6 from bs4 import BeautifulSoup 7 8 con = sqlite3.connect("3cb.db") 9 10 def getall(query): 11 return [a[0] for a in con.cursor().execute(query).fetchall()] 12 13 allcards=getall("select name from mtg") 14 15 def run_rounds(): 16 file_ids_in_db = getall("select fileid from round") 17 for n, file in enumerate(get_round_fileids()): 18 if file in file_ids_in_db: 19 continue 20 if file != "1LGHvTrQz2zhBjz1PhlW61ZYmwRWJyWkEHj-png7ZKdw": # misc file 21 print(f"analyzing round {n+1}...") 22 save_round(n+1, file) 23 24 # scrape site to list rounds 25 def get_round_fileids(): 26 allrounds = requests.get("https://sites.google.com/view/3cb-metashape/pairings-results/past-results") 27 sheetre = re.compile("spreadsheets/d/(.*?)/") 28 curr_round = requests.get("https://sites.google.com/view/3cb-metashape/pairings-results") 29 ids = sheetre.findall(allrounds.text + curr_round.text) 30 return ids 31 32 def file_to_wb(fileid): 33 params = {"mimeType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"} 34 headers = {"x-goog-api-key": os.environ.get("GOOG_KEY")} 35 out = [] 36 res = requests.get(f"https://www.googleapis.com/drive/v3/files/{fileid}/export", 37 params=params, 38 headers=headers) 39 if res.status_code != 200: 40 print(res.text) 41 os.exit(1) 42 return openpyxl.load_workbook(io.BytesIO(res.content), data_only=True) 43 44 def get_file_created_date(fileid): 45 headers = {"x-goog-api-key": os.environ.get("GOOG_KEY")} 46 params = {"fields": "createdTime"} 47 res = requests.get(f"https://www.googleapis.com/drive/v3/files/{fileid}", 48 params=params, 49 headers=headers) 50 if res.status_code != 200: 51 print(res.text) 52 return None 53 data = res.json() 54 if "createdTime" in data: 55 # Parse ISO format and return YYYY-MM-DD 56 created_time = datetime.fromisoformat(data["createdTime"].replace('Z', '+00:00')) 57 return created_time.strftime('%Y-%m-%d') 58 return None 59 60 def update_bans(): 61 banlist_page = requests.get("https://sites.google.com/view/4cb-metashape/dynamic-banlist") 62 soup = BeautifulSoup(banlist_page.text, 'html.parser') 63 64 # Find all list items containing banned cards 65 cards = [] 66 for li in soup.find_all('li'): 67 card_name = li.get_text(strip=True) 68 if card_name and card_name not in ["", " "]: 69 cards.append(card_name) 70 71 cur = con.cursor() 72 cur.execute("drop table if exists ban") 73 cur.execute("create table ban (name text)") 74 for card in cards: 75 if card: 76 cur.execute("insert into ban values (?)", (clean_card(card),)) 77 con.commit() 78 79 def load_schema(): 80 with open('schema.sql', 'r') as f: 81 schema_sql = f.read() 82 con.executescript(schema_sql) 83 84 def save_sheet(cur, sheet, n, final): 85 sheetrows = list(sheet.rows) 86 rows = [[cell.value for cell in row if cell.value != None] for row in sheetrows] 87 group = "" 88 i = 0 89 while i < len(rows): 90 row = rows[i] 91 i += 1 92 if len(row) < 3: 93 continue 94 group = row[0].partition(" ")[2] 95 if final: 96 group = "final" 97 try: 98 start = row.index("VS") + 1 99 end = row.index("Score") 100 if sheetrows[0][0].value == None: # missing text in upper left cell 101 start += 1 102 end += 1 103 except ValueError: 104 start = 2 105 end = len(row) 106 players = row[start:end] 107 for _ in range(len(players)): 108 row = rows[i] 109 i += 1 110 player = row[0] 111 if len(player) > 50 and player[0] == "B": 112 player = "Beefman" # messy unicode 113 if player == "IGNORE WAS MISTAKE": 114 continue 115 cards = row[1].split("\n") 116 if not final: #already done 117 cur.execute("insert into deck values (?,?,?,?,?);", (n, player, clean_card(cards[0]), clean_card(cards[1]), clean_card(cards[2]))) 118 for o, s in enumerate(row[start:end]): 119 if s == "?" or s == "": 120 s = 0 121 cur.execute("insert into match values (?, ?, ?, ?, ?);", 122 (n, group, player,players[o],int(s))); 123 124 def save_round(n, fileid): 125 wb = file_to_wb(fileid) 126 try: 127 groups = wb["Groups"] 128 except KeyError: 129 groups = wb["Group"] 130 131 # Get creation date from Google Drive API 132 date_str = get_file_created_date(fileid) 133 134 cur = con.cursor() 135 save_sheet(cur, groups, n, False) 136 if len(wb._sheets) > 1: 137 final = wb["Final Group"] 138 save_sheet(cur, final, n, True) 139 cur.execute("insert into round values (?, ?, ?);", (n, fileid, date_str)); 140 con.commit() 141 142 # fixing some data issues 143 def best_guess(card): 144 m = 6 145 outcard = "" 146 for c in allcards: 147 if c.startswith(card): 148 return c 149 d = distance(card, c, score_cutoff=5) 150 if d < m: 151 m = d 152 outcard = c 153 if outcard: 154 print(f"replacing {card} to {outcard}") 155 return outcard 156 return card 157 158 159 replacement = { 160 "You Guessed It, Magus of the Moooon": "Magus of the Moon", 161 "Yera and Oski, Weaver and Guide": "Yera and Oski, Weaver and Guide", 162 "Yera and Oski, Weaver and Guide": "Arachne, Psionic Weaver", 163 "Witch-Blessed Meadow": "Witch's Cottage", # Wrong 164 "Urborg Tomb Yawgy": "Urborg, Tomb of Yawgmoth", 165 "Urborg (The One That Makes All Lands Swamps)": "Urborg, Tomb of Yawgmoth", 166 "Ulamog the Infinite Gyre (Borderless) (Foil)": "Ulamog, the Infinite Gyre", 167 "That One Wurm That Makes the Three 5/5s When It Dies I Have Done Too Many Scryfall Searches Today Sorry": "TBD", 168 "Thallid Oh Yeah": "Thallid", 169 "Tabernacle at Penrall Vale": "The Tabernacle at Pendrell Value", 170 "Swamp (XLN 270)": "Swamp", 171 "Swamp (DOM #258)": "Swamp", 172 "Swamp (BRB 18)": "Swamp", 173 "Swamp (8ED #339)": "Swamp", 174 "Specter's Shriek (Plz Let It Get Banned This Time)": "Specter's Shriek", 175 "Speaker of the Heavens!?!?!?!": "Speaker of the Heavens", 176 "Simispiriguide": "Simian Spirit Guide", 177 "Signaling Roar": "Riling Dawnbreaker // Signaling Roar", 178 "Senu, the Keen-Eyed": "Senu, Keen-Eyed Protector", 179 "Restore Balanse (I Have to Misspell This Cuz the Regex Is Buggy Lmao)": "Restore Balance", 180 "Red Sun's Zenith (Again)": "Red Sun's Zenith", 181 "Plains (ONS 333)": "Plains", 182 "Phelps, Exuberant Swimmer (Phelia)": "Phelia, Exuberant Shepherd", 183 "Phelia, Tail-Wagging Shepherd": "Phelia, Exuberant Shepherd", 184 "Nesumi Shortfang": "Nezumi Shortfang // Stabwhisker the Odious", 185 "Mox Pearl (The One Without Metalcraft)": "Mox Pearl", 186 "Monty Python and the Holy Grail Black Knight (Oathsworn Knight)": "Oathsworn Knight", 187 "Miku, Divine Diva": "Elspeth Tirel", 188 "Mayor of Avarbruck": "Mayor of Avabruck // Howlpack Alpha", 189 "Magus of the Mooooooooon": "Magus of the Moon", 190 "Lion's Eye Diamond Cheatyface": "Lion's Eye Diamond", 191 "Kytheon Hero of Akros": "Kytheon, Hero of Akros // Gideon, Battle-Forged", 192 "Karaka (Its Listed as Unbanned but the Form Wont Let Me Submit It Idk Its My First Time Lol)": "Karakas", 193 "Island [KLD #395]": "Island", 194 "Island (ONE #273)": "Island", 195 "Inky Mothy Nexy": "Inkmoth Nexus", 196 "Ink Mothy Nexy": "Inkmoth Nexus", 197 "Gavel of the Righteous, for I Am Stubborn and Naively Hopeful": "Gavel of the Righteous", 198 "Gargadon (Neither Greater Nor Lesser)": "Gargadon", 199 "Forge-Chan": "Chancellor of the Forge", 200 "Forest!!!!!!": "Forest", 201 "Forest 🌲": "Forest", 202 "Forest [LCI #402]": "Forest", 203 "Forest (Tempest #348)": "Forest", 204 "Fizik, Etherium Mechanic": "Iron Spider, Stark Upgrade", 205 "Fizik, Etherium Mechanic": "Fizik, Etherium Mechanic", 206 "Filigree Sylex": "The Filigree Sylex", 207 "Elspeth Suns Champion Wooooooo": "Elspeth, Sun's Champion", 208 "Dwarven Hold (Again)": "Dwarven Hold", 209 "Dreams of Oil and Steel": "Dreams of Steel and Oil", 210 "Dark Ritual (STA #89)": "Dark Ritual", 211 "Dark Ritual (BTD 21)": "Dark Ritual", 212 "Chronomatonton (The 1 Cost 1/1 That Taps to Get Bigger)": "Chronomaton", 213 "Chaplain of Arms": "Chaplain of Alms // Chapel Shieldgeist", 214 "Chancelor of the Tangle (Sic)": "Chancellor of the Tangle", 215 "Bottomless Depths": "Bottomless Vault", # wrong? 216 "Boseiju, Who Destroys Target Artifact, Enchantment, or Nonbasic Land (Who Endures)": "Boseiju, Who Endures", 217 "Bayou - Not Legal": "Bayou", 218 "Basic Plains": "Plains", 219 "Azorius Guildgate Anniversary": "Azorius Guildgate", 220 "Azorius Chancery First": "Azorius Chancery", 221 "Annex-Chan": "Chancellor of the Annex", 222 "Annex Chan": "Chancellor of the Annex", 223 } 224 225 def clean_card(card_name): 226 clean = titlecase.titlecase(card_name.strip()) 227 clean = ''.join(filter(lambda x: x in string.printable, clean)) 228 clean = clean.replace("’", "'") 229 clean = replacement.get(clean) or clean 230 if clean not in allcards: 231 return best_guess(clean) 232 return clean 233 234 cache_file = "AtomicCards.json" 235 # update the db with all valid mtg cards 236 def update_mtg_db(): 237 updated = None 238 if os.path.exists(cache_file): 239 updated = datetime.now() - datetime.fromtimestamp(os.path.getmtime(cache_file)) 240 if (not updated) or updated > timedelta(days=30): 241 print("Updating AtomicCards (~100MB)") 242 cards = requests.get("https://mtgjson.com/api/v5/AtomicCards.json") 243 cards_data = cards.json() 244 with open(cache_file, 'w') as f: 245 json.dump(cards_data, f) 246 return 247 248 def populate_mtg_table(): 249 with open(cache_file, 'r') as f: 250 cards_data = json.load(f) 251 cur = con.cursor() 252 cur.execute("delete from mtg") 253 print("Populating mtg table...") 254 count = 0 255 for card_name, card_variants in cards_data.get('data', {}).items(): 256 if card_variants: 257 card = card_variants[0] 258 name = card.get('name', card_name) 259 cost = card.get('manaCost', '') 260 card_type = " ".join(card.get('types', '')) 261 cur.execute("insert into mtg values (?, ?, ?)", (name, cost, card_type)) 262 con.commit() 263 return 264 265 if __name__ == "__main__": 266 load_schema() 267 update_mtg_db() 268 populate_mtg_table() 269 update_bans() 270 run_rounds() 271