3cb-data

Unnamed repository; edit this file 'description' to name the repository.
git clone git://git.alexwennerberg.com/3cb-data.git
Log | Files | Refs | README | LICENSE

commit 445b263205d56388d7eede3de9fc60239f04acb0
parent aa1bd88895719d5e06d0462c6185bc75ec34fa0f
Author: alex wennerberg <alex@alexwennerberg.com>
Date:   Sun, 17 Aug 2025 16:23:27 -0400

Update ban list to scrape from dynamic banlist page

Replace Google Sheet parsing with HTML scraping of the dynamic banlist
page at sites.google.com. Now reads banned cards from bulleted list
instead of embedded spreadsheet for better maintainability.

Diffstat:
M getdata.py | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/getdata.py b/getdata.py
@@ -5,6 +5,7 @@
 import requests, os, csv, re, io, sqlite3, titlecase, string, openpyxl, urllib
 from Levenshtein import distance
+from bs4 import BeautifulSoup
 
 con = sqlite3.connect("3cb.db")
 allcards = set(open('allcards.txt','r').read().splitlines())
@@ -63,9 +64,16 @@ def get_file_created_date(fileid):
     return None
 
 def update_bans():
-    curr_round = requests.get("https://sites.google.com/view/3cb-metashape/pairings-results")
-    sheet = file_to_wb("1NZuROOCctbq4p4-CAHE-jOC0675QQjuqVnUqdXFCVD8")["banlist"]
-    cards = [row[1].value for row in [*sheet.rows][3:108]]
+    banlist_page = requests.get("https://sites.google.com/view/3cb-metashape/dynamic-banlist")
+    soup = BeautifulSoup(banlist_page.text, 'html.parser')
+
+    # Find all list items containing banned cards
+    cards = []
+    for li in soup.find_all('li'):
+        card_name = li.get_text(strip=True)
+        if card_name and card_name not in ["", " "]:
+            cards.append(card_name)
+
     cur = con.cursor()
     cur.execute("drop table if exists ban")
     cur.execute("create table ban (name text)")