3cb-data

Unnamed repository; edit this file 'description' to name the repository.
git clone git://git.alexwennerberg.com/3cb-data.git
Log | Files | Refs | README | LICENSE

commit 445b263205d56388d7eede3de9fc60239f04acb0
parent aa1bd88895719d5e06d0462c6185bc75ec34fa0f
Author: alex wennerberg <alex@alexwennerberg.com>
Date:   Sun, 17 Aug 2025 16:23:27 -0400

Update ban list to scrape from dynamic banlist page

Replace Google Sheet parsing with HTML scraping of the dynamic banlist
page at sites.google.com. Now reads banned cards from bulleted list
instead of embedded spreadsheet for better maintainability.

Diffstat:
M getdata.py | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/getdata.py b/getdata.py
@@ -5,6 +5,7 @@
 import requests, os, csv, re, io, sqlite3, titlecase, string, openpyxl, urllib
 from Levenshtein import distance
+from bs4 import BeautifulSoup
 
 con = sqlite3.connect("3cb.db")
 allcards = set(open('allcards.txt','r').read().splitlines())
@@ -63,9 +64,16 @@ def get_file_created_date(fileid):
     return None
 
 def update_bans():
-    curr_round = requests.get("https://sites.google.com/view/3cb-metashape/pairings-results")
-    sheet = file_to_wb("1NZuROOCctbq4p4-CAHE-jOC0675QQjuqVnUqdXFCVD8")["banlist"]
-    cards = [row[1].value for row in [*sheet.rows][3:108]]
+    banlist_page = requests.get("https://sites.google.com/view/3cb-metashape/dynamic-banlist")
+    soup = BeautifulSoup(banlist_page.text, 'html.parser')
+
+    # Find all list items containing banned cards
+    cards = []
+    for li in soup.find_all('li'):
+        card_name = li.get_text(strip=True)
+        if card_name and card_name not in ["", " "]:
+            cards.append(card_name)
+
     cur = con.cursor()
     cur.execute("drop table if exists ban")
     cur.execute("create table ban (name text)")