#!/usr/bin/env python3
"""
Scrape guilds with Playwright

You probably shouldn't use this, but if you really must:
* Set the CHROME environment variable to the path to your chrome or chromium
* It just gives you json, you have to dump later

Every gateway event seen on a websocket is appended, one JSON object per
line, to ``out/discord/events.json`` under the current working directory.
"""

import asyncio
import base64
import json
import os
import os.path
import pathlib
import sys
import zlib

# A zlib-stream message is complete only once it ends with the Z_SYNC_FLUSH
# marker; anything shorter is a fragment that must be buffered.
_ZLIB_SUFFIX = b"\x00\x00\xff\xff"

# Event types carrying the data we actually want; flushed to disk right away.
_FLUSH_EVENTS = ("READY", "GUILD_CREATE")


def websocket_handler(out_file):
    """Build a ``websocket`` event callback that logs frames to *out_file*.

    Args:
        out_file: Writable text file object. Each decoded message is written
            to it as a single line of JSON.

    Returns:
        An async callable taking the websocket object. It registers a
        ``framereceived`` listener on that websocket.
    """

    async def inner(ws):
        print(f"Got websocket at `{ws.url}`")
        # One decompressor and one buffer per websocket: the compression
        # context is shared by all messages of a connection.
        decompress = zlib.decompressobj()
        pending = bytearray()

        async def handle_message(msg):
            """Decode one received frame and append it to the output file."""
            if isinstance(msg, str):
                # Text frames are not compressed; feeding them to zlib would
                # raise TypeError, so parse them directly.
                raw = msg
            else:
                pending.extend(msg)
                if not pending.endswith(_ZLIB_SUFFIX):
                    # Message is split across frames; wait for the rest so
                    # json.loads never sees truncated output.
                    return
                raw = decompress.decompress(pending)
                pending.clear()

            message = json.loads(raw)
            message_type = message.get("t")
            out_file.write(json.dumps(message) + "\n")

            # The data we actually want, might as well flush
            if message_type in _FLUSH_EVENTS:
                out_file.flush()

            print("Got message of type", message_type)

        ws.on("framereceived", handle_message)

    return inner


async def main() -> None:
    """Open Discord in a persistent Chromium profile and log its websockets.

    Runs until the task is cancelled (e.g. Ctrl-C). The browser executable is
    taken from the CHROME environment variable; when it is unset ``None`` is
    passed and Playwright chooses the browser itself.
    """
    # Imported here, next to its only user, so the frame-handling code above
    # can be imported (and tested) without Playwright being installed.
    from playwright.async_api import async_playwright

    out_path = pathlib.Path.cwd() / "out" / "discord" / "events.json"
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # The context manager guarantees buffered events are flushed and the
    # handle is closed when the script is interrupted.
    with out_path.open("a", encoding="utf-8") as out_file:
        async with async_playwright() as p:
            browser = await p.chromium.launch_persistent_context(
                user_data_dir=os.path.expanduser("~/.cache/chromium-emoji-script"),
                executable_path=os.getenv("CHROME"),
                headless=False,
            )
            page = await browser.new_page()
            page.on("websocket", websocket_handler(out_file))
            await page.goto("https://discord.com/app")

            # Never resolves: keep the browser open until cancelled.
            await asyncio.Future()


if __name__ == "__main__":
    asyncio.run(main())