#!/usr/bin/env python3
"""
Scrape guilds with selenium

You probably shouldn't use this, but if you really must:

* Set the CHROME environment variable to the path to your chrome or chromium
* You probably want to connect to an existing chrome instance so you can log
  in first, run
  `chromium --user-data-dir=$HOME/.cache/chromium-emoji-script --remote-debugging-port=9222`
  to start chromium then
  `env CHROME=$(command -v chromium) ./scrape_guilds.py 127.0.0.1:9222`
* It just gives you json, you have to dump later

Every captured gateway message is appended, one JSON document per line, to
`out/discord/events.json` under the current working directory.
"""

import base64
import json
import os.path
import os
import sys
import zlib

from selenium import webdriver
import trio


def setup_driver() -> webdriver.Chrome:
    """Build the Chrome driver used for scraping.

    Reads the browser binary from the ``CHROME`` environment variable when it
    is set, and attaches to an already running browser when a debugger
    address (``host:port``) is given as the first command line argument.
    """
    options = webdriver.ChromeOptions()
    if location := os.getenv("CHROME"):
        options.binary_location = location
    options.add_argument("--start-maximized")
    options.add_argument(
        "--user-data-dir=" + os.path.expanduser("~/.cache/chromium-emoji-script")
    )
    if len(sys.argv) > 1:
        options.add_experimental_option("debuggerAddress", sys.argv[1])
    # options.add_experimental_option("detach", True)
    return webdriver.Chrome(options=options)


async def handle_events(listener, out_file) -> None:
    """Write every message received on the Discord gateway socket to out_file.

    ``listener`` is an async iterable of devtools network events and
    ``out_file`` an async text file; each decoded message is written as one
    line of JSON.  Only the most recently created websocket whose URL starts
    with ``wss://gateway.discord.gg/`` is followed.
    """
    valid_id = None
    decompress = None
    async for event in listener:
        # Events are told apart by class name, so this function needs no
        # reference to the devtools module that defines them.
        typ = type(event).__name__
        if typ == "WebSocketCreated":
            if event.url.startswith("wss://gateway.discord.gg/"):
                valid_id = event.request_id
                # A fresh decompressor per socket: the same object is fed
                # every frame of that connection.
                # NOTE(review): presumably the gateway sends one continuous
                # zlib stream per connection - confirm.
                decompress = zlib.decompressobj()
            continue
        if typ != "WebSocketFrameReceived" or event.request_id != valid_id:
            continue

        raw = base64.b64decode(event.response.payload_data)
        message = json.loads(decompress.decompress(raw))
        message_type = message.get("t")
        await out_file.write(json.dumps(message) + "\n")
        # The data we actually want, might as well flush
        if message_type in ("READY", "GUILD_CREATE"):
            await out_file.flush()
            print("Got message of type", message_type)


async def main() -> None:
    """Open the output file, start the browser and record gateway events."""
    out_path = await trio.Path.cwd() / "out" / "discord" / "events.json"
    await out_path.parent.mkdir(parents=True, exist_ok=True)
    # The context manager closes (and thereby flushes) the file however the
    # capture ends, so buffered events are not lost on exit or on error.
    async with await out_path.open("a") as out_file:
        driver = setup_driver()
        async with driver.bidi_connection() as conn:
            devtools, session = conn.devtools, conn.session
            await session.execute(devtools.network.enable())
            listener = session.listen(
                devtools.network.WebSocketCreated,
                devtools.network.WebSocketFrameReceived,
                buffer_size=1024,
            )
            await handle_events(listener, out_file)


if __name__ == "__main__":
    trio.run(main)