diff --git a/flake.nix b/flake.nix index a547405..9d5633a 100644 --- a/flake.nix +++ b/flake.nix @@ -125,30 +125,8 @@ python3Packages.aiohttp python3Packages.black python3Packages.ipython - python3Packages.trio python3Packages.gql - (python3Packages.selenium.overrideAttrs (old: { - postInstall = - old.postInstall - + '' - for ver in v85 v126 v127 v128; do - DEVTOOLS=../common/devtools/chromium/$ver - for proto in js browser; do - python3 ../common/devtools/convert_protocol_to_json.py \ - $DEVTOOLS/"$proto"_protocol.pdl \ - --map_binary_to_string=true \ - $DEVTOOLS/"$proto"_protocol.json - done - mkdir -p $DST_PREFIX/common/devtools/$ver - python3 generate.py \ - $DEVTOOLS/browser_protocol.json \ - $DEVTOOLS/js_protocol.json \ - $DST_PREFIX/common/devtools/$ver - done - ''; - - nativeBuildInputs = old.nativeBuildInputs ++ [ python3Packages.inflection ]; - })) + python3Packages.playwright ]; CHROME = "${chromium}/bin/chromium"; diff --git a/scripts/emoji/scrape_guilds.py b/scripts/emoji/scrape_guilds.py index 63218b6..8101a87 100755 --- a/scripts/emoji/scrape_guilds.py +++ b/scripts/emoji/scrape_guilds.py @@ -3,82 +3,59 @@ Scrape guilds with selenium You probably shouldn't use this, but if you really must: * Set the CHROME environment variable to the path to your chrome or chromium -* You probably want to connect to an existing chrome instance so you can log in first, - run `chromium --user-data-dir=$HOME/.cache/chromium-emoji-script --remote-debugging-port=9222` - to start chromium then `env CHROME=$(command -v chromium) ./scrape_guilds.py 127.0.0.1:99222` * It just gives you json, you have to dump later """ import base64 import json -import os.path import os +import os.path +import pathlib import sys import zlib -from selenium import webdriver -import trio +from playwright.async_api import async_playwright +import asyncio -def setup_driver(): - options = webdriver.ChromeOptions() - if location := os.getenv("CHROME"): - options.binary_location = location - options.add_argument("--start-maximized") - options.add_argument( - "--user-data-dir=" + os.path.expanduser("~/.cache/chromium-emoji-script") - ) - if len(sys.argv) > 1: - options.add_experimental_option("debuggerAddress", sys.argv[1]) +def websocket_handler(out_file): + async def inner(ws): + print(f"Got websocket at `{ws.url}`") - # options.add_experimental_option("detach", True) - return webdriver.Chrome(options=options) + decompress = zlib.decompressobj() - -async def handle_events(listener, out_file): - valid_id = None - decompress = None - - async for event in listener: - typ = event.__class__.__name__ - if typ == "WebSocketCreated": - if event.url.startswith("wss://gateway.discord.gg/"): - valid_id = event.request_id - decompress = zlib.decompressobj() - elif typ == "WebSocketFrameReceived": - if event.request_id != valid_id: - continue - message = json.loads( - decompress.decompress(base64.b64decode(event.response.payload_data)) - ) + async def handle_message(msg): + message = json.loads(decompress.decompress(msg)) message_type = message.get("t") - await out_file.write(json.dumps(message) + "\n") + out_file.write(json.dumps(message) + "\n") # The data we actually want, might as well flush if message_type in ["READY", "GUILD_CREATE"]: - await out_file.flush() - + out_file.flush() print("Got message of type", message_type) + ws.on("framereceived", handle_message) + + return inner + async def main(): - out_path = await trio.Path.cwd() / "out" / "discord" / "events.json" - await out_path.parent.mkdir(parents=True, exist_ok=True) - out_file = await out_path.open("a") + out_path = pathlib.Path.cwd() / "out" / "discord" / "events.json" + out_path.parent.mkdir(parents=True, exist_ok=True) + out_file = out_path.open("a") - driver = setup_driver() - async with driver.bidi_connection() as conn: - devtools, session = conn.devtools, conn.session - - await session.execute(devtools.network.enable()) - listener = session.listen( - devtools.network.WebSocketCreated, - devtools.network.WebSocketFrameReceived, - buffer_size=1024, + async with async_playwright() as p: + browser = await p.chromium.launch_persistent_context( + user_data_dir=os.path.expanduser("~/.cache/chromium-emoji-script"), + executable_path=os.getenv("CHROME"), + headless=False, ) + page = await browser.new_page() + page.on("websocket", websocket_handler(out_file)) + await page.goto("https://discord.com/app") - await handle_events(listener, out_file) + await asyncio.Future() if __name__ == "__main__": - trio.run(main) + asyncio.run(main())