play/scripts/emoji/scrape_guilds.py

62 lines
1.7 KiB
Python
Executable file

#!/usr/bin/env python3
"""
Scrape guilds with Playwright
You probably shouldn't use this, but if you really must:
* Set the CHROME environment variable to the path to your chrome or chromium
* It just gives you json, you have to dump later
"""
import base64
import json
import os
import os.path
import pathlib
import sys
import zlib
from playwright.async_api import async_playwright
import asyncio
def websocket_handler(out_file):
    """Build a ``websocket`` event callback that logs received frames.

    Args:
        out_file: Writable text file object. Every decoded message is
            written to it as one JSON document per line.

    Returns:
        A coroutine function that takes the websocket object, announces its
        URL and registers a ``framereceived`` listener on it.
    """

    async def inner(ws):
        print(f"Got websocket at `{ws.url}`")
        # One decompressor per websocket: binary frames are fed to it in
        # order as chunks of a single zlib stream, so its state must persist
        # from one frame to the next.
        decompress = zlib.decompressobj()

        async def handle_message(msg):
            # Playwright hands text frames over as ``str`` and binary frames
            # as ``bytes``. zlib only accepts bytes-like input, so text
            # frames are parsed as JSON directly instead of raising
            # ``TypeError`` and dropping the message.
            if isinstance(msg, str):
                raw = msg
            else:
                raw = decompress.decompress(msg)
            message = json.loads(raw)
            message_type = message.get("t")
            out_file.write(json.dumps(message) + "\n")
            # The data we actually want, might as well flush
            if message_type in ("READY", "GUILD_CREATE"):
                out_file.flush()
            print("Got message of type", message_type)

        ws.on("framereceived", handle_message)

    return inner
async def main():
    """Open Discord in a persistent Chromium profile and record websocket events.

    Events are appended to ``out/discord/events.json`` below the current
    working directory (parent directories are created if missing). The
    browser executable path is read from the ``CHROME`` environment
    variable; when it is unset, ``None`` is passed to Playwright.

    The coroutine never returns on its own: it waits on a future that is
    never resolved, so it runs until the task is cancelled.
    """
    out_path = pathlib.Path.cwd() / "out" / "discord" / "events.json"
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # The only way out of this coroutine is cancellation, so the file is
    # managed by ``with``: buffered events are flushed and the handle closed
    # while the cancellation unwinds, rather than at interpreter exit.
    with out_path.open("a") as out_file:
        async with async_playwright() as p:
            context = await p.chromium.launch_persistent_context(
                user_data_dir=os.path.expanduser("~/.cache/chromium-emoji-script"),
                executable_path=os.getenv("CHROME"),
                headless=False,
            )
            page = await context.new_page()
            # Must be registered before navigating so websockets opened
            # during page load are seen.
            page.on("websocket", websocket_handler(out_file))
            await page.goto("https://discord.com/app")
            # Never-resolved future: keep the browser and listeners alive
            # until the task is cancelled.
            await asyncio.Future()
if __name__ == "__main__":
    # Script entry point: drive the async main() on a fresh event loop.
    asyncio.run(main())