2024-03-27 01:24:24 +00:00
|
|
|
#!/usr/bin/env python3
|
2024-03-27 18:25:41 +00:00
|
|
|
"""
|
|
|
|
Scrape guilds with selenium
|
|
|
|
You probably shouldn't use this, but if you really must:
|
|
|
|
* Set the CHROME environment variable to the path to your chrome or chromium
|
|
|
|
* You probably want to connect to an existing chrome instance so you can log in first,
|
|
|
|
run `chromium --user-data-dir=$HOME/.cache/chromium-emoji-script --remote-debugging-port=9222`
|
|
|
|
to start chromium then `env CHROME=$(command -v chromium) ./scrape_guilds.py 127.0.0.1:9222`
|
|
|
|
* It just gives you json, you have to dump later
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
import base64
|
|
|
|
import json
|
2024-03-27 01:24:24 +00:00
|
|
|
import os.path
|
|
|
|
import os
|
2024-03-27 18:25:41 +00:00
|
|
|
import sys
|
|
|
|
import zlib
|
|
|
|
|
2024-03-27 01:24:24 +00:00
|
|
|
from selenium import webdriver
|
2024-03-27 18:25:41 +00:00
|
|
|
import trio
|
|
|
|
|
|
|
|
|
|
|
|
def setup_driver():
    """Build the Chrome WebDriver used for scraping.

    The browser binary is taken from the ``CHROME`` environment variable when
    it is set. If a command-line argument is given, it is passed to Chrome as
    the ``debuggerAddress`` option so the driver attaches to a browser that is
    already running (e.g. one you have logged in with).

    Returns:
        A ``webdriver.Chrome`` instance configured with the options above.
    """
    opts = webdriver.ChromeOptions()

    chrome_binary = os.getenv("CHROME")
    if chrome_binary:
        opts.binary_location = chrome_binary

    profile_dir = os.path.expanduser("~/.cache/chromium-emoji-script")
    opts.add_argument("--start-maximized")
    opts.add_argument(f"--user-data-dir={profile_dir}")

    # First CLI argument, when present, is the host:port of a running Chrome.
    cli_args = sys.argv[1:]
    if cli_args:
        opts.add_experimental_option("debuggerAddress", cli_args[0])

    # opts.add_experimental_option("detach", True)
    return webdriver.Chrome(options=opts)
|
|
|
|
|
|
|
|
|
|
|
|
async def handle_events(listener, out_file):
    """Decode Discord gateway messages from DevTools events and log them.

    Consumes ``WebSocketCreated`` / ``WebSocketFrameReceived`` events from
    *listener*. Only frames belonging to the most recently created websocket
    whose URL starts with ``wss://gateway.discord.gg/`` are processed; every
    decoded message is appended to *out_file* as one JSON document per line.

    The gateway payload is treated as a zlib stream: one decompressor is kept
    per connection, and compressed bytes are buffered until they end with the
    Z_SYNC_FLUSH marker, so a message that arrives split over several frames
    is decoded once it is complete instead of failing in ``json.loads``.

    Args:
        listener: Async iterable yielding DevTools network events. Events are
            dispatched on their class name.
        out_file: Object with awaitable ``write(str)`` and ``flush()`` methods.

    Raises:
        zlib.error, binascii.Error, json.JSONDecodeError: If a gateway frame
            cannot be decoded.
    """
    # Marker that terminates every complete zlib-stream message.
    zlib_suffix = b"\x00\x00\xff\xff"

    valid_id = None
    decompress = None
    pending = bytearray()  # compressed bytes of the message being assembled

    async for event in listener:
        typ = event.__class__.__name__
        if typ == "WebSocketCreated":
            if event.url.startswith("wss://gateway.discord.gg/"):
                # A new gateway connection starts a fresh compression context,
                # so any half-received message from the old one is discarded.
                valid_id = event.request_id
                decompress = zlib.decompressobj()
                pending.clear()
        elif typ == "WebSocketFrameReceived":
            if decompress is None or event.request_id != valid_id:
                continue

            pending += base64.b64decode(event.response.payload_data)
            if not pending.endswith(zlib_suffix):
                # Message is not complete yet; wait for the remaining frames.
                continue

            message = json.loads(decompress.decompress(bytes(pending)))
            pending.clear()

            message_type = message.get("t")
            await out_file.write(json.dumps(message) + "\n")
            # The data we actually want, might as well flush
            if message_type in ["READY", "GUILD_CREATE"]:
                await out_file.flush()

            print("Got message of type", message_type)
|
|
|
|
|
|
|
|
|
|
|
|
async def main():
    """Attach to Chrome and record Discord gateway events to disk.

    Events are appended to ``out/discord/events.json`` under the current
    working directory (the directory is created if needed). The driver's
    DevTools connection is used to enable network tracking and subscribe to
    websocket creation and frame events, which are then handed to
    ``handle_events``.
    """
    out_path = await trio.Path.cwd() / "out" / "discord" / "events.json"
    await out_path.parent.mkdir(parents=True, exist_ok=True)

    # Open via ``async with`` so the file is closed (and its buffered lines
    # written out) however the capture loop ends, instead of leaking it.
    async with await out_path.open("a") as out_file:
        driver = setup_driver()
        async with driver.bidi_connection() as conn:
            devtools, session = conn.devtools, conn.session

            await session.execute(devtools.network.enable())
            listener = session.listen(
                devtools.network.WebSocketCreated,
                devtools.network.WebSocketFrameReceived,
                buffer_size=1024,
            )
            await handle_events(listener, out_file)
|
2024-03-27 01:24:24 +00:00
|
|
|
|
|
|
|
|
2024-03-27 18:25:41 +00:00
|
|
|
# Script entry point: only start the trio event loop when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    trio.run(main)
|