Skip to content

Commit 7018ff8

Browse files
authored
Implement Prometheus observability (#119)
1 parent d05c403 commit 7018ff8

File tree

9 files changed

+300
-3
lines changed

9 files changed

+300
-3
lines changed

bot/cogs/config.py

+2
Original file line numberDiff line numberDiff line change
@@ -623,6 +623,7 @@ async def blocklist_add(
623623
await tr.rollback()
624624
await ctx.send("Unable to block user")
625625
else:
626+
self.bot.metrics.features.blocked_users.inc()
626627
await tr.commit()
627628
self.bot.blocklist.replace(blocklist)
628629

@@ -680,6 +681,7 @@ async def blocklist_remove(self, ctx: GuildContext, entity: discord.Member) -> N
680681
await tr.rollback()
681682
await ctx.send("Unable to block user")
682683
else:
684+
self.bot.metrics.features.blocked_users.dec()
683685
await tr.commit()
684686
self.bot.blocklist.replace(blocklist)
685687
await block_ticket.cog.soft_unlock_ticket(

bot/cogs/ext/prometheus.py

+122
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
from __future__ import annotations
2+
3+
import platform
4+
from typing import TYPE_CHECKING
5+
6+
import discord
7+
from discord.ext import commands, tasks
8+
9+
# The Prometheus client libraries are imported behind a guard so that a
# missing dependency is reported with an actionable message.
try:
    from prometheus_async.aio.web import start_http_server
    from prometheus_client import Counter, Enum, Gauge, Info, Summary
except ImportError as exc:
    # Chain the original ImportError explicitly so the traceback shows
    # which module could not be imported as the direct cause.
    raise RuntimeError(
        "Prometheus libraries are required to be installed. "
        "Either install those libraries or disable Prometheus extension"
    ) from exc
18+
19+
if TYPE_CHECKING:
20+
from bot.rodhaj import Rodhaj
21+
22+
METRIC_PREFIX = "discord_"
23+
24+
25+
class FeatureCollector:
    """Prometheus metrics describing ticket and blocklist feature usage.

    Each metric name is ``METRIC_PREFIX`` followed by the attribute name.
    """

    __slots__ = (
        "bot",
        "active_tickets",
        "closed_tickets",
        "locked_tickets",
        "blocked_users",
    )

    def __init__(self, bot: Rodhaj):
        """Create the feature metrics.

        Parameters
        ----------
        bot:
            The bot instance these metrics belong to; stored on ``self.bot``.
        """
        self.bot = bot

        # Metrics are created in a fixed order: active, closed, locked, blocked.
        self.active_tickets = Gauge(
            METRIC_PREFIX + "active_tickets",
            "Amount of active tickets",
        )
        self.closed_tickets = Counter(
            METRIC_PREFIX + "closed_tickets",
            "Number of closed tickets in this session",
        )
        self.locked_tickets = Gauge(
            METRIC_PREFIX + "locked_tickets",
            "Number of soft locked tickets in this session",
        )
        self.blocked_users = Gauge(
            METRIC_PREFIX + "blocked_users",
            "Number of currently blocked users",
        )
49+
50+
51+
# Maybe load all of these from a JSON file next time
52+
class Metrics:
    """Top-level container for the bot's Prometheus metrics.

    Holds the connection state, latency, command count and version metrics,
    plus a ``FeatureCollector`` with the ticket/blocklist metrics.
    """

    __slots__ = ("bot", "connected", "latency", "commands", "version", "features")

    def __init__(self, bot: Rodhaj):
        """Create every metric object.

        Parameters
        ----------
        bot:
            The bot instance whose state is exported.
        """
        self.bot = bot
        # ``connected`` and ``latency`` carry a ``shard`` label, so callers
        # must go through ``.labels(...)`` before setting a value.
        self.connected = Enum(
            f"{METRIC_PREFIX}connected",
            "Connected to Discord",
            ["shard"],
            states=["connected", "disconnected"],
        )
        self.latency = Gauge(f"{METRIC_PREFIX}latency", "Latency to Discord", ["shard"])
        # NOTE(review): fill() observes the number of *registered* commands
        # into this Summary, while the help text says "executed". A Gauge may
        # be a better fit -- confirm before changing the exported metric.
        self.commands = Summary(f"{METRIC_PREFIX}commands", "Total commands executed")
        self.version = Info(f"{METRIC_PREFIX}version", "Versions of the bot")
        self.features = FeatureCollector(self.bot)

    def get_commands(self) -> int:
        """Return how many commands ``bot.walk_commands()`` yields.

        As some of the commands are parents, grouped commands are also
        counted here.
        """
        return sum(1 for _ in self.bot.walk_commands())

    def fill(self) -> None:
        """Populate the version info and record the current command count."""
        self.version.info(
            {
                "build_version": self.bot.version,
                "dpy_version": discord.__version__,
                "python_version": platform.python_version(),
            }
        )
        self.commands.observe(self.get_commands())

    async def start(self, host: str, port: int) -> None:
        """Start the metrics HTTP server bound to ``host`` and ``port``."""
        await start_http_server(addr=host, port=port)
89+
90+
91+
class Prometheus(commands.Cog):
    """Prometheus exporter extension for Rodhaj"""

    def __init__(self, bot: Rodhaj):
        self.bot = bot
        # Resolve the labelled child of the ``connected`` metric once; the
        # connection listeners below reuse it.
        self._connected_label = bot.metrics.connected.labels(None)

    async def cog_load(self) -> None:
        """Start the latency sampling loop when the cog is loaded."""
        self.latency_loop.start()

    async def cog_unload(self) -> None:
        """Stop the latency sampling loop when the cog is unloaded."""
        self.latency_loop.stop()

    @tasks.loop(seconds=5)
    async def latency_loop(self) -> None:
        """Copy ``bot.latency`` into the latency gauge every five seconds."""
        latency_gauge = self.bot.metrics.latency.labels(None)
        latency_gauge.set(self.bot.latency)

    @commands.Cog.listener()
    async def on_connect(self) -> None:
        """Mark the bot as connected."""
        self._connected_label.state("connected")

    @commands.Cog.listener()
    async def on_resumed(self) -> None:
        """Mark the bot as connected again after a resumed session."""
        self._connected_label.state("connected")

    @commands.Cog.listener()
    async def on_disconnect(self) -> None:
        """Mark the bot as disconnected."""
        self._connected_label.state("disconnected")
119+
120+
121+
async def setup(bot: Rodhaj) -> None:
    """Extension entry point: add the ``Prometheus`` cog to ``bot``."""
    cog = Prometheus(bot)
    await bot.add_cog(cog)

bot/cogs/tickets.py

+5
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ async def lock_ticket(
151151
async def soft_lock_ticket(
152152
self, thread: discord.Thread, reason: Optional[str] = None
153153
) -> discord.Thread:
154+
self.bot.metrics.features.locked_tickets.inc()
154155
tags = thread.applied_tags
155156
locked_tag = self.get_locked_tag(thread.parent)
156157

@@ -162,6 +163,7 @@ async def soft_lock_ticket(
162163
async def soft_unlock_ticket(
163164
self, thread: discord.Thread, reason: Optional[str] = None
164165
) -> discord.Thread:
166+
self.bot.metrics.features.locked_tickets.dec()
165167
tags = thread.applied_tags
166168
locked_tag = self.get_locked_tag(thread.parent)
167169

@@ -176,6 +178,8 @@ async def close_ticket(
176178
connection: Union[asyncpg.Pool, asyncpg.Connection],
177179
author: Optional[Union[discord.User, discord.Member]] = None,
178180
) -> Optional[discord.Thread]:
181+
self.bot.metrics.features.closed_tickets.inc()
182+
self.bot.metrics.features.active_tickets.dec()
179183
if isinstance(user, int):
180184
user = self.bot.get_user(user) or (await self.bot.fetch_user(user))
181185

@@ -280,6 +284,7 @@ async def create_ticket(self, ticket: TicketThread) -> Optional[TicketOutput]:
280284
status=False, ticket=created_ticket, msg="Could not create ticket"
281285
)
282286
else:
287+
self.bot.metrics.features.active_tickets.inc()
283288
await tr.commit()
284289
return TicketOutput(
285290
status=True,

bot/rodhaj.py

+13
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from aiohttp import ClientSession
1010
from cogs import EXTENSIONS, VERSION
1111
from cogs.config import Blocklist, GuildWebhookDispatcher
12+
from cogs.ext.prometheus import Metrics
1213
from discord.ext import commands
1314
from libs.tickets.structs import PartialConfig, ReservedTags, StatusChecklist
1415
from libs.tickets.utils import get_cached_thread, get_partial_ticket
@@ -56,6 +57,7 @@ def __init__(
5657
self.blocklist = Blocklist(self)
5758
self.default_prefix = "r>"
5859
self.logger = logging.getLogger("rodhaj")
60+
self.metrics = Metrics(self)
5961
self.session = session
6062
self.partial_config: Optional[PartialConfig] = None
6163
self.pool = pool
@@ -65,6 +67,7 @@ def __init__(
6567
)
6668
self._dev_mode = config.rodhaj.get("dev_mode", False)
6769
self._reloader = Reloader(self, Path(__file__).parent)
70+
self._prometheus = config.rodhaj.get("prometheus", {})
6871

6972
### Ticket related utils
7073
async def fetch_partial_config(self) -> Optional[PartialConfig]:
@@ -214,6 +217,16 @@ async def setup_hook(self) -> None:
214217
await self.blocklist.load()
215218
self.partial_config = await self.fetch_partial_config()
216219

220+
if self._prometheus.get("enabled", False):
221+
await self.load_extension("cogs.ext.prometheus")
222+
prom_host = self._prometheus.get("host", "127.0.0.1")
223+
prom_port = self._prometheus.get("port", 8555)
224+
225+
await self.metrics.start(host=prom_host, port=prom_port)
226+
self.logger.info("Prometheus Server started on %s:%s", prom_host, prom_port)
227+
228+
self.metrics.fill()
229+
217230
if self._dev_mode:
218231
self.logger.info("Dev mode is enabled. Loading Reloader")
219232
self._reloader.start()

docs/dev-guide/intro.rst

+22-1
Original file line numberDiff line numberDiff line change
@@ -103,4 +103,25 @@ pre-built Docker Compose file is provided. Setup instructions are as follows:
103103

104104
.. code-block:: bash
105105
106-
docker compose -f docker-compose-dev.yml up -d
106+
docker compose -f docker-compose-dev.yml up -d
107+
108+
Extensions
109+
==========
110+
111+
Rodhaj includes the following extensions as noted:
112+
113+
Prometheus Exporter
114+
^^^^^^^^^^^^^^^^^^^
115+
116+
Rodhaj currently includes a `Prometheus <https://prometheus.io/>`_ exporter.
117+
This exporter is intended to be used in production environments, where
118+
metrics surrounding ticket usage, bot health, and others would provide
119+
valuable insight. This exporter can be enabled by setting the
120+
``rodhaj.prometheus.enabled`` key within ``config.yml``.
121+
122+
.. note::
123+
124+
Prometheus client libraries are listed within the
125+
``requirements.txt`` file. By default, these libraries
126+
should be installed, but disabling the exporter will not
127+
affect the usage of these libraries.

docs/user-guide/features.rst

+14-1
Original file line numberDiff line numberDiff line change
@@ -52,4 +52,17 @@ Blocklist
5252
This feature acts very similarly to a block/unblock feature. All blocked users
5353
as of writing will not get a message from the bot. Planned features with this feature
5454
include a timer to automatically remove those who are on the blocklist and
55-
an history feature to track past incidents.
55+
a history feature to track past incidents.
56+
57+
Prometheus Extension
58+
--------------------
59+
60+
In order to aid in observability, Rodhaj includes a `Prometheus <https://prometheus.io/>`_ exporter.
61+
This is included as an extension to Rodhaj, which when used, provides valuable information
62+
regarding usage and other metrics. This extension is designed primarily to be used in
63+
production environments.
64+
65+
.. note::
66+
67+
Disabling this extension will have no effect
68+
on the bot itself.

0 commit comments

Comments
 (0)