diff --git a/Cargo.lock b/Cargo.lock index ba9c469..9445288 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -125,9 +125,9 @@ checksum = "6c58ec36aac5066d5ca17df51b3e70279f5670a72102f5752cb7e7c856adfc70" [[package]] name = "cargo_toml" -version = "0.15.2" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f83bc2e401ed041b7057345ebc488c005efa0341d5541ce7004d30458d0090b" +checksum = "599aa35200ffff8f04c1925aa1acc92fa2e08874379ef42e210a80e527e60838" dependencies = [ "serde", "toml", @@ -313,6 +313,41 @@ dependencies = [ "typenum", ] +[[package]] +name = "darling" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d706e75d87e35569db781a9b5e2416cff1236a47ed380831f959382ccd5f858" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c960ae2da4de88a91b2d920c2a7233b400bc33cb28453a2987822d8392519b" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 1.0.109", +] + +[[package]] +name = "darling_macro" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72" +dependencies = [ + "darling_core", + "quote", + "syn 1.0.109", +] + [[package]] name = "digest" version = "0.10.6" @@ -398,11 +433,17 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "form_urlencoded" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" dependencies = [ "percent-encoding", ] @@ -552,11 +593,17 @@ dependencies = [ "digest", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -598,9 +645,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.144" +version = "0.2.146" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" +checksum = "f92be4933c13fd498862a9e02a3055f8a8d9c039ce33db97306fd5a6caa7f29b" [[package]] name = "libloading" @@ -711,11 +758,34 @@ dependencies = [ "libc", ] +[[package]] +name = "nvml-wrapper" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd21b9f5a1cce3c3515c9ffa85f5c7443e07162dae0ccf4339bb7ca38ad3454" +dependencies = [ + "bitflags", + "libloading", + "nvml-wrapper-sys", + "static_assertions", + "thiserror", + "wrapcenum-derive", +] + +[[package]] +name = "nvml-wrapper-sys" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c961a2ea9e91c59a69b78e69090f6f5b867bb46c0c56de9482da232437c4987e" +dependencies = [ + "libloading", +] + [[package]] name = "once_cell" -version = "1.17.1" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "os_str_bytes" @@ -770,9 +840,9 @@ checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" [[package]] name = "percent-encoding" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" [[package]] name = "pest" @@ -803,6 +873,7 @@ dependencies = [ "ciborium", "heapless", "lazy_static", + "nvml-wrapper", "parking_lot", "pgrx", "pgrx-tests", @@ -1034,18 +1105,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.56" +version = "1.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" +checksum = "dec2b086b7a862cf4de201096214fa870344cf922b2b30c167badb3af3195406" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.26" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" dependencies = [ "proc-macro2", ] @@ -1130,9 +1201,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.8.2" +version = "1.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1a59b5d8e97dee33696bf13c5ba8ab85341c002922fba050069326b9c498974" +checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f" dependencies = [ "aho-corasick", "memchr", @@ -1219,9 +1290,9 @@ checksum = "1685deded9b272198423bdbdb907d8519def2f26cf3699040e54e8c4fbd5c5ce" [[package]] name = "serde" -version = "1.0.163" +version = "1.0.164" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2" +checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d" dependencies = [ "serde_derive", ] @@ -1247,13 +1318,13 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.163" +version = "1.0.164" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e" +checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.13", + "syn 2.0.18", ] [[package]] @@ -1269,9 +1340,9 @@ dependencies = [ [[package]] name = "serde_spanned" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0efd8caf556a6cebd3b285caf480045fcc1ac04f6bd786b09a6f11af30c4fcf4" +checksum = "93107647184f6027e3b7dcb2e11034cf95ffa1e3a682c67951963ac69c1c007d" dependencies = [ "serde", ] @@ -1355,6 +1426,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "stringprep" version = "0.1.2" @@ -1365,6 +1442,12 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "strsim" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6446ced80d6c486436db5c078dde11a9f73d42b57fb273121e160b84f63d894c" + [[package]] name = "subtle" version = "2.4.1" @@ -1384,9 +1467,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.13" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4c9da457c5285ac1f936ebd076af6dac17a61cfe7826f2076b4d015cf47bc8ec" +checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e" dependencies = [ "proc-macro2", "quote", @@ -1546,9 +1629,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b403acf6f2bb0859c93c7f0d967cb4a75a7ac552100f9322faf64dc047669b21" +checksum = "d6135d499e69981f9ff0ef2167955a5333c35e36f6937d382974566b3d5b94ec" dependencies = [ "serde", "serde_spanned", @@ -1558,18 +1641,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ab8ed2edee10b50132aed5f331333428b011c99402b5a534154ed15746f9622" +checksum = "5a76a9312f5ba4c2dec6b9161fdf25d87ad8a09256ccea5a556fef03c706a10f" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.19.8" +version = "0.19.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "239410c8609e8125456927e6707163a3b1fdb40561e4b803bc041f466ccfdc13" +checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739" dependencies = [ "indexmap", "serde", @@ -1618,9 +1701,9 @@ checksum = "ccb97dac3243214f8d8507998906ca3e2e0b900bf9bf4870477f125b82e68f6e" [[package]] name = "unicode-bidi" -version = "0.3.8" +version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-ident" @@ -1645,9 +1728,9 @@ checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" [[package]] name = "url" -version = "2.3.1" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" 
+checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" dependencies = [ "form_urlencoded", "idna", @@ -1822,13 +1905,25 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "winnow" -version = "0.4.1" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae8970b36c66498d8ff1d66685dc86b91b29db0c7739899012f63a63814b4b28" +checksum = "61de7bac303dc551fe038e2b3cef0f571087a47571ea6e79a87692ac99b99699" dependencies = [ "memchr", ] +[[package]] +name = "wrapcenum-derive" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bcc065c85ad2c3bd12aa4118bf164835712e25080c392557801a13292c60aec" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "wyz" version = "0.5.1" diff --git a/Cargo.toml b/Cargo.toml index fd4af79..38462e6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ bytesize = "1.1.0" ciborium = "0.2.0" heapless = "0.7.16" lazy_static = "1.4.0" +nvml-wrapper = "0.9.0" parking_lot = "0.12.1" pgrx = { version = "=0.8.3", features = ["time-crate"] } serde = "1.0.152" diff --git a/README.md b/README.md index 77cdcf7..2075669 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,19 @@ SELECT * FROM pg_stat_sysinfo_cache_summary(); ``` +NVIDIA GPU statistics are available through `pg_gpu_info()` on hosts where the NVIDIA driver is installed (it provides the NVML library, which `nvidia-smi` also uses): + +```sql +---- +SELECT device_id, device_name, total_memory_mb, used_memory_mb, temperature_c from pg_gpu_info(); + device_id | device_name | total_memory_mb | used_memory_mb | temperature_c +-----------+-------------+-----------------+----------------+--------------- + 0 | Tesla T4 | 16106.12736 | 12277.972992 | 59 + 1 | Tesla T4 | 16106.12736 | 13227.982848 | 58 + 2 | Tesla T4 | 16106.12736 | 2043.871232 | 56 + 3 | Tesla T4 | 16106.12736 | 9325.182976 | 53 +(4 rows) +``` ## Configuration Changes The `pg_stat_sysinfo.interval` can be updated by changing 
`postgres.conf` and
diff --git a/src/lib.rs b/src/lib.rs
index edd3d6f..fa6a06a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -13,6 +13,7 @@ mod crate_info;
 mod init;
 mod settings;
 mod shmem_ring_buffer;
+mod nvidia_collector;
 
 pgrx::pg_module_magic!();
 
@@ -98,3 +99,26 @@ fn maprows<'a, I: Iterator + 'a>(
 
     TableIterator::new(translated)
 }
+
+/// Returns one row per NVIDIA GPU reported by NVML.
+///
+/// The columns mirror [`nvidia_collector::GPUResponse`]: device index and name,
+/// total/free/used memory in MB, temperature, running compute processes as
+/// JSONB, and the GPU and memory utilization rates.
+#[pg_extern]
+fn pg_gpu_info() -> TableIterator<
+    'static,
+    (
+        name!(device_id, i32),
+        name!(device_name, String),
+        name!(total_memory_mb, f64),
+        name!(free_memory_mb, f64),
+        name!(used_memory_mb, f64),
+        name!(temperature_c, i32),
+        name!(process_info, pgrx::JsonB),
+        name!(gpu_utilization, i32),
+        name!(memory_utilization, i32)
+    ),
+> {
+    TableIterator::new(nvidia_collector::get_cuda_information().into_iter())
+}
diff --git a/src/nvidia_collector.rs b/src/nvidia_collector.rs
new file mode 100644
index 0000000..4b47e18
--- /dev/null
+++ b/src/nvidia_collector.rs
@@ -0,0 +1,66 @@
+//! Collects NVIDIA GPU statistics through NVML for the `pg_gpu_info()` SQL function.
+
+use nvml_wrapper::enum_wrappers::device::TemperatureSensor;
+use nvml_wrapper::error::NvmlError;
+use nvml_wrapper::Nvml;
+
+/// One row of GPU statistics, in column order: device index, device name,
+/// total, free and used memory (MB), temperature, running compute processes
+/// (JSONB), GPU utilization and memory utilization.
+pub type GPUResponse = (i32, String, f64, f64, f64, i32, pgrx::JsonB, i32, i32);
+
+/// Divisor that converts NVML's byte counts to (decimal) megabytes.
+const BYTES_PER_MB: f64 = 1_000_000.0;
+
+/// Returns one [`GPUResponse`] per device reported by NVML.
+///
+/// # Panics
+///
+/// Panics if NVML cannot be initialized or if any NVML query fails; the panic
+/// message carries the underlying NVML error.
+pub fn get_cuda_information() -> Vec<GPUResponse> {
+    let nvml =
+        Nvml::init().expect("Failed to initialize NVML, do you have NVIDIA drivers installed?");
+
+    collect_gpu_records(&nvml)
+        .unwrap_or_else(|err| panic!("Failed to query GPU information from NVML: {}", err))
+}
+
+/// Queries every device known to `nvml`, stopping at the first NVML error.
+fn collect_gpu_records(nvml: &Nvml) -> Result<Vec<GPUResponse>, NvmlError> {
+    (0..nvml.device_count()?)
+        .map(|i| -> Result<GPUResponse, NvmlError> {
+            let device = nvml.device_by_index(i)?;
+            let device_name = device.name()?;
+            let index = device.index()?;
+            let memory_info = device.memory_info()?;
+            let temperature = device.temperature(TemperatureSensor::Gpu)?;
+            let utilization = device.utilization_rates()?;
+
+            let process_infos: Vec<serde_json::Value> = device
+                .running_compute_processes()?
+                .into_iter()
+                .map(|process_info| {
+                    serde_json::json!({
+                        "pid": process_info.pid,
+                        "gpu_instance_id": process_info.gpu_instance_id,
+                        "compute_instance_id": process_info.compute_instance_id,
+                    })
+                })
+                .collect();
+
+            // NVML reports these values as unsigned integers; the row type uses i32.
+            Ok((
+                index as i32,
+                device_name,
+                memory_info.total as f64 / BYTES_PER_MB,
+                memory_info.free as f64 / BYTES_PER_MB,
+                memory_info.used as f64 / BYTES_PER_MB,
+                temperature as i32,
+                pgrx::JsonB(serde_json::Value::Array(process_infos)),
+                utilization.gpu as i32,
+                utilization.memory as i32,
+            ))
+        })
+        .collect()
+}