Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ tower = "0.5"
tower-http = "0.6"
tracing = "0.1"
tracing-appender = "0.2"
tracing-opentelemetry = "0.31.0"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] }
typetag = "0.2"
uuid = { version = "1.17", features = ["serde", "v4", "fast-rng"] }
Expand Down
17 changes: 16 additions & 1 deletion docs/how-to/how-to-change-log-level-on-the-fly.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,19 @@ Log Level changed from Some("info") to "trace,flow=debug"%

The data is a string in the format of `global_level,module1=level1,module2=level2,...` that follows the same rule of `RUST_LOG`.

The module is the module name of the log, and the level is the log level. The log level can be one of the following: `trace`, `debug`, `info`, `warn`, `error`, `off`(case insensitive).
The module is the module name of the log, and the level is the log level. The log level can be one of the following: `trace`, `debug`, `info`, `warn`, `error`, `off`(case insensitive).

# Enable/Disable Trace on the Fly

## HTTP API

example:
```bash
curl --data "true" 127.0.0.1:4000/debug/enable_trace
```
And database will reply with something like:
```
trace enabled%
```

Possible values are "true" or "false".
2 changes: 1 addition & 1 deletion src/common/telemetry/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,5 @@ tokio.workspace = true
tracing = "0.1"
tracing-appender.workspace = true
tracing-log = "0.2"
tracing-opentelemetry = "0.31.0"
tracing-opentelemetry.workspace = true
tracing-subscriber.workspace = true
5 changes: 4 additions & 1 deletion src/common/telemetry/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ mod panic_hook;
pub mod tracing_context;
mod tracing_sampler;

pub use logging::{RELOAD_HANDLE, init_default_ut_logging, init_global_logging};
pub use logging::{
LOG_RELOAD_HANDLE, TRACE_RELOAD_HANDLE, get_or_init_tracer, init_default_ut_logging,
init_global_logging,
};
pub use metric::dump_metrics;
pub use panic_hook::set_panic_hook;
pub use {common_error, tracing, tracing_subscriber};
147 changes: 107 additions & 40 deletions src/common/telemetry/src/logging.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@ use opentelemetry::trace::TracerProvider;
use opentelemetry::{KeyValue, global};
use opentelemetry_otlp::{Protocol, SpanExporter, WithExportConfig, WithHttpConfig};
use opentelemetry_sdk::propagation::TraceContextPropagator;
use opentelemetry_sdk::trace::Sampler;
use opentelemetry_sdk::trace::{Sampler, Tracer};
use opentelemetry_semantic_conventions::resource;
use serde::{Deserialize, Serialize};
use tracing_appender::non_blocking::WorkerGuard;
use tracing_appender::rolling::{RollingFileAppender, Rotation};
use tracing_log::LogTracer;
use tracing_subscriber::filter::{FilterFn, Targets};
use tracing_subscriber::fmt::Layer;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::layer::{Layered, SubscriberExt};
use tracing_subscriber::prelude::*;
use tracing_subscriber::{EnvFilter, Registry, filter};

Expand All @@ -48,10 +48,31 @@ pub const DEFAULT_OTLP_HTTP_ENDPOINT: &str = "http://localhost:4318/v1/traces";
/// The default logs directory.
pub const DEFAULT_LOGGING_DIR: &str = "logs";

// Handle for reloading log level
pub static RELOAD_HANDLE: OnceCell<tracing_subscriber::reload::Handle<Targets, Registry>> =
/// Handle for reloading log level
pub static LOG_RELOAD_HANDLE: OnceCell<tracing_subscriber::reload::Handle<Targets, Registry>> =
OnceCell::new();

type TraceReloadHandle = tracing_subscriber::reload::Handle<
Vec<
tracing_opentelemetry::OpenTelemetryLayer<
Layered<tracing_subscriber::reload::Layer<Targets, Registry>, Registry>,
Tracer,
>,
>,
Layered<tracing_subscriber::reload::Layer<Targets, Registry>, Registry>,
>;

/// Handle for reloading trace level
pub static TRACE_RELOAD_HANDLE: OnceCell<TraceReloadHandle> = OnceCell::new();

static TRACER: OnceCell<Mutex<TraceState>> = OnceCell::new();

#[derive(Debug)]
enum TraceState {
Ready(Tracer),
Deferred(TraceContext),
}

/// The logging options that used to initialize the logger.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(default)]
Expand Down Expand Up @@ -167,6 +188,13 @@ impl PartialEq for LoggingOptions {

impl Eq for LoggingOptions {}

#[derive(Clone, Debug)]
struct TraceContext {
app_name: String,
node_id: String,
logging_opts: LoggingOptions,
}

impl Default for LoggingOptions {
fn default() -> Self {
Self {
Expand Down Expand Up @@ -242,6 +270,7 @@ pub fn init_global_logging(
) -> Vec<WorkerGuard> {
static START: Once = Once::new();
let mut guards = vec![];
let node_id = node_id.unwrap_or_else(|| "none".to_string());

START.call_once(|| {
// Enable log compatible layer to convert log record to tracing span.
Expand Down Expand Up @@ -357,10 +386,39 @@ pub fn init_global_logging(

let (dyn_filter, reload_handle) = tracing_subscriber::reload::Layer::new(filter.clone());

RELOAD_HANDLE
LOG_RELOAD_HANDLE
.set(reload_handle)
.expect("reload handle already set, maybe init_global_logging get called twice?");

let mut initial_tracer = None;
let trace_state = if opts.enable_otlp_tracing {
let tracer = create_tracer(app_name, &node_id, opts);
initial_tracer = Some(tracer.clone());
TraceState::Ready(tracer)
} else {
TraceState::Deferred(TraceContext {
app_name: app_name.to_string(),
node_id: node_id.clone(),
logging_opts: opts.clone(),
})
};

TRACER
.set(Mutex::new(trace_state))
.expect("trace state already initialized");

let initial_trace_layers = initial_tracer
.as_ref()
.map(|tracer| vec![tracing_opentelemetry::layer().with_tracer(tracer.clone())])
.unwrap_or_else(Vec::new);

let (dyn_trace_layer, trace_reload_handle) =
tracing_subscriber::reload::Layer::new(initial_trace_layers);

TRACE_RELOAD_HANDLE
.set(trace_reload_handle)
.unwrap_or_else(|_| panic!("failed to set trace reload handle"));

// Must enable 'tokio_unstable' cfg to use this feature.
// For example: `RUSTFLAGS="--cfg tokio_unstable" cargo run -F common-telemetry/console -- standalone start`
#[cfg(feature = "tokio-console")]
Expand All @@ -383,6 +441,7 @@ pub fn init_global_logging(

Registry::default()
.with(dyn_filter)
.with(dyn_trace_layer)
.with(tokio_console_layer)
.with(stdout_logging_layer)
.with(file_logging_layer)
Expand All @@ -396,53 +455,61 @@ pub fn init_global_logging(
#[cfg(not(feature = "tokio-console"))]
let subscriber = Registry::default()
.with(dyn_filter)
.with(dyn_trace_layer)
.with(stdout_logging_layer)
.with(file_logging_layer)
.with(err_file_logging_layer)
.with(slow_query_logging_layer);

if opts.enable_otlp_tracing {
global::set_text_map_propagator(TraceContextPropagator::new());

let sampler = opts
.tracing_sample_ratio
.as_ref()
.map(create_sampler)
.map(Sampler::ParentBased)
.unwrap_or(Sampler::ParentBased(Box::new(Sampler::AlwaysOn)));

let provider = opentelemetry_sdk::trace::SdkTracerProvider::builder()
.with_batch_exporter(build_otlp_exporter(opts))
.with_sampler(sampler)
.with_resource(
opentelemetry_sdk::Resource::builder_empty()
.with_attributes([
KeyValue::new(resource::SERVICE_NAME, app_name.to_string()),
KeyValue::new(
resource::SERVICE_INSTANCE_ID,
node_id.unwrap_or("none".to_string()),
),
KeyValue::new(resource::SERVICE_VERSION, common_version::version()),
KeyValue::new(resource::PROCESS_PID, std::process::id().to_string()),
])
.build(),
)
.build();
let tracer = provider.tracer("greptimedb");
global::set_text_map_propagator(TraceContextPropagator::new());

tracing::subscriber::set_global_default(
subscriber.with(tracing_opentelemetry::layer().with_tracer(tracer)),
)
tracing::subscriber::set_global_default(subscriber)
.expect("error setting global tracing subscriber");
} else {
tracing::subscriber::set_global_default(subscriber)
.expect("error setting global tracing subscriber");
}
});

guards
}

fn create_tracer(app_name: &str, node_id: &str, opts: &LoggingOptions) -> Tracer {
let sampler = opts
.tracing_sample_ratio
.as_ref()
.map(create_sampler)
.map(Sampler::ParentBased)
.unwrap_or(Sampler::ParentBased(Box::new(Sampler::AlwaysOn)));

let resource = opentelemetry_sdk::Resource::builder_empty()
.with_attributes([
KeyValue::new(resource::SERVICE_NAME, app_name.to_string()),
KeyValue::new(resource::SERVICE_INSTANCE_ID, node_id.to_string()),
KeyValue::new(resource::SERVICE_VERSION, common_version::version()),
KeyValue::new(resource::PROCESS_PID, std::process::id().to_string()),
])
.build();

opentelemetry_sdk::trace::SdkTracerProvider::builder()
.with_batch_exporter(build_otlp_exporter(opts))
.with_sampler(sampler)
.with_resource(resource)
.build()
.tracer("greptimedb")
}

/// Ensure that the OTLP tracer has been constructed, building it lazily if needed.
pub fn get_or_init_tracer() -> Result<Tracer, &'static str> {
let state = TRACER.get().ok_or("trace state is not initialized")?;
let mut guard = state.lock().expect("trace state lock poisoned");

match &mut *guard {
TraceState::Ready(tracer) => Ok(tracer.clone()),
TraceState::Deferred(context) => {
let tracer = create_tracer(&context.app_name, &context.node_id, &context.logging_opts);
*guard = TraceState::Ready(tracer.clone());
Ok(tracer)
}
}
}

fn build_otlp_exporter(opts: &LoggingOptions) -> SpanExporter {
let protocol = opts
.otlp_export_protocol
Expand Down
1 change: 1 addition & 0 deletions src/servers/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ once_cell.workspace = true
openmetrics-parser = "0.4"
simd-json.workspace = true
socket2 = "0.5"
tracing-opentelemetry.workspace = true
# use crates.io version once the following PRs is merged into the nextest release
# 1. fix: Use After Free in PacketReader in https://github.com/databendlabs/opensrv/pull/67
# 2. Use ring, instead of aws-lc-rs in https://github.com/databendlabs/opensrv/pull/72
Expand Down
2 changes: 2 additions & 0 deletions src/servers/src/http.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ pub mod authorize;
#[cfg(feature = "dashboard")]
mod dashboard;
pub mod dyn_log;
pub mod dyn_trace;
pub mod event;
pub mod extractor;
pub mod handler;
Expand Down Expand Up @@ -908,6 +909,7 @@ impl HttpServer {
Router::new()
// handler for changing log level dynamically
.route("/log_level", routing::post(dyn_log::dyn_log_handler))
.route("/enable_trace", routing::post(dyn_trace::dyn_trace_handler))
.nest(
"/prof",
Router::new()
Expand Down
4 changes: 2 additions & 2 deletions src/servers/src/http/dyn_log.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
use axum::http::StatusCode;
use axum::response::IntoResponse;
use common_telemetry::tracing_subscriber::filter;
use common_telemetry::{RELOAD_HANDLE, info};
use common_telemetry::{LOG_RELOAD_HANDLE, info};
use snafu::OptionExt;

use crate::error::{InternalSnafu, InvalidParameterSnafu, Result};
Expand All @@ -29,7 +29,7 @@ pub async fn dyn_log_handler(level: String) -> Result<impl IntoResponse> {
.build()
})?;
let mut old_filter = None;
RELOAD_HANDLE
LOG_RELOAD_HANDLE
.get()
.context(InternalSnafu {
err_msg: "Reload handle not initialized",
Expand Down
Loading
Loading