Skip to content

8261242: [Linux] OSContainer::is_containerized() returns true when run outside a container #1661

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: pr/1660
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions make/data/hotspot-symbols/symbols-linux
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#

JVM_handle_linux_signal
JVM_IsContainerized
JVM_IsUseContainerSupport
numa_error
numa_warn
119 changes: 100 additions & 19 deletions src/hotspot/os/linux/cgroupSubsystem_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ CgroupSubsystem* CgroupSubsystemFactory::create() {
// Construct the subsystem, free resources and return
// Note: any index in cg_infos will do as the path is the same for
// all controllers.
CgroupController* unified = new CgroupV2Controller(cg_infos[MEMORY_IDX]._mount_path, cg_infos[MEMORY_IDX]._cgroup_path);
CgroupController* unified = new CgroupV2Controller(cg_infos[MEMORY_IDX]._mount_path,
cg_infos[MEMORY_IDX]._cgroup_path,
cg_infos[MEMORY_IDX]._read_only);
log_debug(os, container)("Detected cgroups v2 unified hierarchy");
cleanup(cg_infos);
return new CgroupV2Subsystem(unified);
Expand Down Expand Up @@ -100,19 +102,19 @@ CgroupSubsystem* CgroupSubsystemFactory::create() {
CgroupInfo info = cg_infos[i];
if (info._data_complete) { // pids controller might have incomplete data
if (strcmp(info._name, "memory") == 0) {
memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path);
memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path, info._read_only);
memory->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpuset") == 0) {
cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only);
cpuset->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpu") == 0) {
cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only);
cpu->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpuacct") == 0) {
cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only);
cpuacct->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "pids") == 0) {
pids = new CgroupV1Controller(info._root_mount_path, info._mount_path);
pids = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only);
pids->set_subsystem_path(info._cgroup_path);
}
} else {
Expand All @@ -127,7 +129,8 @@ void CgroupSubsystemFactory::set_controller_paths(CgroupInfo* cg_infos,
int controller,
const char* name,
char* mount_path,
char* root_path) {
char* root_path,
bool read_only) {
if (cg_infos[controller]._mount_path != nullptr) {
// On some systems duplicate controllers get mounted in addition to
// the main cgroup controllers most likely under /sys/fs/cgroup. In that
Expand All @@ -139,16 +142,74 @@ void CgroupSubsystemFactory::set_controller_paths(CgroupInfo* cg_infos,
os::free(cg_infos[controller]._root_mount_path);
cg_infos[controller]._mount_path = os::strdup(mount_path);
cg_infos[controller]._root_mount_path = os::strdup(root_path);
cg_infos[controller]._read_only = read_only;
} else {
log_debug(os, container)("Duplicate %s controllers detected. Picking %s, skipping %s.",
name, cg_infos[controller]._mount_path, mount_path);
}
} else {
cg_infos[controller]._mount_path = os::strdup(mount_path);
cg_infos[controller]._root_mount_path = os::strdup(root_path);
cg_infos[controller]._read_only = read_only;
}
}

/*
 * Determine whether or not the comma-separated mount options contain
 * the stand-alone 'ro' option.
 *
 * mount_opts: NUL-terminated option list as found in the mount options
 *             field of a /proc/self/mountinfo line (e.g. "ro,nosuid,nodev").
 *             May be null, in which case false is returned.
 *
 * Returns true iff one of the options is exactly "ro". Options that merely
 * contain those characters (e.g. "errors=remount-ro") do not match.
 *
 * The buffer is only read, never modified, so the same option string can be
 * inspected any number of times. This matters because one mountinfo line may
 * name several co-mounted controllers and each of them queries the very same
 * option buffer.
 */
static bool find_ro_opt(char* mount_opts) {
  if (mount_opts == nullptr) {
    return false;
  }
  // mount options are comma-separated (man proc).
  const char* opt = mount_opts;
  for (;;) {
    // Length of the current option, i.e. up to the next ',' or the end.
    const size_t opt_len = strcspn(opt, ",");
    if (opt_len == 2 && opt[0] == 'r' && opt[1] == 'o') {
      return true;
    }
    if (opt[opt_len] == '\0') {
      // Last option inspected without a match.
      return false;
    }
    opt += opt_len + 1; // step over the ',' separator
  }
}

/*
 * Split one /proc/self/mountinfo line into the pieces the cgroup detection
 * code cares about. Returns true only if all five captured fields could be
 * read. The super options are needed for cgroups v1; on cgroups v2 they are
 * captured as well but not used.
 *
 * Layout of a mountinfo line and how each field is scanned:
 *
 *   36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
 *   (1)(2)(3):(4) (5)  (6)     (7)       (8)  (9) (10)   (11)        (12)
 *
 *  (1)  mount ID:        '%*d', discarded
 *  (2)  parent ID:       '%*d', discarded
 *  (3)  major and
 *  (4)  minor:           '%*d:%*d' (separated by ':'), discarded
 *  (5)  root:            '%s', stored in 'tmproot'. Must be non-empty.
 *  (6)  mount point:     '%s', stored in 'tmpmount'. Must be non-empty.
 *  (7)  mount options:   '%s', stored in 'mount_opts'. Must be non-empty.
 *  (8)  optional fields
 *  (9)  and separator:   '%*[^-]-', i.e. anything that is not a hyphen,
 *                        followed by a hyphen; discarded. Without optional
 *                        fields the discarded text is just space characters,
 *                        otherwise it includes the optional fields too.
 *  (10) filesystem type: '%s', stored in 'tmp_fs_type'
 *  (11) mount source:    '%*s', discarded
 *  (12) super options:   '%s', stored in 'tmpcgroups'
 */
static inline bool match_mount_info_line(char* line,
                                         char* tmproot,
                                         char* tmpmount,
                                         char* mount_opts,
                                         char* tmp_fs_type,
                                         char* tmpcgroups) {
  // Every one of the five output buffers has to be filled for a match.
  const int wanted_fields = 5;
  const int scanned_fields = sscanf(line,
                                    "%*d %*d "    // (1) (2)
                                    "%*d:%*d "    // (3):(4)
                                    "%s %s %s"    // (5) (6) (7)
                                    "%*[^-]- "    // (8) (9)
                                    "%s %*s %s",  // (10) (11) (12)
                                    tmproot,
                                    tmpmount,
                                    mount_opts,
                                    tmp_fs_type,
                                    tmpcgroups);
  return scanned_fields == wanted_fields;
}

bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
const char* proc_cgroups,
const char* proc_self_cgroup,
Expand Down Expand Up @@ -320,26 +381,40 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
char tmproot[MAXPATHLEN+1];
char tmpmount[MAXPATHLEN+1];
char tmpcgroups[MAXPATHLEN+1];
char mount_opts[MAXPATHLEN+1];
char *cptr = tmpcgroups;
char *token;

// Cgroup v2 relevant info. We only look for the _mount_path iff is_cgroupsV2 so
// as to avoid memory stomping of the _mount_path pointer later on in the cgroup v1
// block in the hybrid case.
if (is_cgroupsV2 && sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- %s %*s %*s", tmproot, tmpmount, tmp_fs_type) == 3) {
/* Cgroup v2 relevant info. We only look for the _mount_path iff is_cgroupsV2 so
* as to avoid memory stomping of the _mount_path pointer later on in the cgroup v1
* block in the hybrid case.
*
* We collect the read only mount option in the cgroup infos so as to have that
* info ready when determining is_containerized().
*/
if (is_cgroupsV2 && match_mount_info_line(p,
tmproot,
tmpmount,
mount_opts,
tmp_fs_type,
tmpcgroups /* unused */)) {
// we likely have an early match return (e.g. cgroup fs match), be sure we have cgroup2 as fstype
if (strcmp("cgroup2", tmp_fs_type) == 0) {
cgroupv2_mount_point_found = true;
any_cgroup_mounts_found = true;
// For unified we only have a single line with cgroup2 fs type.
// Therefore use that option for all CG info structs.
bool ro_option = find_ro_opt(mount_opts);
for (int i = 0; i < CG_INFO_LENGTH; i++) {
set_controller_paths(cg_infos, i, "(cg2, unified)", tmpmount, tmproot);
set_controller_paths(cg_infos, i, "(cg2, unified)", tmpmount, tmproot, ro_option);
}
}
}

/* Cgroup v1 relevant info
*
* Find the cgroup mount point for memory, cpuset, cpu, cpuacct, pids
* Find the cgroup mount point for memory, cpuset, cpu, cpuacct, pids. For each controller
* also determine whether it shows up as mounted read-only.
*
* Example for docker:
* 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
Expand All @@ -348,32 +423,38 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
* 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
*
* 44 31 0:39 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:23 - cgroup cgroup rw,pids
*
*/
if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- %s %*s %s", tmproot, tmpmount, tmp_fs_type, tmpcgroups) == 4) {
if (match_mount_info_line(p, tmproot, tmpmount, mount_opts, tmp_fs_type, tmpcgroups)) {
if (strcmp("cgroup", tmp_fs_type) != 0) {
// Skip cgroup2 fs lines on hybrid or unified hierarchy.
continue;
}
while ((token = strsep(&cptr, ",")) != nullptr) {
if (strcmp(token, "memory") == 0) {
any_cgroup_mounts_found = true;
set_controller_paths(cg_infos, MEMORY_IDX, token, tmpmount, tmproot);
bool ro_option = find_ro_opt(mount_opts);
set_controller_paths(cg_infos, MEMORY_IDX, token, tmpmount, tmproot, ro_option);
cg_infos[MEMORY_IDX]._data_complete = true;
} else if (strcmp(token, "cpuset") == 0) {
any_cgroup_mounts_found = true;
set_controller_paths(cg_infos, CPUSET_IDX, token, tmpmount, tmproot);
bool ro_option = find_ro_opt(mount_opts);
set_controller_paths(cg_infos, CPUSET_IDX, token, tmpmount, tmproot, ro_option);
cg_infos[CPUSET_IDX]._data_complete = true;
} else if (strcmp(token, "cpu") == 0) {
any_cgroup_mounts_found = true;
set_controller_paths(cg_infos, CPU_IDX, token, tmpmount, tmproot);
bool ro_option = find_ro_opt(mount_opts);
set_controller_paths(cg_infos, CPU_IDX, token, tmpmount, tmproot, ro_option);
cg_infos[CPU_IDX]._data_complete = true;
} else if (strcmp(token, "cpuacct") == 0) {
any_cgroup_mounts_found = true;
set_controller_paths(cg_infos, CPUACCT_IDX, token, tmpmount, tmproot);
bool ro_option = find_ro_opt(mount_opts);
set_controller_paths(cg_infos, CPUACCT_IDX, token, tmpmount, tmproot, ro_option);
cg_infos[CPUACCT_IDX]._data_complete = true;
} else if (strcmp(token, "pids") == 0) {
any_cgroup_mounts_found = true;
set_controller_paths(cg_infos, PIDS_IDX, token, tmpmount, tmproot);
bool ro_option = find_ro_opt(mount_opts);
set_controller_paths(cg_infos, PIDS_IDX, token, tmpmount, tmproot, ro_option);
cg_infos[PIDS_IDX]._data_complete = true;
}
}
Expand Down
7 changes: 6 additions & 1 deletion src/hotspot/os/linux/cgroupSubsystem_linux.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@
class CgroupController: public CHeapObj<mtInternal> {
public:
virtual char* subsystem_path() = 0;
virtual bool is_read_only() = 0;

/* Read a numerical value as unsigned long
*
Expand Down Expand Up @@ -208,6 +209,7 @@ class CgroupSubsystem: public CHeapObj<mtInternal> {
virtual jlong memory_and_swap_limit_in_bytes() = 0;
virtual jlong memory_soft_limit_in_bytes() = 0;
virtual jlong memory_max_usage_in_bytes() = 0;
virtual bool is_containerized() = 0;

virtual char * cpu_cpuset_cpus() = 0;
virtual char * cpu_cpuset_memory_nodes() = 0;
Expand All @@ -230,6 +232,7 @@ class CgroupInfo : public StackObj {
char* _name;
int _hierarchy_id;
bool _enabled;
bool _read_only; // whether or not the mount path is mounted read-only
bool _data_complete; // indicating cgroup v1 data is complete for this controller
char* _cgroup_path; // cgroup controller path from /proc/self/cgroup
char* _root_mount_path; // root mount path from /proc/self/mountinfo. Unused for cgroup v2
Expand All @@ -240,6 +243,7 @@ class CgroupInfo : public StackObj {
_name = nullptr;
_hierarchy_id = -1;
_enabled = false;
_read_only = false;
_data_complete = false;
_cgroup_path = nullptr;
_root_mount_path = nullptr;
Expand Down Expand Up @@ -271,7 +275,8 @@ class CgroupSubsystemFactory: AllStatic {
int controller,
const char* name,
char* mount_path,
char* root_path);
char* root_path,
bool read_only);
// Determine the cgroup type (version 1 or version 2), given
// relevant paths to files. Sets 'flags' accordingly.
static bool determine_type(CgroupInfo* cg_infos,
Expand Down
11 changes: 11 additions & 0 deletions src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,17 @@ jlong CgroupV1Subsystem::memory_soft_limit_in_bytes() {
}
}

bool CgroupV1Subsystem::is_containerized() {
  // Containerized iff all required controllers are mounted read-only.
  // The controllers consulted here are memory, cpu, cpuacct and cpuset;
  // the first one that is not read-only settles the answer.
  // See OSContainer::is_containerized() for the full logic.
  if (!_memory->controller()->is_read_only()) {
    return false;
  }
  if (!_cpu->controller()->is_read_only()) {
    return false;
  }
  if (!_cpuacct->is_read_only()) {
    return false;
  }
  return _cpuset->is_read_only();
}

/* memory_usage_in_bytes
*
* Return the amount of used memory for this process.
Expand Down
8 changes: 6 additions & 2 deletions src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,22 @@ class CgroupV1Controller: public CgroupController {
/* mountinfo contents */
char *_root;
char *_mount_point;
bool _read_only;

/* Constructed subsystem directory */
char *_path;

public:
CgroupV1Controller(char *root, char *mountpoint) {
CgroupV1Controller(char *root, char *mountpoint, bool ro) {
_root = os::strdup(root);
_mount_point = os::strdup(mountpoint);
_path = nullptr;
_read_only = ro;
}

virtual void set_subsystem_path(char *cgroup_path);
char *subsystem_path() { return _path; }
bool is_read_only() { return _read_only; }
};

class CgroupV1MemoryController: public CgroupV1Controller {
Expand All @@ -65,7 +68,7 @@ class CgroupV1MemoryController: public CgroupV1Controller {
void set_hierarchical(bool value) { _uses_mem_hierarchy = value; }

public:
CgroupV1MemoryController(char *root, char *mountpoint) : CgroupV1Controller(root, mountpoint) {
CgroupV1MemoryController(char *root, char *mountpoint, bool ro) : CgroupV1Controller(root, mountpoint, ro) {
_uses_mem_hierarchy = false;
}

Expand Down Expand Up @@ -94,6 +97,7 @@ class CgroupV1Subsystem: public CgroupSubsystem {

jlong pids_max();
jlong pids_current();
bool is_containerized();

void print_version_specific_info(outputStream* st);

Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ int CgroupV2Subsystem::cpu_quota() {
return limit;
}

bool CgroupV2Subsystem::is_containerized() {
  // The answer is taken solely from the read-only flag of the
  // unified controller.
  const bool unified_read_only = _unified->is_read_only();
  return unified_read_only;
}

char* CgroupV2Subsystem::cpu_cpuset_cpus() {
char cpus[1024];
CONTAINER_READ_STRING_CHECKED(_unified, "/cpuset.cpus", "cpuset.cpus", cpus, 1024);
Expand Down
6 changes: 5 additions & 1 deletion src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,22 @@ class CgroupV2Controller: public CgroupController {
char *_mount_path;
/* The cgroup path for the controller */
char *_cgroup_path;
bool _read_only;

/* Constructed full path to the subsystem directory */
char *_path;
static char* construct_path(char* mount_path, char *cgroup_path);

public:
CgroupV2Controller(char * mount_path, char *cgroup_path) {
CgroupV2Controller(char * mount_path, char *cgroup_path, bool ro) {
_mount_path = mount_path;
_cgroup_path = os::strdup(cgroup_path);
_path = construct_path(mount_path, cgroup_path);
_read_only = ro;
}

char *subsystem_path() { return _path; }
bool is_read_only() { return _read_only; }
};

class CgroupV2Subsystem: public CgroupSubsystem {
Expand Down Expand Up @@ -80,6 +83,7 @@ class CgroupV2Subsystem: public CgroupSubsystem {
jlong pids_max();
jlong pids_current();

bool is_containerized();
void print_version_specific_info(outputStream* st);

const char * container_type() {
Expand Down
Loading