diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index 85a06f457ebea..62998003ca114 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -862,7 +862,7 @@ pub(crate) fn codegen(
                     .generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name);
                 let thin_bc =
                     module.thin_lto_buffer.as_deref().expect("cannot find embedded bitcode");
-                embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, &thin_bc);
+                embed_bitcode(cgcx, llcx, llmod, &thin_bc);
             }
         }
 
@@ -1058,7 +1058,6 @@ fn embed_bitcode(
     cgcx: &CodegenContext<LlvmCodegenBackend>,
     llcx: &llvm::Context,
     llmod: &llvm::Module,
-    cmdline: &str,
     bitcode: &[u8],
 ) {
     // We're adding custom sections to the output object file, but we definitely
@@ -1074,7 +1073,9 @@ fn embed_bitcode(
     // * Mach-O - this is for macOS. Inspecting the source code for the native
     //   linker here shows that the `.llvmbc` and `.llvmcmd` sections are
     //   automatically skipped by the linker. In that case there's nothing extra
-    //   that we need to do here.
+    //   that we need to do here. We do need to make sure that the
+    //   `__LLVM,__cmdline` section exists even though it is empty as otherwise
+    //   ld64 rejects the object file.
     //
     // * Wasm - the native LLD linker is hard-coded to skip `.llvmbc` and
     //   `.llvmcmd` sections, so there's nothing extra we need to do.
@@ -1111,7 +1112,7 @@ fn embed_bitcode(
         llvm::set_linkage(llglobal, llvm::Linkage::PrivateLinkage);
         llvm::LLVMSetGlobalConstant(llglobal, llvm::True);
 
-        let llconst = common::bytes_in_context(llcx, cmdline.as_bytes());
+        let llconst = common::bytes_in_context(llcx, &[]);
         let llglobal = llvm::add_global(llmod, common::val_ty(llconst), c"rustc.embedded.cmdline");
         llvm::set_initializer(llglobal, llconst);
         let section = if cgcx.target_is_like_darwin {
@@ -1128,7 +1129,7 @@ fn embed_bitcode(
         let section_flags = if cgcx.is_pe_coff { "n" } else { "e" };
         let asm = create_section_with_flags_asm(".llvmbc", section_flags, bitcode);
         llvm::append_module_inline_asm(llmod, &asm);
-        let asm = create_section_with_flags_asm(".llvmcmd", section_flags, cmdline.as_bytes());
+        let asm = create_section_with_flags_asm(".llvmcmd", section_flags, &[]);
         llvm::append_module_inline_asm(llmod, &asm);
     }
 }
diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index aa29afb7f5b11..7ceb9885716eb 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -76,12 +76,9 @@ pub struct ModuleConfig {
     /// Names of additional optimization passes to run.
     pub passes: Vec<String>,
     /// Some(level) to optimize at a certain level, or None to run
-    /// absolutely no optimizations (used for the metadata module).
+    /// absolutely no optimizations (used for the allocator module).
     pub opt_level: Option<config::OptLevel>,
 
-    /// Some(level) to optimize binary size, or None to not affect program size.
-    pub opt_size: Option<config::OptLevel>,
-
     pub pgo_gen: SwitchWithOptPath,
     pub pgo_use: Option<PathBuf>,
     pub pgo_sample_use: Option<PathBuf>,
@@ -102,7 +99,6 @@ pub struct ModuleConfig {
     pub emit_obj: EmitObj,
     pub emit_thin_lto: bool,
     pub emit_thin_lto_summary: bool,
-    pub bc_cmdline: String,
 
     // Miscellaneous flags. These are mostly copied from command-line
     // options.
@@ -110,7 +106,6 @@ pub struct ModuleConfig {
     pub lint_llvm_ir: bool,
     pub no_prepopulate_passes: bool,
     pub no_builtins: bool,
-    pub time_module: bool,
     pub vectorize_loop: bool,
     pub vectorize_slp: bool,
     pub merge_functions: bool,
@@ -171,7 +166,6 @@ impl ModuleConfig {
             passes: if_regular!(sess.opts.cg.passes.clone(), vec![]),
 
             opt_level: opt_level_and_size,
-            opt_size: opt_level_and_size,
 
             pgo_gen: if_regular!(
                 sess.opts.cg.profile_generate.clone(),
@@ -221,17 +215,12 @@ impl ModuleConfig {
                 sess.opts.output_types.contains_key(&OutputType::ThinLinkBitcode),
                 false
             ),
-            bc_cmdline: sess.target.bitcode_llvm_cmdline.to_string(),
 
             verify_llvm_ir: sess.verify_llvm_ir(),
             lint_llvm_ir: sess.opts.unstable_opts.lint_llvm_ir,
             no_prepopulate_passes: sess.opts.cg.no_prepopulate_passes,
             no_builtins: no_builtins || sess.target.no_builtins,
 
-            // Exclude metadata and allocator modules from time_passes output,
-            // since they throw off the "LLVM passes" measurement.
-            time_module: if_regular!(true, false),
-
             // Copy what clang does by turning on loop vectorization at O2 and
             // slp vectorization at O3.
             vectorize_loop: !sess.opts.cg.no_vectorize_loops
@@ -1740,7 +1729,7 @@ fn spawn_work<'a, B: ExtraBackendMethods>(
     llvm_start_time: &mut Option<VerboseTimingGuard<'a>>,
     work: WorkItem<B>,
 ) {
-    if cgcx.config(work.module_kind()).time_module && llvm_start_time.is_none() {
+    if llvm_start_time.is_none() {
         *llvm_start_time = Some(cgcx.prof.verbose_generic_activity("LLVM_passes"));
     }
 
diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs
index b4556ced0b3fb..10a25bb3caaa1 100644
--- a/compiler/rustc_codegen_ssa/src/base.rs
+++ b/compiler/rustc_codegen_ssa/src/base.rs
@@ -681,33 +681,6 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
 
     let ongoing_codegen = start_async_codegen(backend.clone(), tcx, target_cpu, autodiff_items);
 
-    // Codegen an allocator shim, if necessary.
-    if let Some(kind) = allocator_kind_for_codegen(tcx) {
-        let llmod_id =
-            cgu_name_builder.build_cgu_name(LOCAL_CRATE, &["crate"], Some("allocator")).to_string();
-        let module_llvm = tcx.sess.time("write_allocator_module", || {
-            backend.codegen_allocator(
-                tcx,
-                &llmod_id,
-                kind,
-                // If allocator_kind is Some then alloc_error_handler_kind must
-                // also be Some.
-                tcx.alloc_error_handler_kind(()).unwrap(),
-            )
-        });
-
-        ongoing_codegen.wait_for_signal_to_codegen_item();
-        ongoing_codegen.check_for_errors(tcx.sess);
-
-        // These modules are generally cheap and won't throw off scheduling.
-        let cost = 0;
-        submit_codegened_module_to_llvm(
-            &ongoing_codegen.coordinator,
-            ModuleCodegen::new_allocator(llmod_id, module_llvm),
-            cost,
-        );
-    }
-
     // For better throughput during parallel processing by LLVM, we used to sort
     // CGUs largest to smallest. This would lead to better thread utilization
     // by, for example, preventing a large CGU from being processed last and
@@ -823,6 +796,35 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
         }
     }
 
+    // Codegen an allocator shim, if necessary.
+    // Do this last to ensure the LLVM_passes timer doesn't start while no CGUs have been codegened
+    // yet for the backend to optimize.
+    if let Some(kind) = allocator_kind_for_codegen(tcx) {
+        let llmod_id =
+            cgu_name_builder.build_cgu_name(LOCAL_CRATE, &["crate"], Some("allocator")).to_string();
+        let module_llvm = tcx.sess.time("write_allocator_module", || {
+            backend.codegen_allocator(
+                tcx,
+                &llmod_id,
+                kind,
+                // If allocator_kind is Some then alloc_error_handler_kind must
+                // also be Some.
+                tcx.alloc_error_handler_kind(()).unwrap(),
+            )
+        });
+
+        ongoing_codegen.wait_for_signal_to_codegen_item();
+        ongoing_codegen.check_for_errors(tcx.sess);
+
+        // These modules are generally cheap and won't throw off scheduling.
+        let cost = 0;
+        submit_codegened_module_to_llvm(
+            &ongoing_codegen.coordinator,
+            ModuleCodegen::new_allocator(llmod_id, module_llvm),
+            cost,
+        );
+    }
+
     ongoing_codegen.codegen_finished(tcx);
 
     // Since the main thread is sometimes blocked during codegen, we keep track
diff --git a/compiler/rustc_target/src/spec/json.rs b/compiler/rustc_target/src/spec/json.rs
index d27c1929aef74..ba7a3a3969e38 100644
--- a/compiler/rustc_target/src/spec/json.rs
+++ b/compiler/rustc_target/src/spec/json.rs
@@ -167,7 +167,6 @@ impl Target {
         forward!(main_needs_argc_argv);
         forward!(has_thread_local);
         forward!(obj_is_bitcode);
-        forward!(bitcode_llvm_cmdline);
         forward_opt!(max_atomic_width);
         forward_opt!(min_atomic_width);
         forward!(atomic_cas);
@@ -359,7 +358,6 @@ impl ToJson for Target {
         target_option_val!(main_needs_argc_argv);
         target_option_val!(has_thread_local);
         target_option_val!(obj_is_bitcode);
-        target_option_val!(bitcode_llvm_cmdline);
         target_option_val!(min_atomic_width);
         target_option_val!(max_atomic_width);
         target_option_val!(atomic_cas);
@@ -552,7 +550,6 @@ struct TargetSpecJson {
     main_needs_argc_argv: Option<bool>,
     has_thread_local: Option<bool>,
     obj_is_bitcode: Option<bool>,
-    bitcode_llvm_cmdline: Option<StaticCow<str>>,
     max_atomic_width: Option<u64>,
     min_atomic_width: Option<u64>,
     atomic_cas: Option<bool>,
diff --git a/compiler/rustc_target/src/spec/mod.rs b/compiler/rustc_target/src/spec/mod.rs
index 033590e01a67d..0c7ff7dd16564 100644
--- a/compiler/rustc_target/src/spec/mod.rs
+++ b/compiler/rustc_target/src/spec/mod.rs
@@ -2620,8 +2620,6 @@ pub struct TargetOptions {
     /// If we give emcc .o files that are actually .bc files it
     /// will 'just work'.
     pub obj_is_bitcode: bool,
-    /// Content of the LLVM cmdline section associated with embedded bitcode.
-    pub bitcode_llvm_cmdline: StaticCow<str>,
 
     /// Don't use this field; instead use the `.min_atomic_width()` method.
     pub min_atomic_width: Option<u64>,
@@ -2984,7 +2982,6 @@ impl Default for TargetOptions {
             allow_asm: true,
             has_thread_local: false,
             obj_is_bitcode: false,
-            bitcode_llvm_cmdline: "".into(),
             min_atomic_width: None,
             max_atomic_width: None,
             atomic_cas: true,