diff --git a/csrc/inject/LibraryInjector.cpp b/csrc/inject/LibraryInjector.cpp
index a17e9f8..9871fe3 100644
--- a/csrc/inject/LibraryInjector.cpp
+++ b/csrc/inject/LibraryInjector.cpp
@@ -235,10 +235,18 @@ LibraryInjector::initializeInjectionEnvironment(long &code_injection_address,
   // Copy original registers to working registers
   *working_registers = *original_registers;
 
-  // Find a good address to copy code to
+  // Find a good address to copy code to.
+  // The findFreeMemoryAddress function returns the END of the first executable
+  // region minus a safe offset, placing shellcode in the alignment padding area
+  // that is typically unused but still has execute permissions.
   code_injection_address =
       ProcessUtils::findFreeMemoryAddress(target_process_id_) + 8;
 
+  if (process_tracer_.isDebugMode()) {
+    std::cout << "[DEBUG] PyFlightProfiler: Using injection address at 0x"
+              << std::hex << code_injection_address << std::dec << std::endl;
+  }
+
   // Set the target's rip to the injection address
   // Advance by 2 bytes because rip gets incremented by the size of the current
   // instruction
diff --git a/csrc/inject/ProcessTracer.cpp b/csrc/inject/ProcessTracer.cpp
index 988a354..7ccdb50 100644
--- a/csrc/inject/ProcessTracer.cpp
+++ b/csrc/inject/ProcessTracer.cpp
@@ -96,6 +96,28 @@ bool ProcessTracer::continueExecution() {
   int result = ptrace(PTRACE_CONT, process_id_, NULL, NULL);
   CHECK_PTRACE_RESULT(result, PTRACE_CONT);
 
+  // Wait for the target process to stop (e.g., hit INT3 breakpoint)
+  int wait_status;
+  pid_t waited_pid = waitpid(process_id_, &wait_status, 0);
+  if (waited_pid != process_id_) {
+    if (debug_mode_) {
+      std::cerr << "[ERROR] PyFlightProfiler: waitpid(" << process_id_
+                << ") failed or returned unexpected pid " << waited_pid << ": "
+                << strerror(errno) << std::endl;
+    }
+    return false;
+  }
+
+  // Check if the process stopped (not exited or terminated)
+  if (!WIFSTOPPED(wait_status)) {
+    if (debug_mode_) {
+      std::cerr << "[ERROR] PyFlightProfiler: process " << process_id_
+                << " did not stop as expected, wait_status=" << wait_status
+                << std::endl;
+    }
+    return false;
+  }
+
   // Make sure the target process received SIGTRAP after stopping.
   return verifySignalStatus();
 }
@@ -201,16 +223,15 @@ bool ProcessTracer::writeMemory(unsigned long address, const void *buffer,
  * @return siginfo_t structure containing signal information
  */
 siginfo_t ProcessTracer::getSignalInfo() {
-  sleepMs(5);
-
   siginfo_t signal_info;
-  // When PTRACE_GETSIGINFO returns -1, tracee may not reach int3 point, so
-  // spin on it waiting at most 500ms
-  for (int i = 0; i < 100; i++) {
+  // With waitpid() in continueExecution(), the process should already be
+  // stopped. Retry a few times just in case; a much shorter timeout
+  // suffices.
+  for (int i = 0; i < 10; i++) {
     if (ptrace(PTRACE_GETSIGINFO, process_id_, NULL, &signal_info) != -1) {
       return signal_info;
     }
-    sleepMs(5);
+    sleepMs(1);
   }
 
   // this is mostly due to gil lock not released, so injected code cannot
diff --git a/csrc/inject/ProcessUtils.cpp b/csrc/inject/ProcessUtils.cpp
index 1c412ef..1517a56 100644
--- a/csrc/inject/ProcessUtils.cpp
+++ b/csrc/inject/ProcessUtils.cpp
@@ -15,10 +15,16 @@
 /**
  * @brief Find a free memory address in the target process
  *
- * Parses /proc/[pid]/maps to find a memory region with execute permissions.
+ * Parses /proc/[pid]/maps to find the END of the first executable memory
+ * region. We use the end of the region (with a small offset back) because:
+ * 1. The end of code segments typically has alignment padding (unused space)
+ * 2. This avoids overwriting active code at the beginning of the segment
+ * 3. The padding area is still executable (same permissions as the code
+ * segment)
  *
  * @param process_id PID of the target process
- * @return Address of free memory, or 0 on failure
+ * @return Address of free memory (end of executable region minus offset), or 0
+ * on failure
  */
 long ProcessUtils::findFreeMemoryAddress(pid_t process_id) {
   std::string filename = "/proc/" + std::to_string(process_id) + "/maps";
@@ -30,27 +36,40 @@ long ProcessUtils::findFreeMemoryAddress(pid_t process_id) {
   }
 
   std::string line;
-  long address = 0;
+  long end_address = 0;
 
   while (std::getline(maps_file, line)) {
     std::istringstream iss(line);
     std::string range, permissions, offset, device, inode, path;
 
     if (iss >> range >> permissions >> offset >> device >> inode) {
-      // Extract address from range (format: address1-address2)
-      size_t dash_pos = range.find('-');
-      if (dash_pos != std::string::npos) {
-        std::string address_str = range.substr(0, dash_pos);
-        address = std::stol(address_str, nullptr, 16);
-      }
-
+      // Check if this is an executable region
       if (permissions.find('x') != std::string::npos) {
+        // Extract end address from range (format: start_address-end_address)
+        size_t dash_pos = range.find('-');
+        if (dash_pos != std::string::npos) {
+          std::string end_address_str = range.substr(dash_pos + 1);
+          end_address = std::stol(end_address_str, nullptr, 16);
+        }
         break;
       }
     }
   }
 
-  return address;
+  // Calculate the minimum safe offset for shellcode injection.
+  // The shellcode (inject_shared_library function) is approximately:
+  // - ~80 bytes of assembly instructions (stack ops, calls, int3 breakpoints)
+  // - +2 bytes NOP prefix (for syscall restart handling)
+  // - +16 bytes alignment padding (x86_64 ABI requires 16-byte stack alignment)
+  // - +16 bytes safety margin
+  // Total: ~114 bytes, rounded up to 128 bytes (0x80) for 16-byte alignment
+  const long SHELLCODE_MIN_SIZE = 128;
+
+  // Use the end of the executable region minus the minimum required offset.
+  // This minimizes the risk of overwriting active code while ensuring enough
+  // space for the shellcode in the alignment padding area.
+  return (end_address > SHELLCODE_MIN_SIZE) ? (end_address - SHELLCODE_MIN_SIZE)
+                                            : 0;
 }
 
 /**