Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions tensorflow_quantum/core/ops/noise/noisy_expectation_op_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,16 @@ def test_noisy_expectation_inputs(self):
[[-1]] * batch_size)
# pylint: enable=too-many-function-args

with self.assertRaisesRegex(tf.errors.InvalidArgumentError,
'less than or equal to 10,000,000'):
# pylint: disable=too-many-function-args
noisy_expectation_op.expectation(
util.convert_to_tensor(circuit_batch), symbol_names,
symbol_values_array,
util.convert_to_tensor([[x] for x in pauli_sums]),
[[20000000]] * batch_size)
# pylint: enable=too-many-function-args

with self.assertRaisesRegex(tf.errors.InvalidArgumentError,
expected_regex='do not match'):
# wrong symbol_values size.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,16 @@ def test_noisy_expectation_inputs(self):
[[-1]] * batch_size)
# pylint: enable=too-many-function-args

with self.assertRaisesRegex(tf.errors.InvalidArgumentError,
'less than or equal to 10,000,000'):
# pylint: disable=too-many-function-args
noisy_sampled_expectation_op.sampled_expectation(
util.convert_to_tensor(circuit_batch), symbol_names,
symbol_values_array,
util.convert_to_tensor([[x] for x in pauli_sums]),
[[20000000]] * batch_size)
# pylint: enable=too-many-function-args

with self.assertRaisesRegex(tf.errors.InvalidArgumentError,
expected_regex='do not match'):
# wrong symbol_values size.
Expand Down
7 changes: 7 additions & 0 deletions tensorflow_quantum/core/ops/noise/noisy_samples_op_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,13 @@ def test_simulate_samples_inputs(self):
symbol_names, symbol_values_array,
['junk'])

with self.assertRaisesRegex(tf.errors.InvalidArgumentError,
'must be between 0 and 10,000,000'):
# num_samples is too large (above capacity limit).
noisy_samples_op.samples(util.convert_to_tensor(circuit_batch),
symbol_names, symbol_values_array,
[20000000])

with self.assertRaisesRegex(TypeError, 'missing'):
# too few tensors.
# pylint: disable=no-value-for-parameter
Expand Down
43 changes: 24 additions & 19 deletions tensorflow_quantum/core/ops/noise/tfq_noisy_expectation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@ class TfqNoisyExpectationOp : public tensorflow::OpKernel {

Status parse_status = ::tensorflow::Status();
auto p_lock = absl::Mutex();
auto construct_f = [&](int64_t start, int64_t end) {
for (int64_t i = start; i < end; i++) {
auto construct_f = [&](int start, int end) {
for (int i = start; i < end; i++) {
Status local = NoisyQsimCircuitFromProgram(
programs[i], maps[i], num_qubits[i], false, &qsim_circuits[i]);
NESTED_FN_STATUS_SYNC(parse_status, local, p_lock);
Expand All @@ -132,8 +132,8 @@ class TfqNoisyExpectationOp : public tensorflow::OpKernel {
programs.size(), num_cycles, construct_f);
OP_REQUIRES_OK(context, parse_status);

uint64_t max_num_qubits = 0;
for (const uint64_t num : num_qubits) {
int max_num_qubits = 0;

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we avoid downgrading these ?

for (const int num : num_qubits) {
max_num_qubits = std::max(max_num_qubits, num);
}

Expand Down Expand Up @@ -173,7 +173,7 @@ class TfqNoisyExpectationOp : public tensorflow::OpKernel {
qsim::MultiQubitGateFuser, Simulator>;

// Begin simulation.
uint64_t largest_nq = 1;
int largest_nq = 1;
Simulator sim = Simulator(tfq_for);
StateSpace ss = StateSpace(tfq_for);
auto sv = ss.Create(largest_nq);
Expand All @@ -187,15 +187,16 @@ class TfqNoisyExpectationOp : public tensorflow::OpKernel {
}
}
random_gen.Init(tensorflow::random::New64(), tensorflow::random::New64());
auto local_gen =
random_gen.ReserveSamples128(ncircuits.size() * max_n_shots + 1);
int64_t num_samples_needed =
static_cast<int64_t>(ncircuits.size()) * max_n_shots + 1;
auto local_gen = random_gen.ReserveSamples128(num_samples_needed);
tensorflow::random::SimplePhilox rand_source(&local_gen);

// Simulate programs one by one. Parallelizing over state vectors
// we no longer parallelize over circuits. Each time we encounter a
// a larger circuit we will grow the Statevector as necessary.
for (size_t i = 0; i < ncircuits.size(); i++) {
uint64_t nq = num_qubits[i];
int nq = num_qubits[i];

// (#679) Just ignore empty program
if (ncircuits[i].channels.size() == 0) {
Expand Down Expand Up @@ -256,7 +257,7 @@ class TfqNoisyExpectationOp : public tensorflow::OpKernel {
}

void ComputeSmall(const std::vector<int>& num_qubits,
const uint64_t max_num_qubits,
const int max_num_qubits,
const std::vector<NoisyQsimCircuit>& ncircuits,
const std::vector<std::vector<PauliSum>>& pauli_sums,
const std::vector<std::vector<int>>& num_samples,
Expand Down Expand Up @@ -294,23 +295,23 @@ class TfqNoisyExpectationOp : public tensorflow::OpKernel {

Status compute_status = ::tensorflow::Status();
auto c_lock = absl::Mutex();
auto DoWork = [&](int64_t start, int64_t end) {
auto DoWork = [&](int start, int end) {
// Begin simulation.
const auto tfq_for = qsim::SequentialFor(1);
uint64_t largest_nq = 1;
int largest_nq = 1;
Simulator sim = Simulator(tfq_for);
StateSpace ss = StateSpace(tfq_for);
auto sv = ss.Create(largest_nq);
auto scratch = ss.Create(largest_nq);

int n_rand = ncircuits.size() * max_n_shots + 1;
int64_t n_rand = static_cast<int64_t>(ncircuits.size()) * max_n_shots + 1;
n_rand = (n_rand + num_threads) / num_threads;
auto local_gen =
random_gen.ReserveSamples128(ncircuits.size() * max_n_shots + 1);
auto local_gen = random_gen.ReserveSamples128(
static_cast<int64_t>(ncircuits.size()) * max_n_shots + 1);
Comment on lines +309 to +310

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

In ComputeSmall, each thread running DoWork should only reserve its portion of the random samples (n_rand) from the shared random_gen. Currently, n_rand is calculated but unused, and instead each thread reserves the entire undivided amount (static_cast<int64_t>(ncircuits.size()) * max_n_shots + 1). This results in reserving num_threads times more samples than actually needed, wasting the random number generator's state space and causing unnecessary overhead.

      auto local_gen = random_gen.ReserveSamples128(n_rand);

Comment on lines +307 to +310

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

In ComputeSmall, each thread in the parallel loop reserves random samples from the global generator. However, instead of reserving the per-thread required samples (n_rand), each thread is reserving the total combined samples for all threads (static_cast<int64_t>(ncircuits.size()) * max_n_shots + 1). This leads to excessive skipping of the Philox random number generator state and unnecessary overhead. Using n_rand directly (as is correctly done in tfq_noisy_sampled_expectation.cc) resolves this inefficiency.

      int64_t n_rand = static_cast<int64_t>(ncircuits.size()) * max_n_shots + 1;
      n_rand = (n_rand + num_threads) / num_threads;
      auto local_gen = random_gen.ReserveSamples128(n_rand);

tensorflow::random::SimplePhilox rand_source(&local_gen);

for (size_t i = 0; i < ncircuits.size(); i++) {
uint64_t nq = num_qubits[i];
int nq = num_qubits[i];
int rep_offset = rep_offsets[start][i];

// (#679) Just ignore empty program
Expand Down Expand Up @@ -343,8 +344,10 @@ class TfqNoisyExpectationOp : public tensorflow::OpKernel {

// Compute expectations across all ops using this trajectory.
for (size_t j = 0; j < pauli_sums[i].size(); j++) {
int p_reps = (num_samples[i][j] + num_threads - 1) / num_threads;
if (run_samples[j] >= p_reps + rep_offset) {
int64_t p_reps =
(static_cast<int64_t>(num_samples[i][j]) + num_threads - 1) /
num_threads;
if (static_cast<int64_t>(run_samples[j]) >= p_reps + rep_offset) {
continue;
}
float exp_v = 0.0;
Expand All @@ -360,8 +363,10 @@ class TfqNoisyExpectationOp : public tensorflow::OpKernel {
// Check if we have run enough trajectories for all ops.
bool break_loop = true;
for (size_t j = 0; j < num_samples[i].size(); j++) {
int p_reps = (num_samples[i][j] + num_threads - 1) / num_threads;
if (run_samples[j] < p_reps + rep_offset) {
int64_t p_reps =
(static_cast<int64_t>(num_samples[i][j]) + num_threads - 1) /
num_threads;
if (static_cast<int64_t>(run_samples[j]) < p_reps + rep_offset) {
break_loop = false;
break;
}
Expand Down
40 changes: 23 additions & 17 deletions tensorflow_quantum/core/ops/noise/tfq_noisy_sampled_expectation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ class TfqNoisySampledExpectationOp : public tensorflow::OpKernel {

Status parse_status = ::tensorflow::Status();
auto p_lock = absl::Mutex();
auto construct_f = [&](int64_t start, int64_t end) {
for (int64_t i = start; i < end; i++) {
auto construct_f = [&](int start, int end) {
for (int i = start; i < end; i++) {
Status local = NoisyQsimCircuitFromProgram(
programs[i], maps[i], num_qubits[i], false, &qsim_circuits[i]);
NESTED_FN_STATUS_SYNC(parse_status, local, p_lock);
Expand All @@ -133,8 +133,8 @@ class TfqNoisySampledExpectationOp : public tensorflow::OpKernel {
programs.size(), num_cycles, construct_f);
OP_REQUIRES_OK(context, parse_status);

uint64_t max_num_qubits = 0;
for (const uint64_t num : num_qubits) {
int max_num_qubits = 0;
for (const int num : num_qubits) {
max_num_qubits = std::max(max_num_qubits, num);
}

Expand Down Expand Up @@ -174,7 +174,7 @@ class TfqNoisySampledExpectationOp : public tensorflow::OpKernel {
qsim::MultiQubitGateFuser, Simulator>;

// Begin simulation.
uint64_t largest_nq = 1;
int largest_nq = 1;
Simulator sim = Simulator(tfq_for);
StateSpace ss = StateSpace(tfq_for);
auto sv = ss.Create(largest_nq);
Expand All @@ -191,15 +191,16 @@ class TfqNoisySampledExpectationOp : public tensorflow::OpKernel {
}
}
random_gen.Init(tensorflow::random::New64(), tensorflow::random::New64());
auto local_gen = random_gen.ReserveSamples128(
ncircuits.size() * (1 + max_psum_length) * max_n_shots);
int64_t num_samples_needed = static_cast<int64_t>(ncircuits.size()) *
(1 + max_psum_length) * max_n_shots;
auto local_gen = random_gen.ReserveSamples128(num_samples_needed);
tensorflow::random::SimplePhilox rand_source(&local_gen);

// Simulate programs one by one. Parallelizing over state vectors
// we no longer parallelize over circuits. Each time we encounter a
// a larger circuit we will grow the Statevector as necessary.
for (size_t i = 0; i < ncircuits.size(); i++) {
uint64_t nq = num_qubits[i];
int nq = num_qubits[i];

// (#679) Just ignore empty program
if (ncircuits[i].channels.empty()) {
Expand Down Expand Up @@ -260,7 +261,7 @@ class TfqNoisySampledExpectationOp : public tensorflow::OpKernel {
}

void ComputeSmall(const std::vector<int>& num_qubits,
const uint64_t max_num_qubits,
const int max_num_qubits,
const std::vector<NoisyQsimCircuit>& ncircuits,
const std::vector<std::vector<PauliSum>>& pauli_sums,
const std::vector<std::vector<int>>& num_samples,
Expand Down Expand Up @@ -301,22 +302,23 @@ class TfqNoisySampledExpectationOp : public tensorflow::OpKernel {

Status compute_status = ::tensorflow::Status();
auto c_lock = absl::Mutex();
auto DoWork = [&](int64_t start, int64_t end) {
auto DoWork = [&](int start, int end) {
// Begin simulation.
const auto tfq_for = qsim::SequentialFor(1);
uint64_t largest_nq = 1;
int largest_nq = 1;
Simulator sim = Simulator(tfq_for);
StateSpace ss = StateSpace(tfq_for);
auto sv = ss.Create(largest_nq);
auto scratch = ss.Create(largest_nq);

int num_rand = ncircuits.size() * (1 + max_psum_length) * max_n_shots;
int64_t num_rand = static_cast<int64_t>(ncircuits.size()) *
(1 + max_psum_length) * max_n_shots;
num_rand = (num_rand + num_threads) / num_threads + 1;
auto local_gen = random_gen.ReserveSamples128(num_rand);
tensorflow::random::SimplePhilox rand_source(&local_gen);

for (size_t i = 0; i < ncircuits.size(); i++) {
uint64_t nq = num_qubits[i];
int nq = num_qubits[i];
int rep_offset = rep_offsets[start][i];

// (#679) Just ignore empty program
Expand Down Expand Up @@ -349,8 +351,10 @@ class TfqNoisySampledExpectationOp : public tensorflow::OpKernel {

// Compute expectations across all ops using this trajectory.
for (size_t j = 0; j < pauli_sums[i].size(); j++) {
int p_reps = (num_samples[i][j] + num_threads - 1) / num_threads;
if (run_samples[j] >= p_reps + rep_offset) {
int64_t p_reps =
(static_cast<int64_t>(num_samples[i][j]) + num_threads - 1) /
num_threads;
if (static_cast<int64_t>(run_samples[j]) >= p_reps + rep_offset) {
continue;
}
float exp_v = 0.0;
Expand All @@ -366,8 +370,10 @@ class TfqNoisySampledExpectationOp : public tensorflow::OpKernel {
// Check if we have run enough trajectories for all ops.
bool break_loop = true;
for (size_t j = 0; j < num_samples[i].size(); j++) {
int p_reps = (num_samples[i][j] + num_threads - 1) / num_threads;
if (run_samples[j] < p_reps + rep_offset) {
int64_t p_reps =
(static_cast<int64_t>(num_samples[i][j]) + num_threads - 1) /
num_threads;
if (static_cast<int64_t>(run_samples[j]) < p_reps + rep_offset) {
break_loop = false;
break;
}
Expand Down
Loading