CNOT gate implementation using cuStateVec. The example prepares the Bell state $\lvert\Phi^+\rangle = \frac{1}{\sqrt{2}}(\lvert 00\rangle + \lvert 11\rangle)$.
// Compile: nvcc main.cu -o main -lcustatevec
// Run:     ./main
//
// Prepares the Bell state |Phi+> = (|00> + |11>)/sqrt(2) on a 2-qubit state
// vector by applying a Hadamard gate to qubit 0 followed by a CNOT with
// control = qubit 0 and target = qubit 1, then prints the four amplitudes.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda_runtime.h>
#include <custatevec.h>

// M_SQRT1_2 is a POSIX extension and is not provided by every toolchain.
#ifndef M_SQRT1_2
#define M_SQRT1_2 0.70710678118654752440
#endif

// Abort with a readable message if a CUDA runtime call fails.
#define CUDA_CHECK(call)                                                      \
    do {                                                                      \
        cudaError_t err_ = (call);                                            \
        if (err_ != cudaSuccess) {                                            \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,     \
                    cudaGetErrorString(err_));                                \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)

// Abort with a readable message if a cuStateVec call fails.
#define CUSTATEVEC_CHECK(call)                                                \
    do {                                                                      \
        custatevecStatus_t st_ = (call);                                      \
        if (st_ != CUSTATEVEC_STATUS_SUCCESS) {                               \
            fprintf(stderr, "cuStateVec error %s:%d: %s\n", __FILE__,         \
                    __LINE__, custatevecGetErrorString(st_));                 \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)

int main() {
    const int nQubits = 2;
    const int dim = 1 << nQubits;  // number of amplitudes in the state vector
    const size_t svBytes = dim * sizeof(cuDoubleComplex);

    // Host copy of the state vector, initialised to |00>
    // (amplitude 1 at index 0, remaining entries zero-initialised).
    cuDoubleComplex h_sv[dim] = {{1, 0}};

    cuDoubleComplex *d_sv = NULL;
    CUDA_CHECK(cudaMalloc(&d_sv, svBytes));
    CUDA_CHECK(cudaMemcpy(d_sv, h_sv, svBytes, cudaMemcpyHostToDevice));

    custatevecHandle_t handle;
    CUSTATEVEC_CHECK(custatevecCreate(&handle));

    // Apply H to qubit 0. Qubit 0 is the least significant bit of the state
    // vector index, so |00> becomes (|00> + |01>)/sqrt(2) in |q1 q0> notation.
    const double s = M_SQRT1_2;
    cuDoubleComplex h_gate[4] = {{s, 0}, {s, 0}, {s, 0}, {-s, 0}};
    int32_t h_targets[] = {0};
    CUSTATEVEC_CHECK(custatevecApplyMatrix(
        handle, d_sv, CUDA_C_64F, nQubits,
        h_gate, CUDA_C_64F, CUSTATEVEC_MATRIX_LAYOUT_ROW, 0,
        h_targets, 1,
        NULL, NULL, 0,  // no control qubits
        CUSTATEVEC_COMPUTE_64F, NULL, 0));

    // Apply CNOT as a controlled Pauli-X: control = qubit 0 (active on 1),
    // target = qubit 1. This turns the state into the Bell state.
    cuDoubleComplex x_gate[4] = {{0, 0}, {1, 0}, {1, 0}, {0, 0}};
    int32_t targets[] = {1};
    int32_t controls[] = {0};
    int32_t ctrl_vals[] = {1};
    CUSTATEVEC_CHECK(custatevecApplyMatrix(
        handle, d_sv, CUDA_C_64F, nQubits,
        x_gate, CUDA_C_64F, CUSTATEVEC_MATRIX_LAYOUT_ROW, 0,
        targets, 1,
        controls, ctrl_vals, 1,
        CUSTATEVEC_COMPUTE_64F, NULL, 0));

    // Blocking copy of the result back to the host.
    CUDA_CHECK(cudaMemcpy(h_sv, d_sv, svBytes, cudaMemcpyDeviceToHost));

    // Basis-state labels in |q1 q0> order, matching the state vector index.
    const char *labels[] = {"00", "01", "10", "11"};
    for (int i = 0; i < dim; ++i) {
        printf("|%s>: (%.4f, %.4f)\n", labels[i],
               cuCreal(h_sv[i]), cuCimag(h_sv[i]));
    }
    // Expected output:
    // |00>: (0.7071, 0.0000)
    // |01>: (0.0000, 0.0000)
    // |10>: (0.0000, 0.0000)
    // |11>: (0.7071, 0.0000)   <- Bell state |Phi+>

    CUSTATEVEC_CHECK(custatevecDestroy(handle));
    CUDA_CHECK(cudaFree(d_sv));
    return 0;
}