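Z gate implementation using cuStateVec. Applying the Z gate to $\lvert 0\rangle$ has no visible effect (it leaves $\lvert 0\rangle$ unchanged), so this example first prepares $\lvert +\rangle$ with a Hadamard gate. Z then maps $\lvert +\rangle$ to $\lvert -\rangle$, and the phase flip shows up as the sign change of the $\lvert 1\rangle$ amplitude in the printed state vector (applying a second Hadamard would convert that relative phase into a measurable difference via interference).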
// Compile: nvcc main.cu -o main -lcustatevec
// Run:     ./main
//
// Single-qubit cuStateVec demo: start in |0>, apply H to reach |+>, then apply
// Z to reach |->, and print the resulting amplitudes.

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <cuComplex.h>
#include <cuda_runtime.h>
#include <custatevec.h>

// M_SQRT1_2 is a POSIX extension; provide it when the host toolchain does not.
#ifndef M_SQRT1_2
#define M_SQRT1_2 0.70710678118654752440
#endif

// Abort with a readable message if a CUDA runtime call fails.
#define CUDA_CHECK(call)                                                      \
    do {                                                                      \
        cudaError_t err_ = (call);                                            \
        if (err_ != cudaSuccess) {                                            \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,     \
                    cudaGetErrorString(err_));                                \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)

// Abort with a readable message if a cuStateVec call fails.
#define CUSTATEVEC_CHECK(call)                                                \
    do {                                                                      \
        custatevecStatus_t st_ = (call);                                      \
        if (st_ != CUSTATEVEC_STATUS_SUCCESS) {                               \
            fprintf(stderr, "cuStateVec error %s:%d: %s\n", __FILE__,         \
                    __LINE__, custatevecGetErrorString(st_));                 \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)

// Apply a row-major 2x2 complex-double matrix to one target qubit of the
// device state vector `d_sv` (no control qubits, no adjoint, no extra
// workspace). Exits the process on failure.
static void applySingleQubitGate(custatevecHandle_t handle,
                                 cuDoubleComplex *d_sv,
                                 int nQubits,
                                 const cuDoubleComplex *gate,
                                 int32_t target) {
    const int32_t targets[] = {target};
    CUSTATEVEC_CHECK(custatevecApplyMatrix(
        handle, d_sv, CUDA_C_64F, nQubits,
        gate, CUDA_C_64F, CUSTATEVEC_MATRIX_LAYOUT_ROW, /*adjoint=*/0,
        targets, /*nTargets=*/1,
        /*controls=*/NULL, /*controlBitValues=*/NULL, /*nControls=*/0,
        CUSTATEVEC_COMPUTE_64F,
        /*extraWorkspace=*/NULL, /*extraWorkspaceSizeInBytes=*/0));
}

int main() {
    const int nQubits = 1;
    const int dim = 1 << nQubits;                      // number of amplitudes
    const size_t svBytes = dim * sizeof(cuDoubleComplex);

    // Host copy of the state vector, initialised to |0>.
    cuDoubleComplex h_sv[2] = {{1, 0}, {0, 0}};

    cuDoubleComplex *d_sv = NULL;
    CUDA_CHECK(cudaMalloc(&d_sv, svBytes));
    CUDA_CHECK(cudaMemcpy(d_sv, h_sv, svBytes, cudaMemcpyHostToDevice));

    custatevecHandle_t handle;
    CUSTATEVEC_CHECK(custatevecCreate(&handle));

    // Apply H: |0> -> |+>
    const double s = M_SQRT1_2;
    const cuDoubleComplex h_gate[4] = {{s, 0}, {s, 0}, {s, 0}, {-s, 0}};
    applySingleQubitGate(handle, d_sv, nQubits, h_gate, 0);

    // Apply Z: |+> -> |->
    const cuDoubleComplex z_gate[4] = {{1, 0}, {0, 0}, {0, 0}, {-1, 0}};
    applySingleQubitGate(handle, d_sv, nQubits, z_gate, 0);

    // Blocking copy back to the host so the amplitudes can be printed.
    CUDA_CHECK(cudaMemcpy(h_sv, d_sv, svBytes, cudaMemcpyDeviceToHost));

    for (int i = 0; i < dim; i++)
        printf("|%d>: (%.4f, %.4f)\n", i, cuCreal(h_sv[i]), cuCimag(h_sv[i]));
    // Expected output:
    // |0>: ( 0.7071, 0.0000)
    // |1>: (-0.7071, 0.0000)   <- |->

    CUSTATEVEC_CHECK(custatevecDestroy(handle));
    CUDA_CHECK(cudaFree(d_sv));
    return 0;
}