iSWAP gate implementation using cuStateVec. The example prepares $\lvert 10\rangle$ and applies iSWAP to produce $i\lvert 01\rangle$.
// Compile: nvcc main.cu -o main -lcustatevec
// Run:     ./main
//
// Applies the two-qubit iSWAP gate to the basis state |10> with cuStateVec
// and prints the resulting amplitudes of |01> and |10>.
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <custatevec.h>

// Abort with a diagnostic if a CUDA runtime call fails. Unchecked failures
// would otherwise surface later as an unmodified or garbage state vector.
#define CUDA_CHECK(call)                                                     \
    do {                                                                     \
        cudaError_t err_ = (call);                                           \
        if (err_ != cudaSuccess) {                                           \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,    \
                    cudaGetErrorString(err_));                               \
            exit(EXIT_FAILURE);                                              \
        }                                                                    \
    } while (0)

// Same as CUDA_CHECK, for cuStateVec calls (which use custatevecStatus_t).
#define CUSTATEVEC_CHECK(call)                                               \
    do {                                                                     \
        custatevecStatus_t st_ = (call);                                     \
        if (st_ != CUSTATEVEC_STATUS_SUCCESS) {                              \
            fprintf(stderr, "cuStateVec error %s:%d: %s\n", __FILE__,        \
                    __LINE__, custatevecGetErrorString(st_));                \
            exit(EXIT_FAILURE);                                              \
        }                                                                    \
    } while (0)

int main() {
    const int nQubits = 2;
    const int dim = 1 << nQubits;  // number of amplitudes: 2^nQubits
    const size_t svBytes = dim * sizeof(cuDoubleComplex);

    // Prepare |10>. cuStateVec uses little-endian index bits (bit k of the
    // state-vector index is qubit k), so index 2 = 0b10 means qubit 1 = 1 and
    // qubit 0 = 0, with the ket written as |q1 q0>.
    cuDoubleComplex h_sv[dim] = {};
    h_sv[2] = make_cuDoubleComplex(1.0, 0.0);

    cuDoubleComplex *d_sv = NULL;
    CUDA_CHECK(cudaMalloc(&d_sv, svBytes));
    CUDA_CHECK(cudaMemcpy(d_sv, h_sv, svBytes, cudaMemcpyHostToDevice));

    custatevecHandle_t handle;
    CUSTATEVEC_CHECK(custatevecCreate(&handle));

    // iSWAP gate matrix (4x4, row-major): swaps |01> and |10> and multiplies
    // each by i; |00> and |11> are left unchanged.
    cuDoubleComplex gate[16] = {
        {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
        {0.0, 0.0}, {0.0, 0.0}, {0.0, 1.0}, {0.0, 0.0},
        {0.0, 0.0}, {0.0, 1.0}, {0.0, 0.0}, {0.0, 0.0},
        {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}
    };

    // iSWAP is symmetric under exchange of its two qubits, so the order of
    // the targets does not affect the result here.
    int32_t targets[] = {0, 1};

    CUSTATEVEC_CHECK(custatevecApplyMatrix(
        handle, d_sv, CUDA_C_64F, nQubits,
        gate, CUDA_C_64F, CUSTATEVEC_MATRIX_LAYOUT_ROW,
        0,                 // adjoint: apply the matrix as given
        targets, 2,        // target qubits and their count
        NULL, NULL, 0,     // no control qubits
        CUSTATEVEC_COMPUTE_64F,
        NULL, 0));         // no external workspace supplied

    // Blocking copy on the default stream; also surfaces any asynchronous
    // error raised while the gate was being applied.
    CUDA_CHECK(cudaMemcpy(h_sv, d_sv, svBytes, cudaMemcpyDeviceToHost));

    printf("|01>: (%.4f, %.4f)\n", cuCreal(h_sv[1]), cuCimag(h_sv[1]));
    printf("|10>: (%.4f, %.4f)\n", cuCreal(h_sv[2]), cuCimag(h_sv[2]));
    // Expected output:
    // |01>: (0.0000, 1.0000)   <- i|01>
    // |10>: (0.0000, 0.0000)

    CUSTATEVEC_CHECK(custatevecDestroy(handle));
    CUDA_CHECK(cudaFree(d_sv));
    return 0;
}