20 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: v1_in
21 REAL(KIND=8),
DIMENSION(:,:,:),
ALLOCATABLE :: v_out
24 INTEGER,
OPTIONAL :: opt_nb_plane
25 INTEGER :: np, bloc_size, nb_field, &
26 m_max, m_max_c, rang, nb_procs, mpid, m_max_pad, n_r_pad
27 INTEGER(KIND=8) :: fftw_plan_multi_c2r
28 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:) :: cu
29 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
30 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:) :: dist_field, combined_field
31 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
32 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
35 #include "petsc/finclude/petsc.h"
36 mpi_comm :: communicator
38 CALL mpi_comm_size(communicator, nb_procs, code)
39 CALL mpi_comm_rank(communicator, rang, code)
42 nb_field =
SIZE(v1_in,2)
43 m_max_c =
SIZE(v1_in,3)
44 m_max = m_max_c*nb_procs
45 IF (mod(nb_field,2)/=0 .OR. m_max_c==0)
THEN
46 CALL
error_petsc(.OR.
'Bug in FFT_PAR_REAL: MOD(nb_field,2)/=0 m_max_c==0')
51 IF (modulo(np,nb_procs)==0)
THEN
52 bloc_size = np/nb_procs
54 CALL
error_petsc(
'Bug in FFT_PAR_REAL: np is not a multiple of nb_procs')
57 IF (present(opt_nb_plane))
THEN
58 IF (opt_nb_plane> 2*m_max-1)
THEN
59 m_max_pad = (opt_nb_plane+1)/2
68 ALLOCATE(cu(m_max_pad,nb_field/2, bloc_size))
69 ALLOCATE(dist_field(m_max_c,nb_field,np))
70 ALLOCATE(combined_field(m_max_c,nb_field,np))
73 dist_field(i,:,:) = transpose(v1_in(:,:,i))
76 longueur_tranche=bloc_size*m_max_c*nb_field
78 mpid=mpi_double_precision
79 CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
80 mpid, communicator, code)
85 shiftc = (nb-1)*bloc_size
86 shiftl = (nb-1)*m_max_c
93 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
94 -combined_field(:,2*nf,jindex),kind=8)/2
98 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
110 onembed(1)= m_max_pad
111 howmany = bloc_size*nb_field/2
117 IF (
ALLOCATED(v_out))
DEALLOCATE(v_out)
118 ALLOCATE(v_out(n_r_pad,nb_field/2,bloc_size))
120 CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
121 onembed, ostride, odist, v_out, inembed, istride, idist, fftw_estimate)
122 CALL dfftw_execute(fftw_plan_multi_c2r)
124 DEALLOCATE(cu, dist_field, combined_field)
135 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: v1_in, v2_in
136 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: v_out
137 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
138 INTEGER,
INTENT(IN) :: nb_procs, bloc_size, m_max_pad
140 INTEGER :: np, np_tot, nb_field, &
141 m_max, m_max_c, mpid, n_r_pad
142 INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
143 COMPLEX(KIND=8),
DIMENSION(m_max_pad,SIZE(V1_in,2),bloc_size) :: cu
144 COMPLEX(KIND=8),
DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_cu
145 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,SIZE(V1_in,2),bloc_size) :: ru
146 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru
147 COMPLEX(KIND=8),
DIMENSION(SIZE(V1_in,2)/2,bloc_size) :: intermediate
148 REAL(KIND=8),
DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field, combined_field
149 COMPLEX(KIND=8),
DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu, dist_prod_cu
150 COMPLEX(KIND=8),
DIMENSION(SIZE(V1_in,3),bloc_size*nb_procs,SIZE(V1_in,2)/2) :: out_prod_cu
153 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
157 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
158 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
168 #include "petsc/finclude/petsc.h"
169 mpi_comm :: communicator
171 IF (present(temps)) temps = 0.d0
173 nb_field=
SIZE(v1_in,2)
174 m_max_c =
SIZE(v1_in,3)
175 m_max = m_max_c*nb_procs
176 np_tot = nb_procs*bloc_size
178 n_r_pad=2*m_max_pad-1
180 IF (mod(nb_field,2)/=0 .OR. m_max_c==0)
THEN
193 dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
194 dist_field(i,nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
196 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
198 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
200 longueur_tranche=bloc_size*m_max_c*nb_field*2
203 mpid=mpi_double_precision
204 CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
205 mpid, communicator, code)
206 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
214 shiftc = (nb-1)*bloc_size
215 shiftl = (nb-1)*m_max_c
223 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
224 -combined_field(:,2*nf,jindex),kind=8)/2
228 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
234 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
237 fft_dim=1; istride=1; ostride=1;
241 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
242 odist=m_max_pad; onembed(1)=m_max_pad
245 howmany=bloc_size*nb_field
248 CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
249 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
251 CALL dfftw_execute(fftw_plan_multi_c2r)
254 IF (nb_field==6)
THEN
255 prod_ru(:,1,:) = ru(:,2,:)*ru(:,6,:) - ru(:,3,:)*ru(:,5,:)
256 prod_ru(:,2,:) = ru(:,3,:)*ru(:,4,:) - ru(:,1,:)*ru(:,6,:)
257 prod_ru(:,3,:) = ru(:,1,:)*ru(:,5,:) - ru(:,2,:)*ru(:,4,:)
262 CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
263 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
265 CALL dfftw_execute(fftw_plan_multi_r2c)
268 prod_cu = prod_cu/n_r_pad
270 IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -
t
275 combined_prod_cu(:,:,1)=prod_cu(1,:,:)
278 combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
281 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
284 longueur_tranche=bloc_size*m_max_c*nb_field
285 mpid=mpi_double_precision
286 CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
287 mpid,communicator,code)
288 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
293 shiftc = (nb-1)*bloc_size
294 shiftl = (nb-1)*m_max_c
295 intermediate = dist_prod_cu(:,:,shiftl+i)
297 IF (n+shiftc > np ) cycle
298 DO i_field = 1, nb_field/2
299 v_out(n+shiftc, i_field*2-1, i) =
REAL (intermediate(i_field,n),kind=8)
300 v_out(n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
305 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
317 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: v1_in, v2_in
318 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: v_out
319 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
320 LOGICAL,
OPTIONAL,
INTENT(IN) :: padding
321 INTEGER,
INTENT(IN) :: bloc_size, m_max_pad, nb_procs
322 INTEGER :: np, np_tot, nb_field, m_max, m_max_c, mpid, n_r_pad
323 INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
325 COMPLEX(KIND=8),
DIMENSION(m_max_pad, SIZE(V1_in,2), bloc_size) :: cu
326 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,SIZE(V1_in,2),bloc_size) :: ru
327 COMPLEX(KIND=8),
DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu
328 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru
329 COMPLEX(KIND=8),
DIMENSION(SIZE(V1_in,2)/2,bloc_size) :: intermediate
330 REAL(KIND=8),
DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field, combined_field
331 COMPLEX(KIND=8),
DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
332 COMPLEX(KIND=8),
DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
335 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
339 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
340 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
343 #include "petsc/finclude/petsc.h"
344 mpi_comm :: communicator
346 IF (present(temps)) temps = 0.d0
349 nb_field=
SIZE(v1_in,2)
350 m_max_c =
SIZE(v1_in,3)
351 m_max = m_max_c*nb_procs
352 n_r_pad=2*m_max_pad-1
353 np_tot = nb_procs*bloc_size
355 IF (mod(nb_field,2)/=0 .OR. m_max_c==0)
THEN
373 dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
374 dist_field(i,nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
376 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
378 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
380 longueur_tranche=bloc_size*m_max_c*nb_field*2
383 mpid=mpi_double_precision
384 CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
385 mpid, communicator, code)
386 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
394 shiftc = (nb-1)*bloc_size
395 shiftl = (nb-1)*m_max_c
403 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
404 -combined_field(:,2*nf,jindex),kind=8)/2
408 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
414 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
417 fft_dim=1; istride=1; ostride=1;
421 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
422 odist=m_max_pad; onembed(1)=m_max_pad
425 howmany=bloc_size*nb_field
429 CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
430 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
432 CALL dfftw_execute(fftw_plan_multi_c2r)
435 IF (nb_field==6)
THEN
436 prod_ru(:,1,:) = ru(:,2,:)*ru(:,6,:) - ru(:,3,:)*ru(:,5,:)
437 prod_ru(:,2,:) = ru(:,3,:)*ru(:,4,:) - ru(:,1,:)*ru(:,6,:)
438 prod_ru(:,3,:) = ru(:,1,:)*ru(:,5,:) - ru(:,2,:)*ru(:,4,:)
443 CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
444 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
446 CALL dfftw_execute(fftw_plan_multi_r2c)
449 prod_cu = prod_cu/n_r_pad
451 IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -
t
456 combined_prod_cu(:,:,1)=prod_cu(1,:,:)
459 combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
462 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
465 longueur_tranche=bloc_size*m_max_c*nb_field
466 mpid=mpi_double_precision
467 CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
468 mpid,communicator,code)
469 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
474 shiftc = (nb-1)*bloc_size
475 shiftl = (nb-1)*m_max_c
476 intermediate = dist_prod_cu(:,:,shiftl+i)
478 IF (n+shiftc > np ) cycle
479 DO i_field = 1, nb_field/2
480 v_out(n+shiftc, i_field*2-1, i) =
REAL (intermediate(i_field,n),kind=8)
481 v_out(n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
486 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
492 bloc_size, m_max_pad,l_g, opt_norm_out, opt_m_vel, opt_norm, temps, padding)
500 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: v1_in, v2_in
501 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: v_out
502 REAL(KIND=8),
DIMENSION(:,:),
OPTIONAL,
INTENT(OUT) :: opt_norm_out
503 REAL(KIND=8),
DIMENSION(:,:),
OPTIONAL,
INTENT(IN) :: opt_norm
504 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
505 LOGICAL,
OPTIONAL,
INTENT(IN) :: padding
506 INTEGER,
INTENT(IN) :: bloc_size, m_max_pad, nb_procs
507 REAL(KIND=8),
OPTIONAL,
INTENT(IN) :: opt_m_vel
508 INTEGER,
INTENT(IN) :: l_g
509 INTEGER,
INTENT(IN) :: pb
510 INTEGER :: np, np_tot, nb_field1, nb_field2, m_max, m_max_c, mpid, n_r_pad
511 INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
513 COMPLEX(KIND=8),
DIMENSION(m_max_pad, (SIZE(V1_in,2)+SIZE(V2_in,2))/2, bloc_size) :: cu
514 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,(SIZE(V1_in,2)+SIZE(V2_in,2))/2,bloc_size) :: ru
515 COMPLEX(KIND=8),
DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu
516 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru
517 COMPLEX(KIND=8),
DIMENSION(SIZE(V1_in,2)/2, bloc_size) :: intermediate
518 REAL(KIND=8),
DIMENSION(2*m_max_pad-1, bloc_size) :: norm_grad_phi
519 REAL(KIND=8),
DIMENSION(2*m_max_pad-1, bloc_size) :: norm_vel
520 REAL(KIND=8),
DIMENSION(2*m_max_pad-1, bloc_size/l_G) :: norm_vel_int
522 REAL(KIND=8),
DIMENSION(SIZE(V1_in,3),SIZE(V1_in,2)+SIZE(V2_in,2),bloc_size*nb_procs) :: dist_field, combined_field
523 COMPLEX(KIND=8),
DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
524 COMPLEX(KIND=8),
DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
527 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code, l
531 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
532 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
535 #include "petsc/finclude/petsc.h"
536 mpi_comm :: communicator
538 IF (present(temps)) temps = 0.d0
541 nb_field1=
SIZE(v1_in,2)
542 nb_field2=
SIZE(v2_in,2)
543 m_max_c =
SIZE(v1_in,3)
544 m_max = m_max_c*nb_procs
545 n_r_pad=2*m_max_pad-1
546 np_tot = nb_procs*bloc_size
548 IF (mod(nb_field1,2)/=0 .OR. mod(nb_field2,2)/=0 .OR. m_max_c==0)
THEN
566 dist_field(i,1:nb_field1,1:np) = transpose(v1_in(:,:,i))
567 dist_field(i,nb_field1+1:nb_field1+nb_field2,1:np) = transpose(v2_in(:,:,i))
569 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
571 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
573 longueur_tranche=bloc_size*m_max_c*(nb_field1+nb_field2)
576 mpid=mpi_double_precision
577 CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
578 mpid, communicator, code)
579 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
587 shiftc = (nb-1)*bloc_size
588 shiftl = (nb-1)*m_max_c
590 DO nf = 1, (nb_field1+nb_field2)/2
596 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
597 -combined_field(:,2*nf,jindex),kind=8)/2
601 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
607 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
610 fft_dim=1; istride=1; ostride=1;
614 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
615 odist=m_max_pad; onembed(1)=m_max_pad
618 howmany=bloc_size*(nb_field1+nb_field2)/2
621 CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
622 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
624 CALL dfftw_execute(fftw_plan_multi_c2r)
628 IF (nb_field1 == 6 .AND. nb_field2 == 6)
THEN
629 norm_vel(:,:) = sqrt(ru(:,4,:)**2 + ru(:,5,:)**2 + ru(:,6,:)**2)
630 DO i = 1, 2*m_max_pad - 1
631 DO l = 1, bloc_size/l_g
632 x = maxval(norm_vel(i,(l-1)*l_g+1:l*l_g))
633 norm_vel_int(i,l) = x
636 DO l = 1, bloc_size/l_g
637 DO i = 2, 2*m_max_pad - 2
638 norm_vel(i,(l-1)*l_g+1:l*l_g) = maxval(norm_vel_int(i-1:i+1,l))
640 norm_vel(1,(l-1)*l_g+1:l*l_g) = max(norm_vel_int(1,l),norm_vel_int(2,l),norm_vel_int(2*m_max_pad - 1,l))
641 norm_vel(2*m_max_pad - 1,(l-1)*l_g+1:l*l_g) = &
642 max(norm_vel_int(2*m_max_pad - 2,l),norm_vel_int(2*m_max_pad - 1,l),norm_vel_int(1,l))
644 prod_ru(:,1,:) = (opt_m_vel - norm_vel(:,:))*ru(:,1,:)
645 prod_ru(:,2,:) = (opt_m_vel - norm_vel(:,:))*ru(:,2,:)
646 prod_ru(:,3,:) = (opt_m_vel - norm_vel(:,:))*ru(:,3,:)
648 opt_norm_out = norm_vel
651 IF (nb_field1 == 6 .AND. nb_field2 == 2)
THEN
652 norm_grad_phi(:,:) = sqrt(ru(:,1,:)**2 + ru(:,2,:)**2 + ru(:,3,:)**2) + 1.d-14
653 prod_ru(:,1,:) = opt_norm(:,:)*ru(:,4,:)*(1.d0 - ru(:,4,:))*ru(:,1,:)/norm_grad_phi(:,:)
654 prod_ru(:,2,:) = opt_norm(:,:)*ru(:,4,:)*(1.d0 - ru(:,4,:))*ru(:,2,:)/norm_grad_phi(:,:)
655 prod_ru(:,3,:) = opt_norm(:,:)*ru(:,4,:)*(1.d0 - ru(:,4,:))*ru(:,3,:)/norm_grad_phi(:,:)
658 CALL
error_petsc(
'error in problem type while calling FFT_PAR_COMPRESSIVE_VISC_DCL ')
661 howmany = bloc_size*nb_field1/2
663 CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
664 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
666 CALL dfftw_execute(fftw_plan_multi_r2c)
669 prod_cu = prod_cu/n_r_pad
671 IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -
t
676 combined_prod_cu(:,:,1)=prod_cu(1,:,:)
679 combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
682 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
684 longueur_tranche=bloc_size*m_max_c*nb_field1
687 mpid=mpi_double_precision
688 CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
689 mpid,communicator,code)
691 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
697 shiftc = (nb-1)*bloc_size
698 shiftl = (nb-1)*m_max_c
699 intermediate = dist_prod_cu(:,:,shiftl+i)
701 IF (n+shiftc > np ) cycle
702 DO i_field = 1, nb_field1/2
703 v_out(n+shiftc, i_field*2-1, i) =
REAL (intermediate(i_field,n),kind=8)
704 v_out(n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
709 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
721 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: v1_in, v2_in
722 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: c_out
723 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
724 INTEGER,
INTENT(IN) :: nb_procs, bloc_size, m_max_pad
725 COMPLEX(KIND=8),
DIMENSION(m_max_pad, SIZE(V1_in,2), bloc_size) :: cu
726 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,SIZE(V1_in,2),bloc_size) :: ru
727 COMPLEX(KIND=8),
DIMENSION(m_max_pad,bloc_size) :: prod_cu
728 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru
729 COMPLEX(KIND=8),
DIMENSION(bloc_size) :: intermediate
730 REAL(KIND=8),
DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field, combined_field
731 COMPLEX(KIND=8),
DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
732 COMPLEX(KIND=8),
DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
734 INTEGER :: np, np_tot, nb_field, m_max, m_max_c, mpid, n_r_pad
735 INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
736 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
739 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
740 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
743 #include "petsc/finclude/petsc.h"
744 mpi_comm :: communicator
746 IF (present(temps)) temps = 0.d0
749 nb_field=
SIZE(v1_in,2)
750 m_max_c =
SIZE(v1_in,3)
751 m_max = m_max_c*nb_procs
752 np_tot = nb_procs*bloc_size
753 n_r_pad=2*m_max_pad-1
755 IF (mod(nb_field,2)/=0 .OR. m_max_c==0)
THEN
768 dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
769 dist_field(i,nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
771 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
773 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
775 longueur_tranche=bloc_size*m_max_c*nb_field*2
778 mpid=mpi_double_precision
779 CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
780 mpid, communicator, code)
781 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
789 shiftc = (nb-1)*bloc_size
790 shiftl = (nb-1)*m_max_c
798 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
799 -combined_field(:,2*nf,jindex),kind=8)/2
803 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
809 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
812 fft_dim=1; istride=1; ostride=1;
816 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
817 odist=m_max_pad; onembed(1)=m_max_pad
820 howmany=bloc_size*nb_field
824 CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
825 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
827 CALL dfftw_execute(fftw_plan_multi_c2r)
830 IF (nb_field==6)
THEN
831 prod_ru(:,:) = ru(:,1,:)*ru(:,4,:) + ru(:,2,:)*ru(:,5,:) + ru(:,3,:)*ru(:,6,:)
835 howmany = bloc_size*1
836 CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
837 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
839 CALL dfftw_execute(fftw_plan_multi_r2c)
842 prod_cu = prod_cu/n_r_pad
844 IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -
t
848 combined_prod_cu(:,1)=prod_cu(1,:)
850 combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
853 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
856 longueur_tranche=bloc_size*m_max_c*2
857 mpid=mpi_double_precision
858 CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
859 mpid,communicator,code)
860 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
865 shiftc = (nb-1)*bloc_size
866 shiftl = (nb-1)*m_max_c
867 intermediate = dist_prod_cu(:,shiftl+i)
869 IF (n+shiftc > np ) cycle
870 c_out(n+shiftc, 1, i) =
REAL (intermediate(n),kind=8)
871 c_out(n+shiftc, 2 , i) = aimag(intermediate(n))
875 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
879 SUBROUTINE fft_par_prod_dcl(communicator, c1_in, c2_in, c_out, nb_procs, bloc_size, m_max_pad, temps)
888 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: c1_in, c2_in
889 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: c_out
890 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
891 INTEGER,
INTENT(IN) :: nb_procs, bloc_size, m_max_pad
893 COMPLEX(KIND=8),
DIMENSION(m_max_pad, 2, bloc_size) :: cu
894 REAL(KIND=8),
DIMENSION(2*m_max_pad-1, 2, bloc_size) :: ru
895 COMPLEX(KIND=8),
DIMENSION(m_max_pad, bloc_size) :: prod_cu
896 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru
897 REAL(KIND=8),
DIMENSION(SIZE(c1_in,3),4, bloc_size*nb_procs) :: dist_field, combined_field
898 COMPLEX(KIND=8),
DIMENSION(bloc_size,SIZE(c1_in,3)*nb_procs) :: dist_prod_cu, combined_prod_cu
899 COMPLEX(KIND=8),
DIMENSION(bloc_size) :: intermediate
901 INTEGER :: np, np_tot, m_max, m_max_c, mpid, n_r_pad
902 INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
903 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
906 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
907 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
910 #include "petsc/finclude/petsc.h"
911 mpi_comm :: communicator
913 IF (present(temps)) temps = 0.d0
916 m_max_c =
SIZE(c1_in,3)
917 m_max = m_max_c*nb_procs
918 np_tot = nb_procs*bloc_size
919 n_r_pad=2*m_max_pad-1
933 dist_field(i,1:2,1:np) = transpose(c1_in(:,:,i))
934 dist_field(i,3:4,1:np) = transpose(c2_in(:,:,i))
936 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
938 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
940 longueur_tranche=bloc_size*m_max_c*4
943 mpid=mpi_double_precision
944 CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
945 mpid, communicator, code)
946 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
954 shiftc = (nb-1)*bloc_size
955 shiftl = (nb-1)*m_max_c
963 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
964 -combined_field(:,2*nf,jindex),kind=8)/2
968 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
974 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
977 fft_dim=1; istride=1; ostride=1;
981 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
982 odist=m_max_pad; onembed(1)=m_max_pad
988 CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
989 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
990 CALL dfftw_execute(fftw_plan_multi_c2r)
993 prod_ru(:,:) = ru(:,1,:)*ru(:,2,:)
996 howmany = bloc_size*1
997 CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
998 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
999 CALL dfftw_execute(fftw_plan_multi_r2c)
1002 prod_cu = prod_cu/n_r_pad
1004 IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -
t
1008 combined_prod_cu(:,1)=prod_cu(1,:)
1010 combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
1013 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
1016 longueur_tranche=bloc_size*m_max_c*2
1017 mpid=mpi_double_precision
1018 CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
1019 mpid,communicator,code)
1020 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
1025 shiftc = (nb-1)*bloc_size
1026 shiftl = (nb-1)*m_max_c
1027 intermediate = dist_prod_cu(:,shiftl+i)
1029 IF (n+shiftc > np ) cycle
1030 c_out(n+shiftc, 1, i) =
REAL (intermediate(n),kind=8)
1031 c_out(n+shiftc, 2 , i) = aimag(intermediate(n))
1035 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
1047 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: v1_in, v2_in
1048 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: v_out
1049 INTEGER,
INTENT(IN) :: bloc_size, m_max_pad, nb_procs
1050 COMPLEX(KIND=8),
DIMENSION(m_max_pad, SIZE(V1_in,2), bloc_size) :: cu
1051 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,SIZE(V1_in,2),bloc_size) :: ru
1052 COMPLEX(KIND=8),
DIMENSION(m_max_pad,bloc_size) :: prod_cu
1053 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru
1054 COMPLEX(KIND=8),
DIMENSION(bloc_size) :: intermediate
1055 REAL(KIND=8),
DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field, combined_field
1056 COMPLEX(KIND=8),
DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
1057 COMPLEX(KIND=8),
DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
1058 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
1059 LOGICAL,
OPTIONAL,
INTENT(IN) :: padding
1060 INTEGER :: np, np_tot, nb_field, m_max, m_max_c, mpid, n_r_pad
1061 INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
1063 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
1067 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
1068 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
1071 #include "petsc/finclude/petsc.h"
1072 mpi_comm :: communicator
1074 IF (present(temps)) temps = 0.d0
1077 nb_field=
SIZE(v1_in,2)
1078 m_max_c =
SIZE(v1_in,3)
1079 m_max = m_max_c*nb_procs
1080 n_r_pad=2*m_max_pad-1
1081 np_tot = nb_procs*bloc_size
1083 IF (mod(nb_field,2)/=0 .OR. m_max_c==0)
THEN
1101 dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
1102 dist_field(i,nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
1104 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
1106 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
1108 longueur_tranche=bloc_size*m_max_c*nb_field*2
1111 mpid=mpi_double_precision
1112 CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
1113 mpid, communicator, code)
1114 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
1122 shiftc = (nb-1)*bloc_size
1123 shiftl = (nb-1)*m_max_c
1131 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
1132 -combined_field(:,2*nf,jindex),kind=8)/2
1136 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
1142 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
1145 fft_dim=1; istride=1; ostride=1;
1149 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1150 odist=m_max_pad; onembed(1)=m_max_pad
1153 howmany=bloc_size*nb_field
1157 CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
1158 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
1160 CALL dfftw_execute(fftw_plan_multi_c2r)
1163 IF (nb_field==6)
THEN
1164 prod_ru(:,:) = ru(:,1,:)*ru(:,4,:) + ru(:,2,:)*ru(:,5,:) + ru(:,3,:)*ru(:,6,:)
1169 CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
1170 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
1172 CALL dfftw_execute(fftw_plan_multi_r2c)
1175 prod_cu = prod_cu/n_r_pad
1177 IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -
t
1182 combined_prod_cu(:,1)=prod_cu(1,:)
1185 combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
1188 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
1191 longueur_tranche=bloc_size*m_max_c*2
1192 mpid=mpi_double_precision
1193 CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
1194 mpid,communicator,code)
1195 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
1200 shiftc = (nb-1)*bloc_size
1201 shiftl = (nb-1)*m_max_c
1202 intermediate = dist_prod_cu(:,shiftl+i)
1204 IF (n+shiftc > np ) cycle
1205 v_out(n+shiftc, 1, i) =
REAL (intermediate(n),kind=8)
1206 v_out(n+shiftc, 2, i) = aimag(intermediate(n))
1210 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
1224 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: v1_in, v2_in
1225 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: v_out
1226 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
1227 LOGICAL,
OPTIONAL,
INTENT(IN) :: padding
1229 LOGICAL,
SAVE :: once=.true.
1230 INTEGER,
SAVE :: np_ref, np, np_tot, bloc_size, nb_field, &
1231 m_max, m_max_c, n_r, rang, nb_procs, mpid, m_max_pad, n_r_pad
1232 INTEGER(KIND=8),
SAVE :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
1233 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: cu, prod_cu
1234 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: ru, prod_ru
1238 INTEGER :: np_glob, np_loc, reste, np_alloc, nn, nb_bloc, n_sup, n_inf, n_up, n_cache
1239 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
1241 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:) :: dist_field, combined_field
1242 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:) :: combined_prod_cu, dist_prod_cu, out_prod_cu
1245 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:) :: intermediate
1250 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
1251 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
1261 #include "petsc/finclude/petsc.h"
1262 mpi_comm :: communicator
1264 CALL mpi_comm_size(communicator,nb_procs,code)
1265 CALL mpi_comm_rank(communicator,rang,code)
1267 IF (present(temps)) temps = 0.d0
1270 IF (
SIZE(v1_in,1).NE.np_ref)
THEN
1272 np_ref =
SIZE(v1_in,1)
1279 np_glob =
SIZE(v1_in,1)
1280 m_max_c =
SIZE(v1_in,3)
1281 np_loc = n_cache/(12*m_max_c)
1282 nb_bloc = max(np_glob/np_loc,1)
1284 100 np_loc = np_glob/nb_bloc
1285 reste = np_glob - np_loc*nb_bloc
1286 np_alloc = np_loc + reste
1287 reste = np_alloc*nb_bloc - np_glob
1288 IF (reste>np_alloc)
THEN
1289 nb_bloc = nb_bloc - 1
1298 nb_field=
SIZE(v1_in,2)
1299 m_max_c =
SIZE(v1_in,3)
1300 m_max = m_max_c*nb_procs
1302 IF (mod(nb_field,2)/=0 .OR. m_max_c==0)
THEN
1311 IF (modulo(np,nb_procs)==0)
THEN
1312 bloc_size = np/nb_procs
1314 bloc_size = np/nb_procs + 1
1316 np_tot = nb_procs*bloc_size
1322 IF (present(padding))
THEN
1324 m_max_pad = 3*m_max/2
1326 WRITE(*,*)
' NO PADDING '
1330 m_max_pad = 3*m_max/2
1332 n_r_pad=2*m_max_pad-1
1334 IF (
ALLOCATED(ru))
DEALLOCATE(ru,cu,prod_ru,prod_cu)
1335 ALLOCATE(cu(m_max_pad,nb_field,bloc_size))
1336 ALLOCATE(ru(n_r_pad, nb_field,bloc_size))
1337 ALLOCATE(prod_cu(m_max_pad,nb_field/2,bloc_size))
1338 ALLOCATE(prod_ru(n_r_pad, nb_field/2,bloc_size))
1340 ALLOCATE(intermediate(nb_field/2,bloc_size))
1341 ALLOCATE( dist_field(m_max_c,2*nb_field,np_tot))
1342 ALLOCATE(combined_field(m_max_c,2*nb_field,np_tot))
1343 ALLOCATE(dist_prod_cu(nb_field/2,bloc_size,m_max))
1344 ALLOCATE(combined_prod_cu(nb_field/2,bloc_size,m_max))
1345 ALLOCATE(out_prod_cu(m_max_c,np_tot,nb_field/2))
1356 IF (nn == nb_bloc)
THEN
1357 n_up = np_glob - n_inf + 1
1361 n_sup = n_inf + n_up - 1
1364 dist_field(i,1:nb_field,1:n_up) = transpose(v1_in(n_inf:n_sup,:,i))
1365 dist_field(i,nb_field+1:2*nb_field,1:n_up) = transpose(v2_in(n_inf:n_sup,:,i))
1367 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
1369 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
1371 longueur_tranche=bloc_size*m_max_c*nb_field*2
1374 mpid=mpi_double_precision
1375 CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
1376 mpid, communicator, code)
1377 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
1385 shiftc = (nb-1)*bloc_size
1386 shiftl = (nb-1)*m_max_c
1394 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
1395 -combined_field(:,2*nf,jindex),kind=8)/2
1399 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
1405 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
1408 fft_dim=1; istride=1; ostride=1;
1412 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1413 odist=m_max_pad; onembed(1)=m_max_pad
1416 howmany=bloc_size*nb_field
1420 IF (once) CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
1421 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
1423 CALL dfftw_execute(fftw_plan_multi_c2r)
1426 IF (nb_field==6)
THEN
1427 prod_ru(:,1,:) = ru(:,2,:)*ru(:,6,:) - ru(:,3,:)*ru(:,5,:)
1428 prod_ru(:,2,:) = ru(:,3,:)*ru(:,4,:) - ru(:,1,:)*ru(:,6,:)
1429 prod_ru(:,3,:) = ru(:,1,:)*ru(:,5,:) - ru(:,2,:)*ru(:,4,:)
1434 IF (once) CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
1435 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
1437 CALL dfftw_execute(fftw_plan_multi_r2c)
1440 prod_cu = prod_cu/n_r_pad
1442 IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -
t
1447 combined_prod_cu(:,:,1)=prod_cu(1,:,:)
1450 combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
1453 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
1456 longueur_tranche=bloc_size*m_max_c*nb_field
1457 mpid=mpi_double_precision
1458 CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
1459 mpid,communicator,code)
1460 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
1465 shiftc = (nb-1)*bloc_size
1466 shiftl = (nb-1)*m_max_c
1467 intermediate = dist_prod_cu(:,:,shiftl+i)
1469 IF (n_inf-1+n+shiftc > np_glob ) cycle
1470 DO i_field = 1, nb_field/2
1471 v_out(n_inf-1+n+shiftc, i_field*2-1, i) =
REAL (intermediate(i_field,n),kind=8)
1472 v_out(n_inf-1+n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
1477 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
1483 DEALLOCATE(dist_field, combined_field, dist_prod_cu, combined_prod_cu, intermediate, out_prod_cu)
1495 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: v1_in, v2_in
1496 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: c_out
1497 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
1498 LOGICAL,
OPTIONAL,
INTENT(IN) :: padding
1500 LOGICAL,
SAVE :: once=.true.
1501 INTEGER,
SAVE :: np_ref, np, np_tot, bloc_size, nb_field, &
1502 m_max, m_max_c, n_r, rang, nb_procs, mpid, m_max_pad, n_r_pad
1503 INTEGER(KIND=8),
SAVE :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
1504 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: cu
1505 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: ru
1506 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:),
SAVE :: prod_cu
1507 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:),
SAVE :: prod_ru
1510 INTEGER :: np_glob, np_loc, reste, np_alloc, nn, nb_bloc, n_sup, n_inf, n_up, n_cache
1511 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
1513 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:) :: dist_field, combined_field
1514 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:) :: combined_prod_cu, dist_prod_cu, out_prod_cu
1517 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:) :: intermediate
1521 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
1522 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
1532 #include "petsc/finclude/petsc.h"
1533 mpi_comm :: communicator
1535 CALL mpi_comm_size(communicator,nb_procs,code)
1536 CALL mpi_comm_rank(communicator,rang,code)
1538 IF (present(temps)) temps = 0.d0
1541 IF (
SIZE(v1_in,1).NE.np_ref)
THEN
1543 np_ref =
SIZE(v1_in,1)
1549 np_glob =
SIZE(v1_in,1)
1550 m_max_c =
SIZE(v1_in,3)
1551 np_loc = n_cache/(12*m_max_c)
1552 nb_bloc = max(np_glob/np_loc,1)
1554 100 np_loc = np_glob/nb_bloc
1555 reste = np_glob - np_loc*nb_bloc
1556 np_alloc = np_loc + reste
1557 reste = np_alloc*nb_bloc - np_glob
1558 IF (reste>np_alloc)
THEN
1559 nb_bloc = nb_bloc - 1
1568 nb_field=
SIZE(v1_in,2)
1569 m_max_c =
SIZE(v1_in,3)
1570 m_max = m_max_c*nb_procs
1572 IF (mod(nb_field,2)/=0 .OR. m_max_c==0)
THEN
1581 IF (modulo(np,nb_procs)==0)
THEN
1582 bloc_size = np/nb_procs
1584 bloc_size = np/nb_procs + 1
1586 np_tot = nb_procs*bloc_size
1592 IF (present(padding))
THEN
1594 m_max_pad = 3*m_max/2
1596 WRITE(*,*)
' NO PADDING '
1600 m_max_pad = 3*m_max/2
1602 n_r_pad=2*m_max_pad-1
1604 IF (
ALLOCATED(ru))
DEALLOCATE(ru,cu,prod_ru,prod_cu)
1605 ALLOCATE(cu(m_max_pad,nb_field,bloc_size))
1606 ALLOCATE(ru(n_r_pad, nb_field,bloc_size))
1607 ALLOCATE(prod_cu(m_max_pad,bloc_size))
1608 ALLOCATE(prod_ru(n_r_pad, bloc_size))
1610 ALLOCATE(intermediate(bloc_size))
1611 ALLOCATE( dist_field(m_max_c,2*nb_field,np_tot))
1612 ALLOCATE(combined_field(m_max_c,2*nb_field,np_tot))
1613 ALLOCATE(dist_prod_cu(bloc_size,m_max))
1614 ALLOCATE(combined_prod_cu(bloc_size,m_max))
1615 ALLOCATE(out_prod_cu(m_max_c,np_tot))
1625 IF (nn == nb_bloc)
THEN
1626 n_up = np_glob - n_inf + 1
1630 n_sup = n_inf + n_up - 1
1633 dist_field(i,1:nb_field,1:n_up) = transpose(v1_in(n_inf:n_sup,:,i))
1634 dist_field(i,nb_field+1:2*nb_field,1:n_up) = transpose(v2_in(n_inf:n_sup,:,i))
1636 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
1638 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
1640 longueur_tranche=bloc_size*m_max_c*nb_field*2
1643 mpid=mpi_double_precision
1644 CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
1645 mpid, communicator, code)
1646 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
1654 shiftc = (nb-1)*bloc_size
1655 shiftl = (nb-1)*m_max_c
1663 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
1664 -combined_field(:,2*nf,jindex),kind=8)/2
1668 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
1674 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
1677 fft_dim=1; istride=1; ostride=1;
1681 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1682 odist=m_max_pad; onembed(1)=m_max_pad
1685 howmany=bloc_size*nb_field
1689 IF (once) CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
1690 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
1692 CALL dfftw_execute(fftw_plan_multi_c2r)
1695 IF (nb_field==6)
THEN
1696 prod_ru(:,:) = ru(:,1,:)*ru(:,4,:) + ru(:,2,:)*ru(:,5,:) + ru(:,3,:)*ru(:,6,:)
1700 howmany = bloc_size*1
1701 IF (once) CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
1702 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
1704 CALL dfftw_execute(fftw_plan_multi_r2c)
1707 prod_cu = prod_cu/n_r_pad
1709 IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -
t
1713 combined_prod_cu(:,1)=prod_cu(1,:)
1715 combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
1718 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
1721 longueur_tranche=bloc_size*m_max_c*2
1722 mpid=mpi_double_precision
1723 CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
1724 mpid,communicator,code)
1725 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
1731 shiftc = (nb-1)*bloc_size
1732 shiftl = (nb-1)*m_max_c
1733 intermediate = dist_prod_cu(:,shiftl+i)
1735 IF (n_inf-1+n+shiftc > np_glob ) cycle
1736 c_out(n_inf-1+n+shiftc, 1, i) =
REAL (intermediate(n),kind=8)
1737 c_out(n_inf-1+n+shiftc, 2 , i) = aimag(intermediate(n))
1741 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
1747 DEALLOCATE(dist_field, combined_field, dist_prod_cu, combined_prod_cu, intermediate, out_prod_cu)
1759 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: c1_in, c2_in
1760 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: c_out
1761 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
1762 LOGICAL,
OPTIONAL,
INTENT(IN) :: padding
1764 LOGICAL,
SAVE :: once=.true.
1765 INTEGER,
SAVE :: np_ref, np, np_tot, bloc_size, &
1766 m_max, m_max_c, n_r, rang, nb_procs, mpid, m_max_pad, n_r_pad
1767 INTEGER(KIND=8),
SAVE :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
1768 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: cu
1769 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: ru
1770 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:),
SAVE :: prod_cu
1771 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:),
SAVE :: prod_ru
1774 INTEGER :: np_glob, np_loc, reste, np_alloc, nn, nb_bloc, n_sup, n_inf, n_up, n_cache
1775 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
1777 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:) :: dist_field, combined_field
1778 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:) :: combined_prod_cu, dist_prod_cu, out_prod_cu
1781 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:) :: intermediate
1785 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
1786 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
1796 #include "petsc/finclude/petsc.h"
1797 mpi_comm :: communicator
1799 CALL mpi_comm_size(communicator,nb_procs,code)
1800 CALL mpi_comm_rank(communicator,rang,code)
1802 IF (present(temps)) temps = 0.d0
1805 IF (
SIZE(c1_in,1).NE.np_ref)
THEN
1807 np_ref =
SIZE(c1_in,1)
1813 np_glob =
SIZE(c1_in,1)
1814 m_max_c =
SIZE(c1_in,3)
1815 np_loc = n_cache/(12*m_max_c)
1816 nb_bloc = max(np_glob/np_loc,1)
1818 100 np_loc = np_glob/nb_bloc
1819 reste = np_glob - np_loc*nb_bloc
1820 np_alloc = np_loc + reste
1821 reste = np_alloc*nb_bloc - np_glob
1822 IF (reste>np_alloc)
THEN
1823 nb_bloc = nb_bloc - 1
1832 m_max_c =
SIZE(c1_in,3)
1833 m_max = m_max_c*nb_procs
1835 IF (m_max_c==0)
THEN
1844 IF (modulo(np,nb_procs)==0)
THEN
1845 bloc_size = np/nb_procs
1847 bloc_size = np/nb_procs + 1
1849 np_tot = nb_procs*bloc_size
1855 IF (present(padding))
THEN
1857 m_max_pad = 3*m_max/2
1859 WRITE(*,*)
' NO PADDING '
1863 m_max_pad = 3*m_max/2
1865 n_r_pad=2*m_max_pad-1
1867 IF (
ALLOCATED(ru))
DEALLOCATE(ru,cu,prod_ru,prod_cu)
1868 ALLOCATE(cu(m_max_pad,2,bloc_size))
1869 ALLOCATE(ru(n_r_pad, 2,bloc_size))
1870 ALLOCATE(prod_cu(m_max_pad,bloc_size))
1871 ALLOCATE(prod_ru(n_r_pad, bloc_size))
1873 ALLOCATE(intermediate(bloc_size))
1874 ALLOCATE( dist_field(m_max_c,4,np_tot))
1875 ALLOCATE(combined_field(m_max_c,4,np_tot))
1876 ALLOCATE(dist_prod_cu(bloc_size,m_max))
1877 ALLOCATE(combined_prod_cu(bloc_size,m_max))
1878 ALLOCATE(out_prod_cu(m_max_c,np_tot))
1888 IF (nn == nb_bloc)
THEN
1889 n_up = np_glob - n_inf + 1
1893 n_sup = n_inf + n_up - 1
1896 dist_field(i,1:2,1:n_up) = transpose(c1_in(n_inf:n_sup,:,i))
1897 dist_field(i,3:4,1:n_up) = transpose(c2_in(n_inf:n_sup,:,i))
1899 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
1901 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
1903 longueur_tranche=bloc_size*m_max_c*4
1906 mpid=mpi_double_precision
1907 CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
1908 mpid, communicator, code)
1909 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
1917 shiftc = (nb-1)*bloc_size
1918 shiftl = (nb-1)*m_max_c
1926 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
1927 -combined_field(:,2*nf,jindex),kind=8)/2
1931 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
1937 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
1940 fft_dim=1; istride=1; ostride=1;
1944 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1945 odist=m_max_pad; onembed(1)=m_max_pad
1951 IF (once) CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
1952 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
1953 CALL dfftw_execute(fftw_plan_multi_c2r)
1956 prod_ru(:,:) = ru(:,1,:)*ru(:,2,:)
1959 howmany = bloc_size*1
1960 IF (once) CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
1961 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
1962 CALL dfftw_execute(fftw_plan_multi_r2c)
1965 prod_cu = prod_cu/n_r_pad
1967 IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -
t
1971 combined_prod_cu(:,1)=prod_cu(1,:)
1973 combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
1976 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
1979 longueur_tranche=bloc_size*m_max_c*2
1980 mpid=mpi_double_precision
1981 CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
1982 mpid,communicator,code)
1983 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
1988 shiftc = (nb-1)*bloc_size
1989 shiftl = (nb-1)*m_max_c
1990 intermediate = dist_prod_cu(:,shiftl+i)
1992 IF (n_inf-1+n+shiftc > np_glob ) cycle
1993 c_out(n_inf-1+n+shiftc, 1, i) =
REAL (intermediate(n),kind=8)
1994 c_out(n_inf-1+n+shiftc, 2 , i) = aimag(intermediate(n))
1998 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
2004 DEALLOCATE(dist_field, combined_field, dist_prod_cu, combined_prod_cu, intermediate, out_prod_cu)
2015 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: c_in
2016 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: c_out
2017 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
2018 LOGICAL,
OPTIONAL,
INTENT(IN) :: padding
2020 LOGICAL,
SAVE :: once=.true.
2021 INTEGER,
SAVE :: np_ref, np, np_tot, bloc_size, &
2022 m_max, m_max_c, n_r, rang, nb_procs, mpid, m_max_pad, n_r_pad
2023 INTEGER(KIND=8),
SAVE :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
2024 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:),
SAVE :: cu
2025 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:),
SAVE :: ru
2026 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:),
SAVE :: prod_cu
2027 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:),
SAVE :: prod_ru
2030 INTEGER :: np_glob, np_loc, reste, np_alloc, nn, nb_bloc, n_sup, n_inf, n_up, n_cache
2031 INTEGER :: nb, shiftc, shiftl, jindex, longueur_tranche, i, n, code
2033 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:) :: dist_field, combined_field
2034 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:) :: combined_prod_cu, dist_prod_cu
2037 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:) :: intermediate
2041 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
2042 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
2052 #include "petsc/finclude/petsc.h"
2053 mpi_comm :: communicator
2055 CALL mpi_comm_size(communicator,nb_procs,code)
2056 CALL mpi_comm_rank(communicator,rang,code)
2058 IF (present(temps)) temps = 0.d0
2061 IF (
SIZE(c_in,1).NE.np_ref)
THEN
2063 np_ref =
SIZE(c_in,1)
2069 np_glob =
SIZE(c_in,1)
2070 m_max_c =
SIZE(c_in,3)
2071 np_loc = n_cache/(12*m_max_c)
2072 nb_bloc = max(np_glob/np_loc,1)
2074 100 np_loc = np_glob/nb_bloc
2075 reste = np_glob - np_loc*nb_bloc
2076 np_alloc = np_loc + reste
2077 reste = np_alloc*nb_bloc - np_glob
2078 IF (reste>np_alloc)
THEN
2079 nb_bloc = nb_bloc - 1
2088 m_max_c =
SIZE(c_in,3)
2089 m_max = m_max_c*nb_procs
2091 IF (m_max_c==0)
THEN
2100 IF (modulo(np,nb_procs)==0)
THEN
2101 bloc_size = np/nb_procs
2103 bloc_size = np/nb_procs + 1
2105 np_tot = nb_procs*bloc_size
2111 IF (present(padding))
THEN
2113 m_max_pad = 3*m_max/2
2115 WRITE(*,*)
' NO PADDING '
2119 m_max_pad = 3*m_max/2
2121 n_r_pad=2*m_max_pad-1
2123 IF (
ALLOCATED(ru))
DEALLOCATE(ru,cu,prod_ru,prod_cu)
2124 ALLOCATE(cu(m_max_pad,bloc_size))
2125 ALLOCATE(ru(n_r_pad, bloc_size))
2126 ALLOCATE(prod_cu(m_max_pad,bloc_size))
2127 ALLOCATE(prod_ru(n_r_pad, bloc_size))
2129 ALLOCATE(intermediate(bloc_size))
2130 ALLOCATE( dist_field(m_max_c,2,np_tot))
2131 ALLOCATE(combined_field(m_max_c,2,np_tot))
2132 ALLOCATE(dist_prod_cu(bloc_size,m_max))
2133 ALLOCATE(combined_prod_cu(bloc_size,m_max))
2144 IF (nn == nb_bloc)
THEN
2145 n_up = np_glob - n_inf + 1
2149 n_sup = n_inf + n_up - 1
2152 dist_field(i,:,1:n_up) = transpose(c_in(n_inf:n_sup,:,i))
2154 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
2156 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
2158 longueur_tranche=bloc_size*m_max_c*2
2161 mpid=mpi_double_precision
2162 CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
2163 mpid, communicator, code)
2164 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
2172 shiftc = (nb-1)*bloc_size
2173 shiftl = (nb-1)*m_max_c
2178 cu(shiftl+1:shiftl+m_max_c,n) = cmplx(combined_field(:,1,jindex),&
2179 -combined_field(:,2,jindex),kind=8)/2
2182 cu(1,:) = 2*cmplx(
REAL(cu(1,:),KIND=8),0.d0,kind=8)
2188 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
2191 fft_dim=1; istride=1; ostride=1;
2195 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
2196 odist=m_max_pad; onembed(1)=m_max_pad
2201 IF (once) CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
2202 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate);
2203 CALL dfftw_execute(fftw_plan_multi_c2r)
2206 prod_ru = ru*(ru**2-1)
2210 IF (once) CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
2211 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate);
2212 CALL dfftw_execute(fftw_plan_multi_r2c)
2215 prod_cu = prod_cu/n_r_pad
2217 IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -
t
2221 combined_prod_cu(:,1)=prod_cu(1,:)
2223 combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
2226 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
2229 longueur_tranche=bloc_size*m_max_c*2
2230 mpid=mpi_double_precision
2231 CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
2232 mpid,communicator,code)
2233 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
2238 shiftc = (nb-1)*bloc_size
2239 shiftl = (nb-1)*m_max_c
2240 intermediate = dist_prod_cu(:,shiftl+i)
2242 IF (n_inf-1+n+shiftc > np_glob ) cycle
2243 c_out(n_inf-1+n+shiftc, 1, i) =
REAL (intermediate(n),kind=8)
2244 c_out(n_inf-1+n+shiftc, 2, i) = aimag(intermediate(n))
2248 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
2254 DEALLOCATE(dist_field, combined_field, dist_prod_cu, combined_prod_cu, intermediate)
2259 SUBROUTINE ref(communicator,V1_in, V2_in, V_out, temps)
2267 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: v1_in, v2_in
2268 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: v_out
2269 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
2272 LOGICAL,
SAVE :: once=.true.
2273 INTEGER,
SAVE :: np, np_tot, bloc_size, nb_field, m_max, m_max_c, n_r, rang, nb_procs, mpid
2274 INTEGER(KIND=8),
SAVE :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
2275 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: cu, prod_cu
2276 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: ru, prod_ru
2280 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
2282 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: dist_field, combined_field
2283 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: combined_prod_cu, dist_prod_cu, out_prod_cu
2286 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:) :: intermediate
2291 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
2292 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
2295 #include "petsc/finclude/petsc.h"
2296 mpi_comm :: communicator
2301 IF (present(temps)) temps = 0.d0
2304 IF ((
SIZE(v1_in,1).NE.np) .OR. (
SIZE(v1_in,2).NE.nb_field) .OR. (
SIZE(v1_in,3).NE.m_max_c))
THEN
2310 CALL mpi_comm_size(communicator,nb_procs,code)
2311 CALL mpi_comm_rank(communicator,rang,code)
2314 nb_field=
SIZE(v1_in,2)
2315 m_max_c =
SIZE(v1_in,3)
2316 m_max = m_max_c*nb_procs
2318 IF (mod(nb_field,2)/=0 .OR. m_max_c==0)
THEN
2326 IF (modulo(np,nb_procs)==0)
THEN
2327 bloc_size = np/nb_procs
2329 bloc_size = np/nb_procs + 1
2331 np_tot = nb_procs*bloc_size
2333 IF (
ALLOCATED(ru))
DEALLOCATE(ru,cu,prod_ru,prod_cu)
2334 ALLOCATE(cu(m_max,nb_field,bloc_size))
2335 ALLOCATE(ru(n_r, nb_field,bloc_size))
2336 ALLOCATE(prod_cu(m_max,nb_field/2,bloc_size))
2337 ALLOCATE(prod_ru(n_r, nb_field/2,bloc_size))
2342 ALLOCATE(intermediate(nb_field/2,bloc_size))
2343 ALLOCATE( dist_field(m_max_c,2*nb_field,np_tot))
2344 ALLOCATE(combined_field(m_max_c,2*nb_field,np_tot))
2345 ALLOCATE(dist_prod_cu(nb_field/2,bloc_size,m_max), combined_prod_cu(nb_field/2,bloc_size,m_max))
2346 ALLOCATE(out_prod_cu(m_max_c,np_tot,nb_field/2))
2358 dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
2359 dist_field(i,nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
2367 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
2369 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
2371 longueur_tranche=bloc_size*m_max_c*nb_field*2
2374 mpid=mpi_double_precision
2375 CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
2376 mpid, communicator, code)
2377 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
2383 shiftc = (nb-1)*bloc_size
2384 shiftl = (nb-1)*m_max_c
2394 cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),-combined_field(:,2*nf,jindex),kind=8)
2398 cu(1,:,:) = 2.d0*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
2399 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
2402 fft_dim=1; istride=1; ostride=1;
2403 idist=n_r; inembed(1)=n_r; dim(1)=n_r
2404 odist=m_max; onembed(1)=m_max
2405 IF (rang==(nb_procs-1))
THEN
2406 howmany= (np - bloc_size*(nb_procs-1))*nb_field
2408 howmany=bloc_size*nb_field
2412 IF (once) CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
2413 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate);
2414 CALL dfftw_execute(fftw_plan_multi_c2r)
2424 IF (nb_field==6)
THEN
2425 prod_ru(:,1,:) = ru(:,2,:)*ru(:,6,:) - ru(:,3,:)*ru(:,5,:)
2426 prod_ru(:,2,:) = ru(:,3,:)*ru(:,4,:) - ru(:,1,:)*ru(:,6,:)
2427 prod_ru(:,3,:) = ru(:,1,:)*ru(:,5,:) - ru(:,2,:)*ru(:,4,:)
2432 IF (once) CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
2433 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate);
2434 CALL dfftw_execute(fftw_plan_multi_r2c)
2436 prod_cu = prod_cu/n_r
2438 IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -
t
2443 combined_prod_cu(:,:,1)=prod_cu(1,:,:)
2446 combined_prod_cu(:,:,n)=2.d0*conjg(prod_cu(n,:,:))
2448 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
2451 longueur_tranche=bloc_size*m_max_c*nb_field
2452 mpid=mpi_double_precision
2453 CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
2454 mpid,communicator,code)
2455 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
2462 DO i_field = 1, nb_field/2
2465 shiftc = (nb-1)*bloc_size
2466 shiftl = (nb-1)*m_max_c
2467 out_prod_cu(:,n+shiftc,i_field) = dist_prod_cu(i_field,n,shiftl+1:shiftl+m_max_c)
2472 DO i_field = 1, nb_field/2
2474 v_out(:, i_field*2-1, i) =
REAL(out_prod_cu(i, 1:np, i_field),kind=8)
2475 v_out(:, i_field*2, i) = aimag(out_prod_cu(i, 1:np, i_field))
2481 shiftc = (nb-1)*bloc_size
2482 shiftl = (nb-1)*m_max_c
2483 intermediate = dist_prod_cu(:,:,shiftl+i)
2485 IF (n+shiftc > np ) cycle
2486 DO i_field = 1, nb_field/2
2487 v_out(n+shiftc, i_field*2-1, i) =
REAL (intermediate(i_field,n),kind=8)
2488 v_out(n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
2497 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
2499 DEALLOCATE(dist_field, combined_field, dist_prod_cu, combined_prod_cu, intermediate, out_prod_cu)
2500 IF (once) once = .false.
2503 SUBROUTINE fft_heaviside_dcl(communicator,V1_in, V_out, nb_procs, bloc_size, m_max_pad, temps, padding)
2510 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: v1_in
2511 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: v_out
2512 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
2513 LOGICAL,
OPTIONAL,
INTENT(IN) :: padding
2514 INTEGER,
INTENT(IN) :: bloc_size, m_max_pad, nb_procs
2515 INTEGER :: np, np_tot, nb_field, m_max, m_max_c, mpid, n_r_pad
2516 INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
2519 COMPLEX(KIND=8),
DIMENSION(m_max_pad, bloc_size) :: cu
2520 REAL(KIND=8),
DIMENSION(2*m_max_pad-1, bloc_size) :: ru
2521 COMPLEX(KIND=8),
DIMENSION(m_max_pad,bloc_size) :: prod_cu
2522 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru
2523 COMPLEX(KIND=8),
DIMENSION(bloc_size) :: intermediate
2524 REAL(KIND=8),
DIMENSION(SIZE(V1_in,3),SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field, combined_field
2525 COMPLEX(KIND=8),
DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
2526 COMPLEX(KIND=8),
DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
2528 INTEGER :: n1, n2, rank
2529 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
2533 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
2534 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
2537 #include "petsc/finclude/petsc.h"
2538 mpi_comm :: communicator
2539 petscerrorcode :: ierr
2541 IF (present(temps)) temps = 0.d0
2544 nb_field=
SIZE(v1_in,2)
2545 m_max_c =
SIZE(v1_in,3)
2546 m_max = m_max_c*nb_procs
2547 n_r_pad=2*m_max_pad-1
2548 np_tot = nb_procs*bloc_size
2550 IF (mod(nb_field,2)/=0 .OR. m_max_c==0)
THEN
2568 dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
2570 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
2572 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
2574 longueur_tranche=bloc_size*m_max_c*nb_field
2577 mpid=mpi_double_precision
2578 CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
2579 mpid, communicator, code)
2581 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
2589 shiftc = (nb-1)*bloc_size
2590 shiftl = (nb-1)*m_max_c
2597 cu(shiftl+1:shiftl+m_max_c,n) = cmplx(combined_field(:,1,jindex),&
2598 -combined_field(:,2,jindex),kind=8)/2
2601 cu(1,:) = 2*cmplx(
REAL(cu(1,:),KIND=8),0.d0,kind=8)
2607 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
2610 fft_dim=1; istride=1; ostride=1;
2614 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
2615 odist=m_max_pad; onembed(1)=m_max_pad
2618 howmany=bloc_size*nb_field/2
2622 CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
2623 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
2625 CALL dfftw_execute(fftw_plan_multi_c2r)
2627 IF (nb_field==2)
THEN
2628 DO n1 = 1, 2*m_max_pad-1
2629 DO n2 = 1, bloc_size
2631 IF (ru(n1,n2) > 0.5)
THEN
2632 prod_ru(n1,n2) = 1.d0
2634 prod_ru(n1,n2) = 0.d0
2642 CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
2643 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
2645 CALL dfftw_execute(fftw_plan_multi_r2c)
2648 prod_cu = prod_cu/n_r_pad
2650 IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -
t
2654 combined_prod_cu(:,1)=prod_cu(1,:)
2657 combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
2660 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
2663 longueur_tranche=bloc_size*m_max_c*nb_field
2664 mpid=mpi_double_precision
2665 CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
2666 mpid,communicator,code)
2667 IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -
t
2672 shiftc = (nb-1)*bloc_size
2673 shiftl = (nb-1)*m_max_c
2674 intermediate = dist_prod_cu(:,shiftl+i)
2676 IF (n+shiftc > np ) cycle
2677 v_out(n+shiftc, 1, i) =
REAL (intermediate(n),kind=8)
2678 v_out(n+shiftc, 2, i) = aimag(intermediate(n))
2682 IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -
t
section doc_intro_frame_work_num_app Numerical approximation subsection doc_intro_fram_work_num_app_Fourier_FEM Fourier Finite element representation The SFEMaNS code uses a hybrid Fourier Finite element formulation The Fourier decomposition allows to approximate the problem’s solutions for each Fourier mode modulo nonlinear terms that are made explicit The variables are then approximated on a meridian section of the domain with a finite element method The numerical approximation of a function f $f f is written in the following generic t
subroutine, public fft_par_dot_prod_dcl(communicator, V1_in, V2_in, c_out, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_par_real(communicator, V1_in, V_out, opt_nb_plane)
subroutine, public fft_par_prod_dcl(communicator, c1_in, c2_in, c_out, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_par_allen_cahn(communicator, c_in, c_out, temps, padding)
subroutine fft_par_cross_prod_bug(communicator, V1_in, V2_in, V_out, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public ref(communicator, V1_in, V2_in, V_out, temps)
subroutine, public fft_par_prod(communicator, c1_in, c2_in, c_out, temps, padding)
subroutine, public fft_heaviside_dcl(communicator, V1_in, values, V_out, nb_procs, bloc_size, m_max_pad, coeff_tanh, temps)
subroutine, public fft_par_cross_prod(communicator, V1_in, V2_in, V_out, temps, padding)
subroutine, public fft_par_compressive_visc_dcl(communicator, V1_in, V2_in, V_out, pb, nb_procs, bloc_size, m_max_pad, l_G, opt_norm_out, opt_M_vel, opt_norm, temps, padding)
subroutine fft_par_dot_prod_bis(communicator, V1_in, V2_in, V_out, nb_procs, bloc_size, m_max_pad, temps, padding)
subroutine error_petsc(string)
subroutine, public fft_par_cross_prod_dcl(communicator, V1_in, V2_in, V_out, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_par_dot_prod(communicator, V1_in, V2_in, c_out, temps, padding)