SFEMaNS  version 4.1 (work in progress)
Reference documentation for SFEMaNS
 All Classes Files Functions Variables Groups Pages
fft_parallel.f90
Go to the documentation of this file.
1 !
2 !Authors: Katarzyna Boronska, Jean-Luc Guermond, Copyrights 2007
3 !
5 
6  IMPLICIT NONE
16  PRIVATE
17 !===
18 !Comments about the use of select_mode.
19 !The first mode must be 0 and all the other modes must be multiple of one non-zero integer
20 !If select_mode is used, the FFT does not care and computes the real values
21 !as if al the modes were contiguous. It does not matter since the values
22 !in the real spaces are computed at the same fake angles, but they sent back
23 !in the fourier space on the correct fourier modes.
24 !===
25 CONTAINS
26  SUBROUTINE fft_par_real(communicator, V1_in, V_out, opt_nb_plane)
27  USE my_util
28  IMPLICIT NONE
29  include 'fftw3.f'
30  ! Format: V_1in(1:np,1:6,1:m_max_c)
31  ! INPUT ARE COSINE AND SINE COEFFICIENTS
32  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
33  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: v1_in
34  REAL(KIND=8), DIMENSION(:,:,:), ALLOCATABLE :: v_out
35  ! FL-CN possible faute ??? 19/03/2013
36  !REAL(KIND=8), DIMENSION(:,:,:), POINTER :: V_out
37  INTEGER, OPTIONAL :: opt_nb_plane
38  INTEGER :: np, bloc_size, nb_field, &
39  m_max, m_max_c, rang, nb_procs, mpid, m_max_pad, n_r_pad
40  INTEGER(KIND=8) :: fftw_plan_multi_c2r
41  COMPLEX(KIND=8), ALLOCATABLE, DIMENSION(:,:,:) :: cu
42  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
43  REAL(KIND=8), ALLOCATABLE, DIMENSION(:,:,:) :: dist_field, combined_field
44  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
45  INTEGER, DIMENSION(1) :: dim, inembed, onembed
46  ! Recall complexes must be rescaled
47  ! End FFTW parameters
48 #include "petsc/finclude/petsc.h"
49  mpi_comm :: communicator
50 
51  CALL mpi_comm_size(communicator, nb_procs, code)
52  CALL mpi_comm_rank(communicator, rang, code)
53 
54  np = SIZE(v1_in,1)
55  nb_field = SIZE(v1_in,2) ! Number of fields
56  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
57  m_max = m_max_c*nb_procs ! Number of comlex coefficients per point per processor
58  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
59  CALL error_petsc(.OR.'Bug in FFT_PAR_REAL: MOD(nb_field,2)/=0 m_max_c==0')
60  END IF
61 
62  !===Bloc_size is the number of points that are handled by one processor
63  !===once the Fourier modes are all collected on the processor
64  IF (modulo(np,nb_procs)==0) THEN
65  bloc_size = np/nb_procs
66  ELSE
67  CALL error_petsc('Bug in FFT_PAR_REAL: np is not a multiple of nb_procs')
68  END IF
69 
70  IF (present(opt_nb_plane)) THEN
71  IF (opt_nb_plane> 2*m_max-1) THEN
72  m_max_pad = (opt_nb_plane+1)/2
73  ELSE
74  m_max_pad = m_max
75  END IF
76  ELSE
77  m_max_pad = m_max
78  END IF
79  n_r_pad=2*m_max_pad-1
80 
81  ALLOCATE(cu(m_max_pad,nb_field/2, bloc_size))
82  ALLOCATE(dist_field(m_max_c,nb_field,np))
83  ALLOCATE(combined_field(m_max_c,nb_field,np))
84 
85  DO i = 1, m_max_c
86  dist_field(i,:,:) = transpose(v1_in(:,:,i))
87  END DO
88 
89  longueur_tranche=bloc_size*m_max_c*nb_field
90 
91  mpid=mpi_double_precision
92  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
93  mpid, communicator, code)
94 
95  cu = 0.d0
96  DO n = 1, bloc_size
97  DO nb = 1, nb_procs
98  shiftc = (nb-1)*bloc_size
99  shiftl = (nb-1)*m_max_c
100  jindex = n + shiftc
101  DO nf = 1, nb_field/2
102  !===Put real and imaginary parts in a complex
103  !===nf=1,2,3 => V1_in
104  !===INPUT ARE COSINE AND SINE COEFFICIENTS
105  !===THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
106  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
107  -combined_field(:,2*nf,jindex),kind=8)
108  END DO
109  END DO
110  END DO
111  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
112  !===Padding is done by initialization of cu: cu = 0
113  !===This is equivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
114 
115  !===Set the parameters for dfftw
116  fft_dim = 1
117  istride = 1
118  ostride = 1
119  idist = n_r_pad
120  inembed(1)= n_r_pad
121  dim(1) = n_r_pad
122  odist = m_max_pad
123  onembed(1)= m_max_pad
124  howmany = bloc_size*nb_field/2
125 
126  ! FL-CN possible faute ??? 19/03/2013
127  ! IF (ASSOCIATED(V_out)) NULLIFY(V_out)
128  ! IF (ASSOCIATED(V_out)) DEALLOCATE(V_out)
129  ! pb sur la ligne suivante
130  IF (ALLOCATED(v_out)) DEALLOCATE(v_out)
131  ALLOCATE(v_out(n_r_pad,nb_field/2,bloc_size))
132 
133  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
134  onembed, ostride, odist, v_out, inembed, istride, idist, fftw_estimate)
135  CALL dfftw_execute(fftw_plan_multi_c2r)
136 
137  ! clean up
138  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
139 
140  DEALLOCATE(cu, dist_field, combined_field)
141 
142  END SUBROUTINE fft_par_real
143 
144  SUBROUTINE fft_par_cross_prod_dcl(communicator,V1_in, V2_in, V_out, nb_procs, bloc_size, m_max_pad, temps)
145  !This a de-aliased version of the code, FEB 4, 2011, JLG
146  IMPLICIT NONE
147  include 'fftw3.f'
148  ! Format: V_1in(1:np,1:6,1:m_max_c)
149  ! INPUT ARE COSINE AND SINE COEFFICIENTS
150  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
151  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: v1_in, v2_in
152  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: v_out
153  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
154  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
155  INTEGER :: np, np_tot, nb_field, m_max, m_max_c, mpid, n_r_pad
156  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
157  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(V1_in,2), bloc_size) :: cu
158  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2),bloc_size) :: ru
159  COMPLEX(KIND=8), DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu
160  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru
161  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size) :: intermediate
162  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field, combined_field
163  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
164  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
165  INTEGER :: i_field
166  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
167  REAL(KIND=8) :: t
168  ! FFTW parameters
169  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
170  INTEGER, DIMENSION(1) :: dim, inembed, onembed
171  ! Recall complexes must be rescaled
172  ! End FFTW parameters
173 #include "petsc/finclude/petsc.h"
174  mpi_comm :: communicator
175 
176  IF (present(temps)) temps = 0.d0
177 
178  np = SIZE(v1_in,1)
179  nb_field= SIZE(v1_in,2) ! Number of fields
180  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
181  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
182  n_r_pad=2*m_max_pad-1
183  np_tot = nb_procs*bloc_size
184 
185  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
186  WRITE(*,*) ' BUG '
187  stop
188  END IF
189 
190  ! Bloc_size is the number of points that are handled by one processor
191  ! once the Fourier modes are all collected
192  ! Computation of bloc_size and np_tot
193  ! fin de la repartition des points
194 
195  ! Packing all 3 complex components of both v1 and v2 input fields
196  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
197  ! so that after distributing the data to the processes, each one will obtain a part
198  ! on nodal points
199  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
200  t = mpi_wtime()
201 
202  DO i = 1, m_max_c
203  dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
204  dist_field(i,nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
205  END DO
206  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
207 
208  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
209 
210  longueur_tranche=bloc_size*m_max_c*nb_field*2
211 
212  t = mpi_wtime()
213  mpid=mpi_double_precision
214  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
215  mpid, communicator, code)
216  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
217 
218  t = mpi_wtime()
219  !JLG, FEB 4, 2011
220  cu = 0.d0
221  !JLG, FEB 4, 2011
222  DO n = 1, bloc_size
223  DO nb = 1, nb_procs
224  shiftc = (nb-1)*bloc_size
225  shiftl = (nb-1)*m_max_c
226  jindex = n + shiftc
227  DO nf = 1, nb_field
228  ! Put real and imaginary parts in a complex
229  ! nf=1,2,3 => V1_in
230  ! nf=4,5,6 => V2_in
231  ! INPUT ARE COSINE AND SINE COEFFICIENTS
232  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
233  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
234  -combined_field(:,2*nf,jindex),kind=8)
235  END DO
236  END DO
237  END DO
238  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
239  !JLG, FEB 4, 2011
240  !Padding is done by initialization of cu: cu = 0
241  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
242  !JLG, FEB 4, 2011
243 
244  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
245 
246  ! Set the parameters for dfftw
247  fft_dim=1; istride=1; ostride=1;
248  !JLG, FEB 4, 2011
249 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
250 !!$ odist=m_max; onembed(1)=m_max
251  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
252  odist=m_max_pad; onembed(1)=m_max_pad
253  !JLG, FEB 4, 2011
254 
255  howmany=bloc_size*nb_field
256 
257  t = mpi_wtime()
258  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
259  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
260  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
261  CALL dfftw_execute(fftw_plan_multi_c2r)
262 
263  ! clean up
264  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
265 
266  ! CROSS PRODDUCT
267  IF (nb_field==6) THEN
268  prod_ru(:,1,:) = ru(:,2,:)*ru(:,6,:) - ru(:,3,:)*ru(:,5,:)
269  prod_ru(:,2,:) = ru(:,3,:)*ru(:,4,:) - ru(:,1,:)*ru(:,6,:)
270  prod_ru(:,3,:) = ru(:,1,:)*ru(:,5,:) - ru(:,2,:)*ru(:,4,:)
271  END IF
272  ! CROSS PRODUCT
273 
274  howmany = howmany/2
275  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
276  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
277  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
278  CALL dfftw_execute(fftw_plan_multi_r2c)
279 
280  ! clean up
281  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
282 
283  !JLG, FEB 4, 2011
284 !!$ prod_cu = prod_cu/N_r !Scaling
285  prod_cu = (1.d0/n_r_pad)*prod_cu !Scaling
286  !JLG, FEB 4, 2011
287  IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -t
288 
289  !Now we need to redistribute the Fourier coefficients on each processor
290 
291  t = mpi_wtime()
292  combined_prod_cu(:,:,1)=prod_cu(1,:,:)
293  DO n=2, m_max
294  !combined_prod_cu(:,:,n)=prod_cu(n,:,:)
295  combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
296  END DO
297 
298  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
299 
300  t = mpi_wtime()
301  longueur_tranche=bloc_size*m_max_c*nb_field
302  mpid=mpi_double_precision
303  CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
304  mpid,communicator,code)
305  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
306  ! dimensions:
307  t = mpi_wtime()
308  DO i = 1, m_max_c
309  DO nb = 1, nb_procs
310  shiftc = (nb-1)*bloc_size
311  shiftl = (nb-1)*m_max_c
312  intermediate = dist_prod_cu(:,:,shiftl+i)
313  DO n = 1, bloc_size
314  IF (n+shiftc > np ) cycle
315  DO i_field = 1, nb_field/2
316  v_out(n+shiftc, i_field*2-1, i) = REAL (intermediate(i_field,n),kind=8)
317  v_out(n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
318  END DO
319  END DO
320  END DO
321  END DO
322 
323  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
324 
325  END SUBROUTINE fft_par_cross_prod_dcl
326 
327  SUBROUTINE fft_par_dot_prod_dcl(communicator,V1_in, V2_in, c_out, nb_procs, bloc_size, m_max_pad, temps)
328  !FFT (FFT(-1) V1 . FFT(-1) V2) = c_out
329  !This a de-aliased version of the code, FEB 4, 2011, JLG
330  IMPLICIT NONE
331  include 'fftw3.f'
332  ! Format: V_1in(1:np,1:6,1:m_max_c)
333  ! INPUT ARE COSINE AND SINE COEFFICIENTS
334  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
335  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: v1_in, v2_in
336  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: c_out
337  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
338  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad
339  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(V1_in,2), bloc_size) :: cu
340  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2),bloc_size) :: ru
341  COMPLEX(KIND=8), DIMENSION(m_max_pad,bloc_size) :: prod_cu
342  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru
343  COMPLEX(KIND=8), DIMENSION(bloc_size) :: intermediate
344  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field, combined_field
345  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
346  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
347  INTEGER :: np, np_tot, nb_field, m_max, m_max_c, mpid, n_r_pad
348  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
349  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
350  REAL(KIND=8) :: t
351  ! FFTW parameters
352  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
353  INTEGER, DIMENSION(1) :: dim, inembed, onembed
354  ! Recall complexes must be rescaled
355  ! End FFTW parameters
356 #include "petsc/finclude/petsc.h"
357  mpi_comm :: communicator
358 
359  IF (present(temps)) temps = 0.d0
360 
361  np = SIZE(v1_in,1)
362  nb_field= SIZE(v1_in,2) ! Number of fields
363  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
364  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
365  np_tot = nb_procs*bloc_size
366  n_r_pad=2*m_max_pad-1
367 
368  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
369  WRITE(*,*) ' BUG '
370  stop
371  END IF
372 
373  ! Packing all 3 complex components of both v1 and v2 input fields
374  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
375  ! so that after distributing the data to the processes, each one will obtain a part
376  ! on nodal points
377  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
378  t = mpi_wtime()
379 
380  DO i = 1, m_max_c
381  dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
382  dist_field(i,nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
383  END DO
384  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
385 
386  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
387 
388  longueur_tranche=bloc_size*m_max_c*nb_field*2
389 
390  t = mpi_wtime()
391  mpid=mpi_double_precision
392  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
393  mpid, communicator, code)
394  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
395 
396  t = mpi_wtime()
397  !JLG, FEB 4, 2011
398  cu = 0.d0
399  !JLG, FEB 4, 2011
400  DO n = 1, bloc_size
401  DO nb = 1, nb_procs
402  shiftc = (nb-1)*bloc_size
403  shiftl = (nb-1)*m_max_c
404  jindex = n + shiftc
405  DO nf = 1, nb_field
406  ! Put real and imaginary parts in a complex
407  ! nf=1,2,3 => V1_in
408  ! nf=4,5,6 => V2_in
409  ! INPUT ARE COSINE AND SINE COEFFICIENTS
410  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
411  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
412  -combined_field(:,2*nf,jindex),kind=8)
413  END DO
414  END DO
415  END DO
416  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
417  !JLG, FEB 4, 2011
418  !Padding is done by initialization of cu: cu = 0
419  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
420  !JLG, FEB 4, 2011
421 
422  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
423 
424  ! Set the parameters for dfftw
425  fft_dim=1; istride=1; ostride=1;
426  !JLG, FEB 4, 2011
427 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
428 !!$ odist=m_max; onembed(1)=m_max
429  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
430  odist=m_max_pad; onembed(1)=m_max_pad
431  !JLG, FEB 4, 2011
432 
433  howmany=bloc_size*nb_field
434 
435 
436  t = mpi_wtime()
437  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
438  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
439  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
440  CALL dfftw_execute(fftw_plan_multi_c2r)
441 
442  ! clean up
443  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
444 
445  ! DOT PRODDUCT
446  IF (nb_field==6) THEN
447  prod_ru(:,:) = ru(:,1,:)*ru(:,4,:) + ru(:,2,:)*ru(:,5,:) + ru(:,3,:)*ru(:,6,:)
448  END IF
449  ! DOT PRODUCT
450 
451  howmany = bloc_size*1
452  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
453  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
454  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
455  CALL dfftw_execute(fftw_plan_multi_r2c)
456 
457  ! clean up
458  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
459 
460  !JLG, FEB 4, 2011
461 !!$ prod_cu = prod_cu/N_r !Scaling
462  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
463  !JLG, FEB 4, 2011
464  IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -t
465 
466  !Now we need to redistribute the Fourier coefficients on each processor
467  t = mpi_wtime()
468  combined_prod_cu(:,1)=prod_cu(1,:)
469  DO n=2, m_max
470  combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
471  END DO
472 
473  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
474 
475  t = mpi_wtime()
476  longueur_tranche=bloc_size*m_max_c*2
477  mpid=mpi_double_precision
478  CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
479  mpid,communicator,code)
480  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
481 
482  t = mpi_wtime()
483  DO i = 1, m_max_c
484  DO nb = 1, nb_procs
485  shiftc = (nb-1)*bloc_size
486  shiftl = (nb-1)*m_max_c
487  intermediate = dist_prod_cu(:,shiftl+i)
488  DO n = 1, bloc_size
489  IF (n+shiftc > np ) cycle
490  c_out(n+shiftc, 1, i) = REAL (intermediate(n),kind=8)
491  c_out(n+shiftc, 2 , i) = aimag(intermediate(n))
492  END DO
493  END DO
494  END DO
495  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
496 
497  END SUBROUTINE fft_par_dot_prod_dcl
498 
499  SUBROUTINE fft_par_prod_dcl(communicator, c1_in, c2_in, c_out, nb_procs, bloc_size, m_max_pad, temps)
500  !FFT (FFT(-1) V1 . FFT(-1) V2) = c_out
501  !This a de-aliased version of the code, FEB 4, 2011, JLG
502  USE my_util
503  IMPLICIT NONE
504  include 'fftw3.f'
505  ! Format: c_1in(1:np,1:2,1:m_max_c)
506  ! INPUT ARE COSINE AND SINE COEFFICIENTS
507  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
508  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: c1_in, c2_in
509  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: c_out
510  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
511  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad
512  COMPLEX(KIND=8), DIMENSION(m_max_pad, 2, bloc_size) :: cu
513  REAL(KIND=8), DIMENSION(2*m_max_pad-1, 2, bloc_size) :: ru
514  COMPLEX(KIND=8), DIMENSION(m_max_pad, bloc_size) :: prod_cu
515  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru
516  REAL(KIND=8), DIMENSION(SIZE(c1_in,3),4, bloc_size*nb_procs) :: dist_field, combined_field
517  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(c1_in,3)*nb_procs) :: dist_prod_cu, combined_prod_cu
518  COMPLEX(KIND=8), DIMENSION(bloc_size) :: intermediate
519  INTEGER :: np, np_tot, m_max, m_max_c, mpid, n_r_pad
520  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
521  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
522  REAL(KIND=8) :: t
523  ! FFTW parameters
524  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
525  INTEGER, DIMENSION(1) :: dim, inembed, onembed
526  ! Recall complexes must be rescaled
527  ! End FFTW parameters
528 #include "petsc/finclude/petsc.h"
529  mpi_comm :: communicator
530 
531  IF (present(temps)) temps = 0.d0
532 
533  np = SIZE(c1_in,1)
534  m_max_c = SIZE(c1_in,3) ! Number of complex (cosines + sines) coefficients per point
535  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
536  np_tot = nb_procs*bloc_size
537  n_r_pad=2*m_max_pad-1
538 
539  IF (m_max_c==0) THEN
540  WRITE(*,*) ' BUG '
541  stop
542  END IF
543 
544  ! Packing all 3 complex components of both v1 and v2 input fields
545  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
546  ! so that after distributing the data to the processes, each one will obtain a part
547  ! on nodal points
548  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
549  t = mpi_wtime()
550  DO i = 1, m_max_c
551  dist_field(i,1:2,1:np) = transpose(c1_in(:,:,i))
552  dist_field(i,3:4,1:np) = transpose(c2_in(:,:,i))
553  END DO
554  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
555 
556  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
557 
558  longueur_tranche=bloc_size*m_max_c*4
559 
560  t = mpi_wtime()
561  mpid=mpi_double_precision
562  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
563  mpid, communicator, code)
564  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
565 
566  t = mpi_wtime()
567  !JLG, FEB 4, 2011
568  cu = 0.d0
569  !JLG, FEB 4, 2011
570  DO n = 1, bloc_size
571  DO nb = 1, nb_procs
572  shiftc = (nb-1)*bloc_size
573  shiftl = (nb-1)*m_max_c
574  jindex = n + shiftc
575  DO nf = 1, 2
576  ! Put real and imaginary parts in a complex
577  ! nf=1 => c1_in
578  ! nf=2 => c2_in
579  ! INPUT ARE COSINE AND SINE COEFFICIENTS
580  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
581  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
582  -combined_field(:,2*nf,jindex),kind=8)
583  END DO
584  END DO
585  END DO
586  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
587  !JLG, FEB 4, 2011
588  !Padding is done by initialization of cu: cu = 0
589  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
590  !JLG, FEB 4, 2011
591 
592  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
593 
594  ! Set the parameters for dfftw
595  fft_dim=1; istride=1; ostride=1;
596  !JLG, FEB 4, 2011
597 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
598 !!$ odist=m_max; onembed(1)=m_max
599  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
600  odist=m_max_pad; onembed(1)=m_max_pad
601  !JLG, FEB 4, 2011
602 
603  howmany=bloc_size*2
604 
605  t = mpi_wtime()
606  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
607  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
608  CALL dfftw_execute(fftw_plan_multi_c2r)
609 
610  ! clean up
611  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
612 
613  !PRODDUCT
614  prod_ru(:,:) = ru(:,1,:)*ru(:,2,:)
615  !PRODUCT
616 
617  howmany = bloc_size*1
618  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
619  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
620  CALL dfftw_execute(fftw_plan_multi_r2c)
621 
622  ! clean up
623  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
624 
625  !JLG, FEB 4, 2011
626 !!$ prod_cu = prod_cu/N_r !Scaling
627  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
628  !JLG, FEB 4, 2011
629  IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -t
630 
631  !Now we need to redistribute the Fourier coefficients on each processor
632  t = mpi_wtime()
633  combined_prod_cu(:,1)=prod_cu(1,:)
634  DO n=2, m_max
635  combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
636  END DO
637 
638  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
639 
640  t = mpi_wtime()
641  longueur_tranche=bloc_size*m_max_c*2
642  mpid=mpi_double_precision
643  CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
644  mpid,communicator,code)
645  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
646 
647  t = mpi_wtime()
648  DO i = 1, m_max_c
649  DO nb = 1, nb_procs
650  shiftc = (nb-1)*bloc_size
651  shiftl = (nb-1)*m_max_c
652  intermediate = dist_prod_cu(:,shiftl+i)
653  DO n = 1, bloc_size
654  IF (n+shiftc > np ) cycle
655  c_out(n+shiftc, 1, i) = REAL (intermediate(n),kind=8)
656  c_out(n+shiftc, 2 , i) = aimag(intermediate(n))
657  END DO
658  END DO
659  END DO
660  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
661 
662  END SUBROUTINE fft_par_prod_dcl
663 
664  SUBROUTINE fft_heaviside_dcl(communicator, V1_in, values, V_out, nb_procs, bloc_size, &
665  m_max_pad, coeff_tanh, temps)
666  !This a de-aliased version of the code, FEB 4, 2011, JLG
667  IMPLICIT NONE
668  include 'fftw3.f'
669  ! Format: V_1in(1:np,1:6,1:m_max_c)
670  ! INPUT ARE COSINE AND SINE COEFFICIENTS
671  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
672  REAL(KIND=8), DIMENSION(:,:,:,:),INTENT(IN) :: v1_in
673  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: values
674  REAL(KIND=8), INTENT(IN) :: coeff_tanh
675  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: v_out
676  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
677  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
678  INTEGER :: np, np_tot, nb_field, m_max, m_max_c, mpid, n_r_pad
679  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
680  COMPLEX(KIND=8), DIMENSION(m_max_pad, bloc_size) :: cu
681  REAL(KIND=8), DIMENSION(2*m_max_pad-1, bloc_size) :: ru
682  COMPLEX(KIND=8), DIMENSION(m_max_pad,bloc_size) :: prod_cu
683  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru
684  COMPLEX(KIND=8), DIMENSION(bloc_size) :: intermediate
685  REAL(KIND=8), DIMENSION(SIZE(V1_in,4),SIZE(V1_in,3),bloc_size*nb_procs) :: dist_field, combined_field
686  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,4)*nb_procs) :: combined_prod_cu
687  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,4)*nb_procs) :: dist_prod_cu
688  REAL(KIND=8), DIMENSION(SIZE(values),2*m_max_pad-1, bloc_size) :: v1_real_loc
689  INTEGER :: n1, n2
690  INTEGER :: nb, shiftc, shiftl, jindex, longueur_tranche, i, n, code, interface_nb
691  REAL(KIND=8) :: t
692  ! FFTW parameters
693  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
694  INTEGER, DIMENSION(1) :: dim, inembed, onembed
695  ! Recall complexes must be rescaled
696  ! End FFTW parameters
697 #include "petsc/finclude/petsc.h"
698  mpi_comm :: communicator
699 
700  IF (present(temps)) temps = 0.d0
701 
702  np = SIZE(v1_in,2)
703  nb_field= SIZE(v1_in,3) ! Number of fields
704  m_max_c = SIZE(v1_in,4) ! Number of complex (cosines + sines) coefficients per point
705  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
706  n_r_pad=2*m_max_pad-1
707  np_tot = nb_procs*bloc_size
708 
709  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
710  WRITE(*,*) ' BUG '
711  stop
712  END IF
713 
714  ! Bloc_size is the number of points that are handled by one processor
715  ! once the Fourier modes are all collected
716  ! Computation of bloc_size and np_tot
717  ! fin de la repartition des points
718 
719  ! Packing all 3 complex components of both v1 and v2 input fields
720  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
721  ! so that after distributing the data to the processes, each one will obtain a part
722  ! on nodal points
723  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
724  t = mpi_wtime()
725 
726  DO interface_nb = 1, SIZE(values)-1
727  dist_field = 0.d0
728  DO i = 1, m_max_c
729  dist_field(i,1:nb_field,1:np) = transpose(v1_in(interface_nb,:,:,i))
730  END DO
731  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
732 
733  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
734 
735  longueur_tranche=bloc_size*m_max_c*nb_field
736 
737  t = mpi_wtime()
738  mpid=mpi_double_precision
739  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
740  mpid, communicator, code)
741 
742  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
743 
744  t = mpi_wtime()
745  !JLG, FEB 4, 2011
746  cu = 0.d0
747  !JLG, FEB 4, 2011
748  DO n = 1, bloc_size
749  DO nb = 1, nb_procs
750  shiftc = (nb-1)*bloc_size
751  shiftl = (nb-1)*m_max_c
752  jindex = n + shiftc
753  ! Put real and imaginary parts in a complex
754  ! nf=1,2,3 => V1_in
755  ! nf=4,5,6 => V2_in
756  ! INPUT ARE COSINE AND SINE COEFFICIENTS
757  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
758  cu(shiftl+1:shiftl+m_max_c,n) = 0.5d0*cmplx(combined_field(:,1,jindex),&
759  -combined_field(:,2,jindex),kind=8)
760  END DO
761  END DO
762  cu(1,:) = 2*cmplx(REAL(cu(1,:),KIND=8),0.d0,kind=8)
763 
764 
765  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
766 
767  ! Set the parameters for dfftw
768  fft_dim=1; istride=1; ostride=1;
769  !JLG, FEB 4, 2011
770 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
771 !!$ odist=m_max; onembed(1)=m_max
772  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
773  odist=m_max_pad; onembed(1)=m_max_pad
774  !JLG, FEB 4, 2011
775 
776  howmany=bloc_size*nb_field/2
777 
778  t = mpi_wtime()
779  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
780  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
781  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
782  CALL dfftw_execute(fftw_plan_multi_c2r)
783 
784  ! clean up
785  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
786  v1_real_loc(interface_nb,:,:)=ru
787  END DO
788 
789  ! CROSS PRODDUCT
790  IF (nb_field==2) THEN
791  DO n1 = 1, 2*m_max_pad-1
792  DO n2 = 1, bloc_size
793  prod_ru(n1,n2) = values(1)
794  DO interface_nb = 1, SIZE(values)-1
795  !reconstruction convex
796  prod_ru(n1,n2) = prod_ru(n1,n2)*(1.d0-regul(v1_real_loc(interface_nb,n1,n2),coeff_tanh)) + &
797  values(interface_nb+1)*regul(v1_real_loc(interface_nb,n1,n2),coeff_tanh)
798  END DO
799  END DO
800  END DO
801  END IF
802  ! CROSS PRODUCT
803 
804  howmany = howmany
805  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
806  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
807  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
808  CALL dfftw_execute(fftw_plan_multi_r2c)
809 
810  ! clean up
811  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
812 
813  !JLG, FEB 4, 2011
814 !!$ prod_cu = prod_cu/N_r !Scaling
815  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
816  !JLG, FEB 4, 2011
817  IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -t
818  !Now we need to redistribute the Fourier coefficients on each processor
819 
820  t = mpi_wtime()
821  combined_prod_cu(:,1)=prod_cu(1,:)
822  DO n=2, m_max
823  !combined_prod_cu(:,:,n)=prod_cu(n,:,:)
824  combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
825  END DO
826 
827  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
828 
829  t = mpi_wtime()
830  longueur_tranche=bloc_size*m_max_c*nb_field
831  mpid=mpi_double_precision
832  CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
833  mpid,communicator,code)
834  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
835  ! dimensions:
836  t = mpi_wtime()
837  DO i = 1, m_max_c
838  DO nb = 1, nb_procs
839  shiftc = (nb-1)*bloc_size
840  shiftl = (nb-1)*m_max_c
841  intermediate = dist_prod_cu(:,shiftl+i)
842  DO n = 1, bloc_size
843  IF (n+shiftc > np ) cycle
844  v_out(n+shiftc, 1, i) = REAL (intermediate(n),kind=8)
845  v_out(n+shiftc, 2, i) = aimag(intermediate(n))
846  END DO
847  END DO
848  END DO
849  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
850 
851  END SUBROUTINE fft_heaviside_dcl
852 
853  SUBROUTINE fft_scalar_vect_dcl(communicator,V1_in, V2_in, V_out, pb, nb_procs, &
854  bloc_size, m_max_pad, temps)
855  !This a de-aliased version of the code, FEB 4, 2011, JLG
856  USE my_util
857  IMPLICIT NONE
858  include 'fftw3.f'
859  ! Format: V_1in(1:np,1:6,1:m_max_c)
860  ! INPUT ARE COSINE AND SINE COEFFICIENTS
861  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
862  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: v1_in ! VECTOR
863  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: v2_in ! SCALAR
864  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: v_out
865  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
866  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
867  INTEGER, INTENT(IN) :: pb
868  INTEGER :: np, np_tot, nb_field1, nb_field2, m_max, m_max_c, mpid, n_r_pad
869  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
870  COMPLEX(KIND=8), DIMENSION(m_max_pad, (SIZE(V1_in,2)+SIZE(V2_in,2))/2, bloc_size) :: cu
871  REAL(KIND=8), DIMENSION(2*m_max_pad-1,(SIZE(V1_in,2)+SIZE(V2_in,2))/2,bloc_size) :: ru
872  COMPLEX(KIND=8), DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu
873  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru
874  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2, bloc_size) :: intermediate
875  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),SIZE(V1_in,2)+SIZE(V2_in,2),bloc_size*nb_procs) :: dist_field
876  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),SIZE(V1_in,2)+SIZE(V2_in,2),bloc_size*nb_procs) :: combined_field
877  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
878  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
879  INTEGER :: i_field
880  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
881  REAL(KIND=8) :: t
882  ! FFTW parameters
883  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
884  INTEGER, DIMENSION(1) :: dim, inembed, onembed
885  ! Recall complexes must be rescaled
886  ! End FFTW parameters
887 #include "petsc/finclude/petsc.h"
888  mpi_comm :: communicator
889 
890  IF (present(temps)) temps = 0.d0
891 
892  np = SIZE(v1_in,1)
893  nb_field1= SIZE(v1_in,2) ! Number of fields
894  nb_field2= SIZE(v2_in,2) ! Number of fields
895  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
896  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
897  n_r_pad=2*m_max_pad-1
898  np_tot = nb_procs*bloc_size
899 
900  IF (mod(nb_field1,2)/=0 .OR. mod(nb_field2,2)/=0 .OR. m_max_c==0) THEN
901  WRITE(*,*) ' BUG '
902  stop
903  END IF
904 
905  ! Bloc_size is the number of points that are handled by one processor
906  ! once the Fourier modes are all collected
907  ! Computation of bloc_size and np_tot
908  ! fin de la repartition des points
909 
910  ! Packing all 3 complex components of both v1 and v2 input fields
911  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
912  ! so that after distributing the data to the processes, each one will obtain a part
913  ! on nodal points
914  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
915  t = mpi_wtime()
916 
917  DO i = 1, m_max_c
918  dist_field(i,1:nb_field1,1:np) = transpose(v1_in(:,:,i))
919  dist_field(i,nb_field1+1:nb_field1+nb_field2,1:np) = transpose(v2_in(:,:,i))
920  END DO
921  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
922 
923  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
924 
925  longueur_tranche=bloc_size*m_max_c*(nb_field1+nb_field2)
926 
927  t = mpi_wtime()
928  mpid=mpi_double_precision
929  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
930  mpid, communicator, code)
931  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
932 
933  t = mpi_wtime()
934  !JLG, FEB 4, 2011
935  cu = 0.d0
936  !JLG, FEB 4, 2011
937  DO n = 1, bloc_size
938  DO nb = 1, nb_procs
939  shiftc = (nb-1)*bloc_size
940  shiftl = (nb-1)*m_max_c
941  jindex = n + shiftc
942  DO nf = 1, (nb_field1+nb_field2)/2
943  ! Put real and imaginary parts in a complex
944  ! nf=1,2,3 => V1_in
945  ! nf=4 => V2_in
946  ! INPUT ARE COSINE AND SINE COEFFICIENTS
947  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
948  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
949  -combined_field(:,2*nf,jindex),kind=8)
950  END DO
951  END DO
952  END DO
953  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
954  !JLG, FEB 4, 2011
955  !Padding is done by initialization of cu: cu = 0
956  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
957  !JLG, FEB 4, 2011
958 
959  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
960 
961  ! Set the parameters for dfftw
962  fft_dim=1; istride=1; ostride=1;
963  !JLG, FEB 4, 2011
964 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
965 !!$ odist=m_max; onembed(1)=m_max
966  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
967  odist=m_max_pad; onembed(1)=m_max_pad
968  !JLG, FEB 4, 2011
969 
970  howmany=bloc_size*(nb_field1+nb_field2)/2
971 
972  t = mpi_wtime()
973  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
974  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
975  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
976  CALL dfftw_execute(fftw_plan_multi_c2r)
977 
978  ! clean up
979  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
980 
981  IF (nb_field1 == 6 .AND. nb_field2 == 2) THEN
982  IF (pb==1) THEN
983  prod_ru(:,1,:) = ru(:,4,:)*ru(:,1,:)
984  prod_ru(:,2,:) = ru(:,4,:)*ru(:,2,:)
985  prod_ru(:,3,:) = ru(:,4,:)*ru(:,3,:)
986  ELSE IF (pb==2) THEN
987  prod_ru(:,1,:) = 1/ru(:,4,:)*ru(:,1,:)
988  prod_ru(:,2,:) = 1/ru(:,4,:)*ru(:,2,:)
989  prod_ru(:,3,:) = 1/ru(:,4,:)*ru(:,3,:)
990  ELSE
991  CALL error_petsc('error in problem type while calling FFT_SCALAR_VECT_DCL ')
992  END IF
993  ELSE
994  CALL error_petsc('error in dimension of INPUT variables while calling FFT_SCALAR_VECT_DCL ')
995  END IF
996 
997  howmany = bloc_size*nb_field1/2
998 
999  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
1000  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
1001  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
1002  CALL dfftw_execute(fftw_plan_multi_r2c)
1003  ! clean up
1004  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
1005 
1006 
1007 
1008  !JLG, FEB 4, 2011
1009 !!$ prod_cu = prod_cu/N_r !Scaling
1010  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
1011  !JLG, FEB 4, 2011
1012  IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -t
1013 
1014  !Now we need to redistribute the Fourier coefficients on each processor
1015 
1016  t = mpi_wtime()
1017  combined_prod_cu(:,:,1)=prod_cu(1,:,:)
1018  DO n=2, m_max
1019  !combined_prod_cu(:,:,n)=prod_cu(n,:,:)
1020  combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
1021  END DO
1022 
1023  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
1024 
1025  longueur_tranche=bloc_size*m_max_c*nb_field1
1026 
1027  t = mpi_wtime()
1028  mpid=mpi_double_precision
1029  CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
1030  mpid,communicator,code)
1031 
1032  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
1033  ! dimensions:
1034  t = mpi_wtime()
1035 
1036  DO i = 1, m_max_c
1037  DO nb = 1, nb_procs
1038  shiftc = (nb-1)*bloc_size
1039  shiftl = (nb-1)*m_max_c
1040  intermediate = dist_prod_cu(:,:,shiftl+i)
1041  DO n = 1, bloc_size
1042  IF (n+shiftc > np ) cycle
1043  DO i_field = 1, nb_field1/2
1044  v_out(n+shiftc, i_field*2-1, i) = REAL (intermediate(i_field,n),kind=8)
1045  v_out(n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
1046  END DO
1047  END DO
1048  END DO
1049  END DO
1050  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
1051 
1052  END SUBROUTINE fft_scalar_vect_dcl
1053 
1054  SUBROUTINE fft_max_vel_dcl(communicator, V1_in, V_out, nb_procs, bloc_size, m_max_pad)
1055  !This a de-aliased version of the code, FEB 4, 2011, JLG
1056  USE my_util
1057  IMPLICIT NONE
1058  include 'fftw3.f'
1059  ! Format: V_1in(1:np,1:6,1:m_max_c)
1060  ! INPUT ARE COSINE AND SINE COEFFICIENTS
1061  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
1062  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: v1_in ! VECTOR
1063  REAL(KIND=8), INTENT(OUT) :: v_out
1064  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
1065  INTEGER :: np, np_tot, nb_field1, m_max, m_max_c, mpid, n_r_pad
1066  INTEGER(KIND=8) :: fftw_plan_multi_c2r
1067  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(V1_in,2)/2, bloc_size) :: cu
1068  REAL(KIND=8), DIMENSION(2*m_max_pad-1, SIZE(V1_in,2)/2,bloc_size) :: ru
1069  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field, combined_field
1070  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
1071  REAL(KIND=8), DIMENSION(2*m_max_pad-1, bloc_size) :: norm_vel_loc
1072  REAL(KIND=8) :: max_velocity
1073  ! FFTW parameters
1074  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
1075  INTEGER, DIMENSION(1) :: dim, inembed, onembed
1076  ! Recall complexes must be rescaled
1077  ! End FFTW parameters
1078 #include "petsc/finclude/petsc.h"
1079  mpi_comm :: communicator
1080 
1081  np = SIZE(v1_in,1)
1082  nb_field1 = SIZE(v1_in,2) ! Number of fields
1083  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
1084  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
1085  n_r_pad = 2*m_max_pad-1
1086  np_tot = nb_procs*bloc_size
1087 
1088  IF (mod(nb_field1,2)/=0 .OR. m_max_c==0) THEN
1089  WRITE(*,*) ' BUG '
1090  stop
1091  END IF
1092 
1093  ! Bloc_size is the number of points that are handled by one processor
1094  ! once the Fourier modes are all collected
1095  ! Computation of bloc_size and np_tot
1096  ! fin de la repartition des points
1097 
1098  ! Packing all 3 complex components of both v1 and v2 input fields
1099  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
1100  ! so that after distributing the data to the processes, each one will obtain a part
1101  ! on nodal points
1102  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
1103 
1104  DO i = 1, m_max_c
1105  dist_field(i,1:nb_field1,1:np) = transpose(v1_in(:,:,i))
1106  END DO
1107 
1108  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 0.d0
1109 
1110  longueur_tranche=bloc_size*m_max_c*nb_field1
1111 
1112  mpid=mpi_double_precision
1113  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
1114  mpid, communicator, code)
1115 
1116  !JLG, FEB 4, 2011
1117  cu = 0.d0
1118  !JLG, FEB 4, 2011
1119  DO n = 1, bloc_size
1120  DO nb = 1, nb_procs
1121  shiftc = (nb-1)*bloc_size
1122  shiftl = (nb-1)*m_max_c
1123  jindex = n + shiftc
1124  DO nf = 1, nb_field1/2
1125  ! Put real and imaginary parts in a complex
1126  ! nf=1,2,3 => V1_in
1127  ! nf=4 => V2_in
1128  ! INPUT ARE COSINE AND SINE COEFFICIENTS
1129  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
1130  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
1131  -combined_field(:,2*nf,jindex),kind=8)
1132  END DO
1133  END DO
1134  END DO
1135  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
1136  !JLG, FEB 4, 2011
1137  !Padding is done by initialization of cu: cu = 0
1138  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
1139  !JLG, FEB 4, 2011
1140 
1141  ! Set the parameters for dfftw
1142  fft_dim=1; istride=1; ostride=1;
1143  !JLG, FEB 4, 2011
1144 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
1145 !!$ odist=m_max; onembed(1)=m_max
1146  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1147  odist=m_max_pad; onembed(1)=m_max_pad
1148  !JLG, FEB 4, 2011
1149 
1150  howmany=bloc_size*nb_field1/2
1151 
1152  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
1153  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
1154  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
1155  CALL dfftw_execute(fftw_plan_multi_c2r)
1156 
1157  ! clean up
1158  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
1159 
1160 
1161  norm_vel_loc(:,:) = sqrt(ru(:,1,:)**2 + ru(:,2,:)**2 + ru(:,3,:)**2)
1162  max_velocity = maxval(norm_vel_loc)
1163  CALL mpi_allreduce(max_velocity, v_out, 1, mpi_double_precision, &
1164  mpi_max, communicator, code)
1165 
1166  END SUBROUTINE fft_max_vel_dcl
1167 
1168  SUBROUTINE fft_tensor_dcl(communicator,V1_in, V2_in, V_out, nb_procs, bloc_size, m_max_pad, temps, opt_tension)
1169  !This a de-aliased version of the code, FEB 4, 2011, JLG
1170  USE my_util
1171  IMPLICIT NONE
1172  include 'fftw3.f'
1173  ! Format: V_1in(1:np,1:6,1:m_max_c)
1174  ! INPUT ARE COSINE AND SINE COEFFICIENTS
1175  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
1176  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: v1_in, v2_in
1177  REAL(KIND=8), DIMENSION(:,:,:,:), INTENT(OUT) :: v_out
1178  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
1179  LOGICAL, OPTIONAL, INTENT(IN) :: opt_tension
1180  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
1181  INTEGER :: np, np_tot, nb_field, m_max, m_max_c, mpid, n_r_pad
1182  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
1183  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(V1_in,2), bloc_size) :: cu
1184  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2),bloc_size) :: ru
1185  COMPLEX(KIND=8), DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu1, prod_cu2, prod_cu3
1186  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru1, prod_ru2, prod_ru3
1187  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field
1188  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2),bloc_size*nb_procs) :: combined_field
1189  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu1
1190  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu2
1191  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu3
1192  COMPLEX(KIND=8), DIMENSION(3,SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu_tot
1193  COMPLEX(KIND=8), DIMENSION(3,SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu_tot
1194  COMPLEX(KIND=8), DIMENSION(3,SIZE(V1_in,2)/2,bloc_size) :: intermediate_tot
1195  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: norm_grad_tension
1196  INTEGER :: i_field
1197  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
1198  REAL(KIND=8) :: t
1199  ! FFTW parameters
1200  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
1201  INTEGER, DIMENSION(1) :: dim, inembed, onembed
1202  ! Recall complexes must be rescaled
1203  ! End FFTW parameters
1204 #include "petsc/finclude/petsc.h"
1205  mpi_comm :: communicator
1206 
1207  IF (present(temps)) temps = 0.d0
1208 
1209  np = SIZE(v1_in,1)
1210  nb_field= SIZE(v1_in,2) ! Number of fields
1211  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
1212  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
1213  n_r_pad=2*m_max_pad-1
1214  np_tot = nb_procs*bloc_size
1215 
1216  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
1217  WRITE(*,*) ' BUG '
1218  stop
1219  END IF
1220 
1221  ! Bloc_size is the number of points that are handled by one processor
1222  ! once the Fourier modes are all collected
1223  ! Computation of bloc_size and np_tot
1224  ! fin de la repartition des points
1225 
1226  ! Packing all 3 complex components of both v1 and v2 input fields
1227  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
1228  ! so that after distributing the data to the processes, each one will obtain a part
1229  ! on nodal points
1230  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
1231  t = mpi_wtime()
1232 
1233  DO i = 1, m_max_c
1234  dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
1235  dist_field(i,nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
1236  END DO
1237  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
1238 
1239  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
1240 
1241  longueur_tranche=bloc_size*m_max_c*nb_field*2
1242 
1243  t = mpi_wtime()
1244  mpid=mpi_double_precision
1245  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
1246  mpid, communicator, code)
1247  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
1248 
1249  t = mpi_wtime()
1250  !JLG, FEB 4, 2011
1251  cu = 0.d0
1252  !JLG, FEB 4, 2011
1253  DO n = 1, bloc_size
1254  DO nb = 1, nb_procs
1255  shiftc = (nb-1)*bloc_size
1256  shiftl = (nb-1)*m_max_c
1257  jindex = n + shiftc
1258  DO nf = 1, nb_field
1259  ! Put real and imaginary parts in a complex
1260  ! nf=1,2,3 => V1_in
1261  ! nf=4,5,6 => V2_in
1262  ! INPUT ARE COSINE AND SINE COEFFICIENTS
1263  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
1264  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
1265  -combined_field(:,2*nf,jindex),kind=8)
1266  END DO
1267  END DO
1268  END DO
1269  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
1270  !JLG, FEB 4, 2011
1271  !Padding is done by initialization of cu: cu = 0
1272  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
1273  !JLG, FEB 4, 2011
1274 
1275  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
1276 
1277  ! Set the parameters for dfftw
1278  fft_dim=1; istride=1; ostride=1;
1279  !JLG, FEB 4, 2011
1280 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
1281 !!$ odist=m_max; onembed(1)=m_max
1282  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1283  odist=m_max_pad; onembed(1)=m_max_pad
1284  !JLG, FEB 4, 2011
1285 
1286  howmany=bloc_size*nb_field
1287 
1288 
1289  t = mpi_wtime()
1290  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
1291  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
1292  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
1293  CALL dfftw_execute(fftw_plan_multi_c2r)
1294 
1295  ! clean up
1296  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
1297 
1298 
1299  ! TENSOR PRODDUCT
1300  IF(.NOT.present(opt_tension)) THEN
1301  IF (nb_field==6) THEN
1302  prod_ru1(:,1,:) = ru(:,1,:)*ru(:,4,:)
1303  prod_ru1(:,2,:) = ru(:,1,:)*ru(:,5,:)
1304  prod_ru1(:,3,:) = ru(:,1,:)*ru(:,6,:)
1305 
1306  prod_ru2(:,1,:) = ru(:,2,:)*ru(:,4,:)
1307  prod_ru2(:,2,:) = ru(:,2,:)*ru(:,5,:)
1308  prod_ru2(:,3,:) = ru(:,2,:)*ru(:,6,:)
1309 
1310  prod_ru3(:,1,:) = ru(:,3,:)*ru(:,4,:)
1311  prod_ru3(:,2,:) = ru(:,3,:)*ru(:,5,:)
1312  prod_ru3(:,3,:) = ru(:,3,:)*ru(:,6,:)
1313  END IF
1314  ELSE IF (opt_tension) THEN
1315  IF (nb_field==6) THEN
1316  norm_grad_tension = sqrt(ru(:,4,:)**2 + ru(:,5,:)**2 + ru(:,6,:)**2)
1317  prod_ru1(:,1,:) = ru(:,1,:)*ru(:,4,:)/(norm_grad_tension + 1.d-14) &
1318  - norm_grad_tension
1319  prod_ru1(:,2,:) = ru(:,1,:)*ru(:,5,:)/(norm_grad_tension + 1.d-14)
1320  prod_ru1(:,3,:) = ru(:,1,:)*ru(:,6,:)/(norm_grad_tension + 1.d-14)
1321 
1322  prod_ru2(:,1,:) = ru(:,2,:)*ru(:,4,:)/(norm_grad_tension + 1.d-14)
1323  prod_ru2(:,2,:) = ru(:,2,:)*ru(:,5,:)/(norm_grad_tension + 1.d-14) &
1324  - norm_grad_tension
1325  prod_ru2(:,3,:) = ru(:,2,:)*ru(:,6,:)/(norm_grad_tension + 1.d-14)
1326 
1327  prod_ru3(:,1,:) = ru(:,3,:)*ru(:,4,:)/(norm_grad_tension + 1.d-14)
1328  prod_ru3(:,2,:) = ru(:,3,:)*ru(:,5,:)/(norm_grad_tension + 1.d-14)
1329  prod_ru3(:,3,:) = ru(:,3,:)*ru(:,6,:)/(norm_grad_tension + 1.d-14) &
1330  - norm_grad_tension
1331  END IF
1332  ELSE
1333  CALL error_petsc('BUG in FFT_TENSOR_DCL : opt_tension should be true if used')
1334  END IF
1335  ! TENSOR PRODUCT
1336 
1337  ! Set the parameters for dfftw
1338  fft_dim=1; istride=1; ostride=1;
1339  !JLG, FEB 4, 2011
1340 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
1341 !!$ odist=m_max; onembed(1)=m_max
1342  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1343  odist=m_max_pad; onembed(1)=m_max_pad
1344  !JLG, FEB 4, 2011
1345 
1346  howmany=bloc_size*nb_field/2
1347 
1348  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru1, &
1349  inembed, istride, idist, prod_cu1, onembed, ostride, odist, fftw_estimate)
1350  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
1351  CALL dfftw_execute(fftw_plan_multi_r2c)
1352  ! clean up
1353  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
1354 
1355  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru2, &
1356  inembed, istride, idist, prod_cu2, onembed, ostride, odist, fftw_estimate)
1357  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
1358  CALL dfftw_execute(fftw_plan_multi_r2c)
1359  ! clean up
1360  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
1361 
1362  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru3, &
1363  inembed, istride, idist, prod_cu3, onembed, ostride, odist, fftw_estimate)
1364  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
1365  CALL dfftw_execute(fftw_plan_multi_r2c)
1366  ! clean up
1367  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
1368 
1369  !JLG, FEB 4, 2011
1370 !!$ prod_cu = prod_cu/N_r !Scaling
1371 !!$ prod_cu = prod_cu/N_r_pad !Scaling
1372 
1373  prod_cu1 = prod_cu1*(1.d0/n_r_pad) !Scaling
1374  prod_cu2 = prod_cu2*(1.d0/n_r_pad) !Scaling
1375  prod_cu3 = prod_cu3*(1.d0/n_r_pad) !Scaling
1376 
1377  !JLG, FEB 4, 2011
1378  IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -t
1379 
1380  !Now we need to redistribute the Fourier coefficients on each processor
1381  t = mpi_wtime()
1382  combined_prod_cu1(:,:,1)=prod_cu1(1,:,:)
1383  combined_prod_cu2(:,:,1)=prod_cu2(1,:,:)
1384  combined_prod_cu3(:,:,1)=prod_cu3(1,:,:)
1385  DO n=2, m_max
1386  !combined_prod_cu(:,:,n)=prod_cu(n,:,:)
1387  combined_prod_cu1(:,:,n)=2*conjg(prod_cu1(n,:,:))
1388  combined_prod_cu2(:,:,n)=2*conjg(prod_cu2(n,:,:))
1389  combined_prod_cu3(:,:,n)=2*conjg(prod_cu3(n,:,:))
1390  END DO
1391 
1392  combined_prod_cu_tot(1,:,:,:) = combined_prod_cu1(:,:,:)
1393  combined_prod_cu_tot(2,:,:,:) = combined_prod_cu2(:,:,:)
1394  combined_prod_cu_tot(3,:,:,:) = combined_prod_cu3(:,:,:)
1395 
1396  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
1397 
1398  t = mpi_wtime()
1399  longueur_tranche=bloc_size*m_max_c*nb_field*3
1400  mpid=mpi_double_precision
1401  CALL mpi_alltoall(combined_prod_cu_tot,longueur_tranche,mpid, dist_prod_cu_tot,longueur_tranche, &
1402  mpid,communicator,code)
1403  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
1404  ! dimensions:
1405  t = mpi_wtime()
1406  DO i = 1, m_max_c
1407  DO nb = 1, nb_procs
1408  shiftc = (nb-1)*bloc_size
1409  shiftl = (nb-1)*m_max_c
1410  intermediate_tot = dist_prod_cu_tot(:,:,:,shiftl+i)
1411  DO n = 1, bloc_size
1412  IF (n+shiftc > np ) cycle
1413  DO i_field = 1, nb_field/2
1414  v_out(:, n+shiftc, i_field*2-1, i) = REAL (intermediate_tot(:,i_field,n),kind=8)
1415  v_out(:, n+shiftc, i_field*2 , i) = aimag(intermediate_tot(:,i_field,n))
1416  END DO
1417  END DO
1418  END DO
1419  END DO
1420  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
1421  END SUBROUTINE fft_tensor_dcl
1422 
1423  SUBROUTINE fft_par_var_eta_prod_t_dcl(communicator, eta, H_mesh, &
1424  c_in, c_out, nb_procs, bloc_size, m_max_pad, time, temps)
1425  !FFT (FFT(-1) V1 . FFT(-1) V2) = c_out
1426  !This a de-aliased version of the code, FEB 4, 2011, JLG
1427  USE my_util
1428  USE def_type_mesh
1429  IMPLICIT NONE
1430  include 'fftw3.f'
1431  TYPE(mesh_type) :: h_mesh
1432  INTERFACE
1433  FUNCTION eta(H_mesh,angles,nb_angles,nb,ne,time) RESULT(vv)
1434  USE def_type_mesh
1435  USE input_data
1436  USE my_util
1437  IMPLICIT NONE
1438  TYPE(mesh_type), INTENT(IN) :: h_mesh
1439  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: angles
1440  INTEGER, INTENT(IN) :: nb_angles
1441  INTEGER, INTENT(IN) :: nb, ne
1442  REAL(KIND=8), INTENT(IN) :: time
1443  REAL(KIND=8), DIMENSION(nb_angles,ne-nb+1) :: vv
1444  END FUNCTION eta
1445  END INTERFACE
1446  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad
1447  REAL(KIND=8), DIMENSION(2*m_max_pad-1) :: angles
1448  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: eta_n
1449  REAL(KIND=8) :: pi
1450  INTEGER :: rank_f
1451  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: c_in
1452  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: c_out
1453  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
1454  REAL(KIND=8) :: time
1455  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(c_in,2)/2, bloc_size) :: cu
1456  REAL(KIND=8), DIMENSION(2*m_max_pad-1, SIZE(c_in,2)/2, bloc_size) :: ru
1457  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(c_in,2)/2, bloc_size) :: prod_cu
1458  REAL(KIND=8), DIMENSION(2*m_max_pad-1, SIZE(c_in,2)/2,bloc_size) :: prod_ru
1459  REAL(KIND=8), DIMENSION(SIZE(c_in,3),SIZE(c_in,2), bloc_size*nb_procs) :: dist_field, combined_field
1460  COMPLEX(KIND=8), DIMENSION(SIZE(c_in,2)/2,bloc_size,SIZE(c_in,3)*nb_procs):: dist_prod_cu
1461  COMPLEX(KIND=8), DIMENSION(SIZE(c_in,2)/2,bloc_size,SIZE(c_in,3)*nb_procs):: combined_prod_cu
1462  COMPLEX(KIND=8), DIMENSION(SIZE(c_in,2)/2,bloc_size) :: intermediate
1463  INTEGER :: np, np_tot, m_max, m_max_c, mpid, n_r_pad, nb_field, i_field
1464  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
1465  INTEGER :: nb, ne, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
1466  REAL(KIND=8) :: t
1467  ! FFTW parameters
1468  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
1469  INTEGER, DIMENSION(1) :: dim, inembed, onembed
1470  ! Recall complexes must be rescaled
1471  ! End FFTW parameters
1472 #include "petsc/finclude/petsc.h"
1473  petscerrorcode :: ierr
1474  mpi_comm :: communicator
1475 
1476  IF (present(temps)) temps = 0.d0
1477 
1478  np = SIZE(c_in,1)
1479  nb_field = SIZE(c_in,2)
1480  m_max_c = SIZE(c_in,3) ! Number of complex (cosines + sines) coefficients per point
1481  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
1482  np_tot = nb_procs*bloc_size
1483  n_r_pad = 2*m_max_pad-1
1484 
1485  IF (m_max_c==0) THEN
1486  WRITE(*,*) ' BUG '
1487  stop
1488  END IF
1489 
1490  pi = acos(-1.d0)
1491  DO n = 1, n_r_pad
1492  angles(n) = 2*pi*(n-1)/n_r_pad
1493  END DO
1494 
1495  ! Packing all 3 complex components of both v1 and v2 input fields
1496  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
1497  ! so that after distributing the data to the processes, each one will obtain a part
1498  ! on nodal points
1499  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
1500  t = mpi_wtime()
1501  DO i = 1, m_max_c
1502  dist_field(i,:,1:np) = transpose(c_in(:,:,i))
1503  END DO
1504  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
1505 
1506  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
1507 
1508  longueur_tranche=bloc_size*m_max_c*nb_field
1509 
1510  t = mpi_wtime()
1511  mpid=mpi_double_precision
1512  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
1513  mpid, communicator, code)
1514  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
1515 
1516  t = mpi_wtime()
1517  !JLG, FEB 4, 2011
1518  cu = 0.d0
1519  !JLG, FEB 4, 2011
1520  DO n = 1, bloc_size
1521  DO nb = 1, nb_procs
1522  shiftc = (nb-1)*bloc_size
1523  shiftl = (nb-1)*m_max_c
1524  jindex = n + shiftc
1525  DO nf = 1, nb_field/2
1526  ! Put real and imaginary parts in a complex
1527  ! INPUT ARE COSINE AND SINE COEFFICIENTS
1528  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
1529  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
1530  -combined_field(:,2*nf,jindex),kind=8)
1531  END DO
1532  END DO
1533  END DO
1534  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
1535  !JLG, FEB 4, 2011
1536  !Padding is done by initialization of cu: cu = 0
1537  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
1538  !JLG, FEB 4, 2011
1539 
1540  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
1541 
1542  ! Set the parameters for dfftw
1543  fft_dim=1; istride=1; ostride=1;
1544  !JLG, FEB 4, 2011
1545 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
1546 !!$ odist=m_max; onembed(1)=m_max
1547  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1548  odist=m_max_pad; onembed(1)=m_max_pad
1549  !JLG, FEB 4, 2011
1550 
1551  howmany=bloc_size*nb_field/2
1552 
1553  t = mpi_wtime()
1554  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
1555  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
1556  CALL dfftw_execute(fftw_plan_multi_c2r)
1557 
1558  ! clean up
1559  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
1560 
1561 !!!!!!!!!!!!!!!!!!!!!!!!!! PRODUCT with eta !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1562  ! Check if the proc is the last one to know if you need to add 1.d0 to eta
1563  CALL mpi_comm_rank(communicator, rank_f, ierr)
1564  IF (rank_f==nb_procs-1) THEN
1565  IF (np-rank_f*bloc_size.GE.1) THEN
1566  nb = rank_f*bloc_size+1
1567  ne = np
1568  eta_n(:,1:np-rank_f*bloc_size) = eta(h_mesh,angles,n_r_pad,nb,ne,time)
1569  eta_n(:,np-rank_f*bloc_size+1:bloc_size) = 1.d0
1570  ELSE
1571  eta_n=1.d0
1572  END IF
1573  ELSE
1574  nb = rank_f*bloc_size+1
1575  ne = rank_f*bloc_size+bloc_size
1576  eta_n(:,:) = eta(h_mesh,angles,n_r_pad,nb,ne,time)
1577  END IF
1578 
1579  IF (nb_field==2) THEN
1580  prod_ru(:,1,:) = eta_n*ru(:,1,:)
1581  ELSE IF (nb_field==6) THEN
1582  prod_ru(:,1,:) = eta_n*ru(:,1,:)
1583  prod_ru(:,2,:) = eta_n*ru(:,2,:)
1584  prod_ru(:,3,:) = eta_n*ru(:,3,:)
1585  ELSE
1586  CALL error_petsc('error in nb_field of c_in while calling FFT_PAR_VAR_ETA_PROD_T_DCL')
1587  END IF
1588 !!!!!!!!!!!!!!!!!!!!!!!!!! PRODUCT with eta !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1589 
1590  howmany = bloc_size*nb_field/2
1591 
1592  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
1593  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
1594  CALL dfftw_execute(fftw_plan_multi_r2c)
1595 
1596  ! clean up
1597  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
1598 
1599  !JLG, FEB 4, 2011
1600 !!$ prod_cu = prod_cu/N_r !Scaling
1601  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
1602  !JLG, FEB 4, 2011
1603  IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -t
1604 
1605  !Now we need to redistribute the Fourier coefficients on each processor
1606  t = mpi_wtime()
1607  combined_prod_cu(:,:,1)=prod_cu(1,:,:)
1608  DO n=2, m_max
1609  combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
1610  END DO
1611 
1612  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
1613 
1614  t = mpi_wtime()
1615  longueur_tranche=bloc_size*m_max_c*nb_field
1616  mpid=mpi_double_precision
1617  CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
1618  mpid,communicator,code)
1619  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
1620 
1621  t = mpi_wtime()
1622  DO i = 1, m_max_c
1623  DO nb = 1, nb_procs
1624  shiftc = (nb-1)*bloc_size
1625  shiftl = (nb-1)*m_max_c
1626  intermediate = dist_prod_cu(:,:,shiftl+i)
1627  DO n = 1, bloc_size
1628  IF (n+shiftc > np ) cycle
1629  DO i_field = 1, nb_field/2
1630  c_out(n+shiftc, 2*i_field-1, i) = REAL (intermediate(i_field,n),kind=8)
1631  c_out(n+shiftc, 2*i_field, i) = aimag(intermediate(i_field,n))
1632  END DO
1633  END DO
1634  END DO
1635  END DO
1636  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
1637 
1638  END SUBROUTINE fft_par_var_eta_prod_t_dcl
1639 
1640  SUBROUTINE fft_par_var_eta_prod_gauss_dcl(communicator, eta, H_mesh, &
1641  c_in, c_out, nb_procs, bloc_size, m_max_pad, rr_gauss, time, temps)
1642  !FFT (FFT(-1) V1 . FFT(-1) V2) = c_out
1643  !This a de-aliased version of the code, FEB 4, 2011, JLG
1644  USE my_util
1645  USE def_type_mesh
1646  IMPLICIT NONE
1647  include 'fftw3.f'
1648  TYPE(mesh_type) :: h_mesh
1649  INTERFACE
1650  FUNCTION eta(H_mesh,rr_gauss,angles,nb_angles,nb,ne,time) RESULT(vv)
1651  USE def_type_mesh
1652  USE input_data
1653  USE my_util
1654  IMPLICIT NONE
1655  TYPE(mesh_type) :: h_mesh
1656  REAL(KIND=8), DIMENSION(:,:), INTENT(IN) :: rr_gauss
1657  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: angles
1658  INTEGER, INTENT(IN) :: nb_angles
1659  INTEGER, INTENT(IN) :: nb, ne
1660  REAL(KIND=8), INTENT(IN) :: time
1661  REAL(KIND=8), DIMENSION(nb_angles,ne-nb+1) :: vv
1662  END FUNCTION eta
1663  END INTERFACE
1664  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad
1665  REAL(KIND=8), DIMENSION(2*m_max_pad-1) :: angles
1666  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: eta_n
1667  REAL(KIND=8) :: pi
1668  INTEGER :: rank_f
1669  REAL(KIND=8) :: time
1670  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: c_in
1671  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: c_out
1672  REAL(KIND=8), DIMENSION(:,:), INTENT(IN) :: rr_gauss
1673  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
1674  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(c_in,2)/2, bloc_size) :: cu
1675  REAL(KIND=8), DIMENSION(2*m_max_pad-1, SIZE(c_in,2)/2, bloc_size) :: ru
1676  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(c_in,2)/2, bloc_size) :: prod_cu
1677  REAL(KIND=8), DIMENSION(2*m_max_pad-1, SIZE(c_in,2)/2,bloc_size) :: prod_ru
1678  REAL(KIND=8), DIMENSION(SIZE(c_in,3),SIZE(c_in,2), bloc_size*nb_procs) :: dist_field, combined_field
1679  COMPLEX(KIND=8), DIMENSION(SIZE(c_in,2)/2,bloc_size,SIZE(c_in,3)*nb_procs):: dist_prod_cu
1680  COMPLEX(KIND=8), DIMENSION(SIZE(c_in,2)/2,bloc_size,SIZE(c_in,3)*nb_procs):: combined_prod_cu
1681  COMPLEX(KIND=8), DIMENSION(SIZE(c_in,2)/2,bloc_size) :: intermediate
1682  INTEGER :: np, np_tot, m_max, m_max_c, mpid, n_r_pad, nb_field, i_field
1683  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
1684  INTEGER :: nb, ne, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
1685  REAL(KIND=8) :: t
1686  ! FFTW parameters
1687  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
1688  INTEGER, DIMENSION(1) :: dim, inembed, onembed
1689  ! Recall complexes must be rescaled
1690  ! End FFTW parameters
1691 #include "petsc/finclude/petsc.h"
1692  petscerrorcode :: ierr
1693  mpi_comm :: communicator
1694 
1695  IF (present(temps)) temps = 0.d0
1696 
1697  np = SIZE(c_in,1)
1698  nb_field = SIZE(c_in,2)
1699  m_max_c = SIZE(c_in,3) ! Number of complex (cosines + sines) coefficients per point
1700  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
1701  np_tot = nb_procs*bloc_size
1702  n_r_pad = 2*m_max_pad-1
1703 
1704  IF (m_max_c==0) THEN
1705  WRITE(*,*) ' BUG '
1706  stop
1707  END IF
1708 
1709  pi = acos(-1.d0)
1710  DO n = 1, n_r_pad
1711  angles(n) = 2*pi*(n-1)/n_r_pad
1712  END DO
1713 
1714  ! Packing all 3 complex components of both v1 and v2 input fields
1715  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
1716  ! so that after distributing the data to the processes, each one will obtain a part
1717  ! on nodal points
1718  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
1719  t = mpi_wtime()
1720  DO i = 1, m_max_c
1721  dist_field(i,:,1:np) = transpose(c_in(:,:,i))
1722  END DO
1723  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
1724 
1725  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
1726 
1727  longueur_tranche=bloc_size*m_max_c*nb_field
1728 
1729  t = mpi_wtime()
1730  mpid=mpi_double_precision
1731  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
1732  mpid, communicator, code)
1733  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
1734 
1735  t = mpi_wtime()
1736  !JLG, FEB 4, 2011
1737  cu = 0.d0
1738  !JLG, FEB 4, 2011
1739  DO n = 1, bloc_size
1740  DO nb = 1, nb_procs
1741  shiftc = (nb-1)*bloc_size
1742  shiftl = (nb-1)*m_max_c
1743  jindex = n + shiftc
1744  DO nf = 1, nb_field/2
1745  ! Put real and imaginary parts in a complex
1746  ! INPUT ARE COSINE AND SINE COEFFICIENTS
1747  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
1748  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
1749  -combined_field(:,2*nf,jindex),kind=8)
1750  END DO
1751  END DO
1752  END DO
1753  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
1754  !JLG, FEB 4, 2011
1755  !Padding is done by initialization of cu: cu = 0
1756  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
1757  !JLG, FEB 4, 2011
1758 
1759  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
1760 
1761  ! Set the parameters for dfftw
1762  fft_dim=1; istride=1; ostride=1;
1763  !JLG, FEB 4, 2011
1764 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
1765 !!$ odist=m_max; onembed(1)=m_max
1766  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1767  odist=m_max_pad; onembed(1)=m_max_pad
1768  !JLG, FEB 4, 2011
1769 
1770  howmany=bloc_size*nb_field/2
1771 
1772  t = mpi_wtime()
1773  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
1774  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
1775  CALL dfftw_execute(fftw_plan_multi_c2r)
1776 
1777  ! clean up
1778  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
1779 
1780 !!!!!!!!!!!!!!!!!!!!!!!!!! PRODUCT with eta !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1781 ! Check if the proc is the last one to know if you need to add 1.d0 to eta
1782  CALL mpi_comm_rank(communicator, rank_f, ierr)
1783  IF (rank_f==nb_procs-1) THEN
1784  IF (np-rank_f*bloc_size.GE.1) THEN
1785  nb = rank_f*bloc_size+1
1786  ne = np
1787  eta_n(:,1:np-rank_f*bloc_size) = eta(h_mesh,rr_gauss(:,nb:ne),angles,n_r_pad,nb,ne,time)
1788  eta_n(:,np-rank_f*bloc_size+1:bloc_size) = 1.d0
1789  ELSE
1790  eta_n=1.d0
1791  END IF
1792  ELSE
1793  nb = rank_f*bloc_size+1
1794  ne = rank_f*bloc_size+bloc_size
1795  eta_n(:,:) = eta(h_mesh,rr_gauss(:,nb:ne),angles,n_r_pad,nb,ne,time)
1796  END IF
1797 
1798  IF (nb_field==2) THEN
1799  prod_ru(:,1,:) = eta_n*ru(:,1,:)
1800  ELSE IF (nb_field==6) THEN
1801  prod_ru(:,1,:) = eta_n*ru(:,1,:)
1802  prod_ru(:,2,:) = eta_n*ru(:,2,:)
1803  prod_ru(:,3,:) = eta_n*ru(:,3,:)
1804  ELSE
1805  CALL error_petsc('error in nb_field of c_in while calling FFT_PAR_VAR_ETA_PROD_GAUSS_DCL')
1806  END IF
1807 !!!!!!!!!!!!!!!!!!!!!!!!!! PRODUCT with eta !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1808 
1809  howmany = bloc_size*nb_field/2
1810 
1811  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
1812  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
1813  CALL dfftw_execute(fftw_plan_multi_r2c)
1814 
1815  ! clean up
1816  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
1817 
1818  !JLG, FEB 4, 2011
1819 !!$ prod_cu = prod_cu/N_r !Scaling
1820  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
1821  !JLG, FEB 4, 2011
1822  IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -t
1823 
1824  !Now we need to redistribute the Fourier coefficients on each processor
1825  t = mpi_wtime()
1826  combined_prod_cu(:,:,1)=prod_cu(1,:,:)
1827  DO n=2, m_max
1828  combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
1829  END DO
1830 
1831  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
1832 
1833  t = mpi_wtime()
1834  longueur_tranche=bloc_size*m_max_c*nb_field
1835  mpid=mpi_double_precision
1836  CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
1837  mpid,communicator,code)
1838  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
1839 
1840  t = mpi_wtime()
1841  DO i = 1, m_max_c
1842  DO nb = 1, nb_procs
1843  shiftc = (nb-1)*bloc_size
1844  shiftl = (nb-1)*m_max_c
1845  intermediate = dist_prod_cu(:,:,shiftl+i)
1846  DO n = 1, bloc_size
1847  IF (n+shiftc > np ) cycle
1848  DO i_field = 1, nb_field/2
1849  c_out(n+shiftc, 2*i_field-1, i) = REAL (intermediate(i_field,n),kind=8)
1850  c_out(n+shiftc, 2*i_field, i) = aimag(intermediate(i_field,n))
1851  END DO
1852  END DO
1853  END DO
1854  END DO
1855  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
1856 
1857  END SUBROUTINE fft_par_var_eta_prod_gauss_dcl
1858 
1859  FUNCTION regul(phi,eps) RESULT(r)
1860  IMPLICIT NONE
1861  REAL(KIND=8), INTENT(IN) :: phi, eps
1862  REAL(KIND=8) :: x, r
1863  x = phi - 0.5d0
1864  IF (x .LE. -eps) THEN
1865  r = 0.d0
1866  ELSE IF (x .LE. eps) THEN
1867  r = (1+ x*(x**2 - 3*eps**2)/(-2*eps**3))/2
1868  ELSE
1869  r = 1.d0
1870  END IF
1871  END FUNCTION regul
1872 
1873  FUNCTION regul_tab(phi,eps) RESULT(r)
1874  IMPLICIT NONE
1875  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: phi
1876  REAL(KIND=8), INTENT(IN) :: eps
1877  REAL(KIND=8) :: x
1878  REAL(KIND=8), DIMENSION(SIZE(phi)) :: r
1879  INTEGER :: n
1880  DO n = 1, SIZE(phi)
1881  x = phi(n) - 0.5d0
1882  IF (x .LE. -eps) THEN
1883  r(n) = 0.d0
1884  ELSE IF (x .LE. eps) THEN
1885  r(n) = (1.d0 + x*(x**2 - 3*eps**2)/(-2*eps**3))/2
1886  ELSE
1887  r(n) = 1.d0
1888  END IF
1889  END DO
1890  END FUNCTION regul_tab
1891 
1892  SUBROUTINE fft_check_interface(communicator, V1_in, nb_fluids, interface_ok, &
1893  nb_procs, bloc_size, m_max_pad, temps)
1894  !This a de-aliased version of the code, FEB 4, 2011, JLG
1895  USE my_util
1896  IMPLICIT NONE
1897  include 'fftw3.f'
1898  ! Format: V_1in(1:np,1:6,1:m_max_c)
1899  ! INPUT ARE COSINE AND SINE COEFFICIENTS
1900  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
1901  REAL(KIND=8), DIMENSION(:,:,:,:),INTENT(IN) :: v1_in
1902  INTEGER, INTENT(IN) :: nb_fluids
1903  INTEGER, INTENT(OUT) :: interface_ok
1904  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
1905  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
1906  INTEGER :: np, np_tot, nb_field, m_max, m_max_c, mpid, n_r_pad
1907  INTEGER(KIND=8) :: fftw_plan_multi_c2r
1908  COMPLEX(KIND=8), DIMENSION(m_max_pad, bloc_size) :: cu
1909  REAL(KIND=8), DIMENSION(2*m_max_pad-1, bloc_size) :: ru
1910  REAL(KIND=8), DIMENSION(SIZE(V1_in,4),SIZE(V1_in,3),bloc_size*nb_procs) :: dist_field, combined_field
1911  REAL(KIND=8), DIMENSION(nb_fluids-1,2*m_max_pad-1, bloc_size) :: v1_real_loc
1912  INTEGER :: n1, n2
1913  INTEGER :: nb, shiftc, shiftl, jindex, longueur_tranche, i, n, code, interface_nb
1914  REAL(KIND=8) :: t
1915  ! FFTW parameters
1916  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
1917  INTEGER, DIMENSION(1) :: dim, inembed, onembed
1918  ! Recall complexes must be rescaled
1919  ! End FFTW parameters
1920 #include "petsc/finclude/petsc.h"
1921  mpi_comm :: communicator
1922 
1923  IF (present(temps)) temps = 0.d0
1924 
1925  np = SIZE(v1_in,2)
1926  nb_field= SIZE(v1_in,3) ! Number of fields
1927  m_max_c = SIZE(v1_in,4) ! Number of complex (cosines + sines) coefficients per point
1928  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
1929  n_r_pad=2*m_max_pad-1
1930  np_tot = nb_procs*bloc_size
1931 
1932  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
1933  WRITE(*,*) ' BUG '
1934  stop
1935  END IF
1936 
1937  ! Bloc_size is the number of points that are handled by one processor
1938  ! once the Fourier modes are all collected
1939  ! Computation of bloc_size and np_tot
1940  ! fin de la repartition des points
1941 
1942  ! Packing all 3 complex components of both v1 and v2 input fields
1943  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
1944  ! so that after distributing the data to the processes, each one will obtain a part
1945  ! on nodal points
1946  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
1947  t = mpi_wtime()
1948 
1949  DO interface_nb = 1, nb_fluids-1
1950  dist_field = 0.d0
1951  DO i = 1, m_max_c
1952  dist_field(i,1:nb_field,1:np) = transpose(v1_in(interface_nb,:,:,i))
1953  END DO
1954  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
1955 
1956  !IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
1957  !TEST LC
1958  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 0.d0
1959 
1960  longueur_tranche=bloc_size*m_max_c*nb_field
1961 
1962  t = mpi_wtime()
1963  mpid=mpi_double_precision
1964  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
1965  mpid, communicator, code)
1966 
1967  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
1968 
1969  t = mpi_wtime()
1970  !JLG, FEB 4, 2011
1971  cu = 0.d0
1972  !JLG, FEB 4, 2011
1973  DO n = 1, bloc_size
1974  DO nb = 1, nb_procs
1975  shiftc = (nb-1)*bloc_size
1976  shiftl = (nb-1)*m_max_c
1977  jindex = n + shiftc
1978  ! Put real and imaginary parts in a complex
1979  ! nf=1,2,3 => V1_in
1980  ! nf=4,5,6 => V2_in
1981  ! INPUT ARE COSINE AND SINE COEFFICIENTS
1982  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
1983  cu(shiftl+1:shiftl+m_max_c,n) = 0.5d0*cmplx(combined_field(:,1,jindex),&
1984  -combined_field(:,2,jindex),kind=8)
1985  END DO
1986  END DO
1987  cu(1,:) = 2*cmplx(REAL(cu(1,:),KIND=8),0.d0,kind=8)
1988 
1989 
1990  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
1991 
1992  ! Set the parameters for dfftw
1993  fft_dim=1; istride=1; ostride=1;
1994  !JLG, FEB 4, 2011
1995 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
1996 !!$ odist=m_max; onembed(1)=m_max
1997  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1998  odist=m_max_pad; onembed(1)=m_max_pad
1999  !JLG, FEB 4, 2011
2000 
2001  howmany=bloc_size*nb_field/2
2002 
2003  t = mpi_wtime()
2004  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
2005  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
2006  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
2007  CALL dfftw_execute(fftw_plan_multi_c2r)
2008 
2009  ! clean up
2010  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
2011  v1_real_loc(interface_nb,:,:)=ru
2012  END DO
2013 
2014 
2015  ! CHECK IF INTERFACES CROSS EACH OTHER
2016  ! no crossing > interface_ok=1 / crossing > interface_ok=0
2017  interface_ok = 1
2018  IF (nb_field==2) THEN
2019  IF (nb_fluids==2) THEN
2020  RETURN
2021  ELSE
2022  DO interface_nb = 1, nb_fluids-2
2023  DO n1 = 1, 2*m_max_pad-1
2024  DO n2 = 1, bloc_size
2025  IF((0.1d0.LE.v1_real_loc(interface_nb,n1,n2)).AND. &
2026  (v1_real_loc(interface_nb,n1,n2).LE.0.9d0)) THEN
2027  IF(v1_real_loc(interface_nb,n1,n2).LT.v1_real_loc(interface_nb+1,n1,n2)) THEN
2028  interface_ok = 0
2029  RETURN
2030  END IF
2031  END IF
2032  END DO
2033  END DO
2034  END DO
2035  END IF
2036  ELSE
2037  CALL error_petsc('BUG in FFT_CHECK_INTERFACE: pb with V1_in dimensions')
2038  END IF
2039  ! CHECK IF INTERFACE CROSS EACH OTHER
2040 
2041  END SUBROUTINE fft_check_interface
2042 
2043  SUBROUTINE fft_compute_entropy_visc(communicator, communicator_S, V1_in, V2_in, V3_in, V4_in, V5_in, &
2044  hloc_gauss, v1_out, v2_out, v3_out, &
2045  nb_procs, bloc_size, m_max_pad, residual_normalization, l_g, temps)
2046  !FFT (FFT(-1) V1 . FFT(-1) V2) = c_out
2047  !This a de-aliased version of the code, FEB 4, 2011, JLG
2048  USE my_util
2049  USE input_data
2050  IMPLICIT NONE
2051  include 'fftw3.f'
2052  ! Format: V_1in(1:np,1:6,1:m_max_c)
2053  ! INPUT ARE COSINE AND SINE COEFFICIENTS
2054  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
2055  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: v1_in, v2_in, v3_in, v4_in, v5_in
2056  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: hloc_gauss
2057  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: v1_out, v2_out, v3_out
2058  REAL(KIND=8), INTENT(IN) :: residual_normalization
2059  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad, l_g
2060  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
2061  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(V1_in,2)*5/2, bloc_size) :: cu
2062  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)*5/2,bloc_size) :: ru
2063  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: visco_entro
2064  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru_1, prod_ru_2, prod_ru_3
2065  COMPLEX(KIND=8), DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu_1, prod_cu_2, prod_cu_3
2066  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size) :: intermediate_1
2067  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size) :: intermediate_2
2068  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size) :: intermediate_3
2069  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: norm_vel
2070  REAL(KIND=8), DIMENSION(2*m_max_pad-1, bloc_size/l_G) :: norm_vel_int
2071  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),5*SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field
2072  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),5*SIZE(V1_in,2),bloc_size*nb_procs) :: combined_field
2073  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu_1
2074  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu_2
2075  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu_3
2076  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu_1
2077  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu_2
2078  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu_3
2079  REAL(KIND=8), DIMENSION(bloc_size*nb_procs) :: hloc_gauss_tot
2080  INTEGER :: np, np_tot, nb_field, m_max, m_max_c, mpid, n_r_pad
2081  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
2082  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code, rank, l, i_field
2083  REAL(KIND=8) :: t, hh, x
2084  REAL(KIND=8) :: max_norm_vel_loc, max_norm_vel_loc_f, max_norm_vel_tot
2085  ! FFTW parameters
2086  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
2087  INTEGER, DIMENSION(1) :: dim, inembed, onembed
2088  ! Recall complexes must be rescaled
2089  ! End FFTW parameters
2090 #include "petsc/finclude/petsc.h"
2091  mpi_comm :: communicator
2092  mpi_comm :: communicator_s
2093  CALL mpi_comm_rank(communicator, rank, code)
2094 
2095  IF (present(temps)) temps = 0.d0
2096 
2097  np = SIZE(v1_in,1)
2098  nb_field= SIZE(v1_in,2) ! Number of fields
2099  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
2100  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
2101  np_tot = nb_procs*bloc_size
2102  n_r_pad=2*m_max_pad-1
2103 
2104  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
2105  WRITE(*,*) ' BUG '
2106  stop
2107  END IF
2108 
2109  ! Packing all 3 complex components of both v1 and v2 input fields
2110  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
2111  ! so that after distributing the data to the processes, each one will obtain a part
2112  ! on nodal points
2113  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
2114  t = mpi_wtime()
2115 
2116  DO i = 1, m_max_c
2117  dist_field(i, 1:nb_field, 1:np) = transpose(v1_in(:,:,i))
2118  dist_field(i, nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
2119  dist_field(i,2*nb_field+1:3*nb_field,1:np) = transpose(v3_in(:,:,i))
2120  dist_field(i,3*nb_field+1:4*nb_field,1:np) = transpose(v4_in(:,:,i))
2121  dist_field(i,4*nb_field+1:5*nb_field,1:np) = transpose(v5_in(:,:,i))
2122  END DO
2123  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
2124 
2125  !IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
2126  !LC 2016/02/16 (we do a max on vel later)
2127  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 0.d0
2128  !LC 2016/02/16
2129 
2130  hloc_gauss_tot(1:np) = hloc_gauss
2131  IF (np/=np_tot) hloc_gauss_tot(np+1:np_tot) = 1.d100
2132 
2133  longueur_tranche=bloc_size*m_max_c*nb_field*5
2134 
2135  t = mpi_wtime()
2136  mpid=mpi_double_precision
2137  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
2138  mpid, communicator, code)
2139  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
2140 
2141  t = mpi_wtime()
2142  !JLG, FEB 4, 2011
2143  cu = 0.d0
2144  !JLG, FEB 4, 2011
2145  DO n = 1, bloc_size
2146  DO nb = 1, nb_procs
2147  shiftc = (nb-1)*bloc_size
2148  shiftl = (nb-1)*m_max_c
2149  jindex = n + shiftc
2150  DO nf = 1, nb_field*5/2
2151  ! Put real and imaginary parts in a complex
2152  ! nf=1,2,3 => V1_in
2153  ! nf=4,5,6 => V2_in
2154  ! INPUT ARE COSINE AND SINE COEFFICIENTS
2155  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
2156  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
2157  -combined_field(:,2*nf,jindex),kind=8)
2158  END DO
2159  END DO
2160  END DO
2161  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
2162  !JLG, FEB 4, 2011
2163  !Padding is done by initialization of cu: cu = 0
2164  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
2165  !JLG, FEB 4, 2011
2166 
2167  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
2168 
2169  ! Set the parameters for dfftw
2170  fft_dim=1; istride=1; ostride=1;
2171  !JLG, FEB 4, 2011
2172 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
2173 !!$ odist=m_max; onembed(1)=m_max
2174  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
2175  odist=m_max_pad; onembed(1)=m_max_pad
2176  !JLG, FEB 4, 2011
2177 
2178  howmany=bloc_size*nb_field*5/2
2179 
2180  t = mpi_wtime()
2181  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
2182  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
2183  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
2184  CALL dfftw_execute(fftw_plan_multi_c2r)
2185 
2186  ! clean up
2187  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
2188 
2189  !===ru(:,1:3,:) is velocity
2190  !===ru(:,4:6,:) is res_ns
2191  !===ru(:,7:9,:) is first line of velocity gradient
2192  !===ru(:,10:12,:) is second line of velocity gradient
2193  !===ru(:,10:12,:) is third line of velocity gradient
2194 
2195  !===Compute local velocity
2196  norm_vel(:,:) = sqrt(ru(:,1,:)**2 + ru(:,2,:)**2 + ru(:,3,:)**2)
2197  DO i = 1, 2*m_max_pad - 1
2198  DO l = 1, bloc_size/l_g
2199  x = maxval(norm_vel(i,(l-1)*l_g+1:l*l_g))
2200  norm_vel_int(i,l) = x
2201  END DO
2202  END DO
2203  IF (2*m_max_pad - 1 .GE. 3) THEN
2204  DO l = 1, bloc_size/l_g
2205  DO i = 2, 2*m_max_pad - 2
2206  norm_vel(i,(l-1)*l_g+1:l*l_g) = maxval(norm_vel_int(i-1:i+1,l))
2207  END DO
2208  norm_vel(1,(l-1)*l_g+1:l*l_g) = max(norm_vel_int(1,l),norm_vel_int(2,l),norm_vel_int(2*m_max_pad - 1,l))
2209  norm_vel(2*m_max_pad - 1,(l-1)*l_g+1:l*l_g) = &
2210  max(norm_vel_int(2*m_max_pad - 2,l),norm_vel_int(2*m_max_pad - 1,l),norm_vel_int(1,l))
2211  END DO
2212  ELSE
2213  DO l = 1, bloc_size/l_g
2214  norm_vel(1,(l-1)*l_g+1:l*l_g) = norm_vel_int(1,l)
2215  END DO
2216  END IF
2217  !===End compute local velocity
2218 
2219  !==Compute maximum of velocity
2220  max_norm_vel_loc=maxval(norm_vel)
2221  CALL mpi_allreduce(max_norm_vel_loc,max_norm_vel_loc_f,1,mpi_double_precision, mpi_max, communicator, code)
2222  CALL mpi_allreduce(max_norm_vel_loc_f,max_norm_vel_tot,1,mpi_double_precision, mpi_max, communicator_s, code)
2223  !==End compute maximum of velocity
2224 
2225  !===Compute entropy viscosity
2226  DO n = 1, bloc_size
2227  hh = hloc_gauss_tot(n+rank*bloc_size)
2228  visco_entro(:,n) = -inputs%LES_coeff1 + min(inputs%LES_coeff4*norm_vel(:,n), &
2229  inputs%LES_coeff2*hh*abs(ru(:,1,n)*ru(:,4,n) + ru(:,2,n)*ru(:,5,n) + ru(:,3,n)*ru(:,6,n))&
2230  /(residual_normalization+1.d-14))
2231  END DO
2232  !===End compute entropy viscosity
2233 
2234  !===Compute visco_entro times gradient(velocity)
2235  prod_ru_1(:,1,:) = visco_entro*ru(:,7,:)
2236  prod_ru_1(:,2,:) = visco_entro*ru(:,8,:)
2237  prod_ru_1(:,3,:) = visco_entro*ru(:,9,:)
2238 
2239  prod_ru_2(:,1,:) = visco_entro*ru(:,10,:)
2240  prod_ru_2(:,2,:) = visco_entro*ru(:,11,:)
2241  prod_ru_2(:,3,:) = visco_entro*ru(:,12,:)
2242 
2243  prod_ru_3(:,1,:) = visco_entro*ru(:,13,:)
2244  prod_ru_3(:,2,:) = visco_entro*ru(:,14,:)
2245  prod_ru_3(:,3,:) = visco_entro*ru(:,15,:)
2246  !===End compute visco_entro times gradient(velocity)
2247 
2248  howmany = bloc_size*nb_field/2
2249  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_1, &
2250  inembed, istride, idist, prod_cu_1, onembed, ostride, odist, fftw_estimate)
2251  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
2252  CALL dfftw_execute(fftw_plan_multi_r2c)
2253  ! clean up
2254  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
2255 
2256  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_2, &
2257  inembed, istride, idist, prod_cu_2, onembed, ostride, odist, fftw_estimate)
2258  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
2259  CALL dfftw_execute(fftw_plan_multi_r2c)
2260  ! clean up
2261  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
2262 
2263  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_3, &
2264  inembed, istride, idist, prod_cu_3, onembed, ostride, odist, fftw_estimate)
2265  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
2266  CALL dfftw_execute(fftw_plan_multi_r2c)
2267  ! clean up
2268  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
2269 
2270  prod_cu_1 = prod_cu_1*(1.d0/n_r_pad) !Scaling
2271  prod_cu_2 = prod_cu_2*(1.d0/n_r_pad) !Scaling
2272  prod_cu_3 = prod_cu_3*(1.d0/n_r_pad) !Scaling
2273 
2274  IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -t
2275 
2276  !Now we need to redistribute the Fourier coefficients on each processor
2277  t = mpi_wtime()
2278  combined_prod_cu_1(:,:,1)=prod_cu_1(1,:,:)
2279  combined_prod_cu_2(:,:,1)=prod_cu_2(1,:,:)
2280  combined_prod_cu_3(:,:,1)=prod_cu_3(1,:,:)
2281 
2282  DO n=2, m_max
2283  combined_prod_cu_1(:,:,n)=2*conjg(prod_cu_1(n,:,:))
2284  combined_prod_cu_2(:,:,n)=2*conjg(prod_cu_2(n,:,:))
2285  combined_prod_cu_3(:,:,n)=2*conjg(prod_cu_3(n,:,:))
2286  END DO
2287 
2288  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
2289 
2290  t = mpi_wtime()
2291  longueur_tranche=bloc_size*m_max_c*nb_field
2292  mpid=mpi_double_precision
2293  CALL mpi_alltoall(combined_prod_cu_1,longueur_tranche,mpid, dist_prod_cu_1,longueur_tranche, &
2294  mpid,communicator,code)
2295  CALL mpi_alltoall(combined_prod_cu_2,longueur_tranche,mpid, dist_prod_cu_2,longueur_tranche, &
2296  mpid,communicator,code)
2297  CALL mpi_alltoall(combined_prod_cu_3,longueur_tranche,mpid, dist_prod_cu_3,longueur_tranche, &
2298  mpid,communicator,code)
2299  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
2300 
2301  t = mpi_wtime()
2302  DO i = 1, m_max_c
2303  DO nb = 1, nb_procs
2304  shiftc = (nb-1)*bloc_size
2305  shiftl = (nb-1)*m_max_c
2306  intermediate_1 = dist_prod_cu_1(:,:,shiftl+i)
2307  intermediate_2 = dist_prod_cu_2(:,:,shiftl+i)
2308  intermediate_3 = dist_prod_cu_3(:,:,shiftl+i)
2309  DO n = 1, bloc_size
2310  IF (n+shiftc > np ) cycle
2311  DO i_field = 1, nb_field/2
2312  v1_out(n+shiftc, i_field*2-1, i) = REAL (intermediate_1(i_field,n),kind=8)
2313  v1_out(n+shiftc, i_field*2 , i) = aimag(intermediate_1(i_field,n))
2314  v2_out(n+shiftc, i_field*2-1, i) = REAL (intermediate_2(i_field,n),kind=8)
2315  v2_out(n+shiftc, i_field*2 , i) = aimag(intermediate_2(i_field,n))
2316  v3_out(n+shiftc, i_field*2-1, i) = REAL (intermediate_3(i_field,n),kind=8)
2317  v3_out(n+shiftc, i_field*2 , i) = aimag(intermediate_3(i_field,n))
2318  END DO
2319  END DO
2320  END DO
2321  END DO
2322  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
2323 
2324  END SUBROUTINE fft_compute_entropy_visc
2325 
2326  SUBROUTINE fft_compute_entropy_visc_mom(communicator,communicator_S, V1_in, V2_in, V3_in, c1_in, hloc_gauss, &
2327  c1_real_out, nb_procs, bloc_size, m_max_pad, l_g, opt_c2_real_out, temps)
2328  !FFT (FFT(-1) V1 . FFT(-1) V2) = c_out
2329  !This a de-aliased version of the code, FEB 4, 2011, JLG
2330  USE my_util
2331  USE input_data
2332  IMPLICIT NONE
2333  include 'fftw3.f'
2334  ! Format: V_1in(1:np,1:6,1:m_max_c)
2335  ! INPUT ARE COSINE AND SINE COEFFICIENTS
2336  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
2337  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: v1_in, v2_in, v3_in
2338  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: c1_in
2339  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: hloc_gauss
2340  REAL(KIND=8), DIMENSION(:,:), INTENT(OUT) :: c1_real_out
2341  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad, l_g
2342  REAL(KIND=8), DIMENSION(:,:), OPTIONAL, INTENT(OUT) :: opt_c2_real_out
2343  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
2344  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),3*SIZE(V1_in,2)+SIZE(c1_in,2),bloc_size*nb_procs) :: dist_field
2345  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),3*SIZE(V1_in,2)+SIZE(c1_in,2),bloc_size*nb_procs) :: combined_field
2346  COMPLEX(KIND=8), DIMENSION(m_max_pad, (3*SIZE(V1_in,2)+SIZE(c1_in,2))/2, bloc_size) :: cu
2347  REAL(KIND=8), DIMENSION(2*m_max_pad-1,(3*SIZE(V1_in,2)+SIZE(c1_in,2))/2,bloc_size) :: ru
2348  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: norm_vel, norm_mom
2349  REAL(KIND=8), DIMENSION(2*m_max_pad-1, bloc_size/l_G) :: norm_vel_int, norm_mom_int
2350  REAL(KIND=8), DIMENSION(bloc_size*nb_procs) :: hloc_gauss_tot
2351  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: visco_entro
2352  INTEGER :: np, np_tot, nb_field, nb_field2, m_max, m_max_c, mpid, n_r_pad
2353  INTEGER(KIND=8) :: fftw_plan_multi_c2r
2354  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code, rank, l
2355  REAL(KIND=8) :: t, hh, x, max_vel_loc, max_vel_f, max_vel_tot
2356  ! FFTW parameters
2357  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
2358  INTEGER, DIMENSION(1) :: dim, inembed, onembed
2359  ! Recall complexes must be rescaled
2360  ! End FFTW parameters
2361 #include "petsc/finclude/petsc.h"
2362  mpi_comm :: communicator
2363  mpi_comm :: communicator_s
2364 
2365  CALL mpi_comm_rank(communicator, rank, code)
2366 
2367  IF (present(temps)) temps = 0.d0
2368 
2369  np = SIZE(v1_in,1)
2370  nb_field = SIZE(v1_in,2) ! Number of fields for vector
2371  nb_field2 = SIZE(c1_in,2) ! Number of fields for scalar
2372  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
2373  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
2374  np_tot = nb_procs*bloc_size
2375  n_r_pad = 2*m_max_pad-1
2376 
2377  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
2378  WRITE(*,*) ' BUG '
2379  stop
2380  END IF
2381 
2382  ! Packing all 3 complex components of both v1 and v2 input fields
2383  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
2384  ! so that after distributing the data to the processes, each one will obtain a part
2385  ! on nodal points
2386  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
2387  t = mpi_wtime()
2388 
2389  DO i = 1, m_max_c
2390  dist_field(i, 1:nb_field, 1:np) = transpose(v1_in(:,:,i))
2391  dist_field(i, nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
2392  dist_field(i,2*nb_field+1:3*nb_field,1:np) = transpose(v3_in(:,:,i))
2393  dist_field(i,3*nb_field+1:3*nb_field+nb_field2,1:np) = transpose(c1_in(:,:,i))
2394  END DO
2395  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
2396 
2397  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 0.d0
2398 
2399  hloc_gauss_tot(1:np) = hloc_gauss
2400  IF (np/=np_tot) hloc_gauss_tot(np+1:np_tot) = 1.d100
2401 
2402  longueur_tranche=bloc_size*m_max_c*(nb_field*3+nb_field2)
2403 
2404  t = mpi_wtime()
2405  mpid=mpi_double_precision
2406  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
2407  mpid, communicator, code)
2408  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
2409 
2410  t = mpi_wtime()
2411  !JLG, FEB 4, 2011
2412  cu = 0.d0
2413  !JLG, FEB 4, 2011
2414  DO n = 1, bloc_size
2415  DO nb = 1, nb_procs
2416  shiftc = (nb-1)*bloc_size
2417  shiftl = (nb-1)*m_max_c
2418  jindex = n + shiftc
2419  DO nf = 1, (3*nb_field+nb_field2)/2
2420  ! Put real and imaginary parts in a complex
2421  ! nf=1,2,3 => V1_in
2422  ! nf=4,5,6 => V2_in
2423  ! INPUT ARE COSINE AND SINE COEFFICIENTS
2424  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
2425  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
2426  -combined_field(:,2*nf,jindex),kind=8)
2427  END DO
2428  END DO
2429  END DO
2430  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
2431  !JLG, FEB 4, 2011
2432  !Padding is done by initialization of cu: cu = 0
2433  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
2434  !JLG, FEB 4, 2011
2435 
2436  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
2437 
2438  ! Set the parameters for dfftw
2439  fft_dim=1; istride=1; ostride=1;
2440  !JLG, FEB 4, 2011
2441  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
2442  odist=m_max_pad; onembed(1)=m_max_pad
2443  !JLG, FEB 4, 2011
2444 
2445  howmany=bloc_size*(nb_field*3+nb_field2)/2
2446 
2447  t = mpi_wtime()
2448  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
2449  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
2450  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
2451  CALL dfftw_execute(fftw_plan_multi_c2r)
2452 
2453  ! clean up
2454  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
2455 
2456  !===ru(:,1:3,:) is velocity
2457  !===ru(:,4:6,:) is momentum
2458  !===ru(:,7:9,:) is res_ns
2459  !===ru(:,10,:) is res_mass
2460 
2461  !===Compute norm_vel and norm_mom
2462  norm_vel(:,:) = sqrt(ru(:,1,:)**2 + ru(:,2,:)**2 + ru(:,3,:)**2)
2463  norm_mom(:,:) = sqrt(ru(:,4,:)**2 + ru(:,5,:)**2 + ru(:,6,:)**2)
2464  DO i = 1, 2*m_max_pad - 1
2465  DO l = 1, bloc_size/l_g
2466  x = maxval(norm_vel(i,(l-1)*l_g+1:l*l_g))
2467  norm_vel_int(i,l) = x
2468  x = maxval(norm_mom(i,(l-1)*l_g+1:l*l_g))
2469  norm_mom_int(i,l) = x
2470  END DO
2471  END DO
2472  IF (2*m_max_pad - 1 .GE. 3) THEN
2473  DO l = 1, bloc_size/l_g
2474  DO i = 2, 2*m_max_pad - 2
2475  norm_vel(i,(l-1)*l_g+1:l*l_g) = maxval(norm_vel_int(i-1:i+1,l))
2476  norm_mom(i,(l-1)*l_g+1:l*l_g) = maxval(norm_mom_int(i-1:i+1,l))
2477  END DO
2478  norm_vel(1,(l-1)*l_g+1:l*l_g) = max(norm_vel_int(1,l),norm_vel_int(2,l),norm_vel_int(2*m_max_pad - 1,l))
2479  norm_vel(2*m_max_pad - 1,(l-1)*l_g+1:l*l_g) = &
2480  max(norm_vel_int(2*m_max_pad - 2,l),norm_vel_int(2*m_max_pad - 1,l),norm_vel_int(1,l))
2481  norm_mom(1,(l-1)*l_g+1:l*l_g) = max(norm_mom_int(1,l),norm_mom_int(2,l),norm_mom_int(2*m_max_pad - 1,l))
2482  norm_mom(2*m_max_pad - 1,(l-1)*l_g+1:l*l_g) = &
2483  max(norm_mom_int(2*m_max_pad - 2,l),norm_mom_int(2*m_max_pad - 1,l),norm_mom_int(1,l))
2484  END DO
2485  ELSE
2486  DO l = 1, bloc_size/l_g
2487  norm_vel(1,(l-1)*l_g+1:l*l_g) = norm_vel_int(1,l)
2488  norm_mom(1,(l-1)*l_g+1:l*l_g) = norm_mom_int(1,l)
2489  END DO
2490  END IF
2491  !===End compute norm_vel and norm_mom
2492 
2493  !===Compute MAX(norm_vel) on domain
2494  max_vel_loc=maxval(norm_vel)
2495  CALL mpi_allreduce(max_vel_loc,max_vel_f, 1, mpi_double_precision, &
2496  mpi_max, communicator, code)
2497  CALL mpi_allreduce(max_vel_f, max_vel_tot, 1, mpi_double_precision, &
2498  mpi_max, communicator_s, code)
2499  !===End compute MAX(norm_vel) on domain
2500 
2501  !===Compute entropy viscosities for momentum and level set equations
2502  DO n = 1, bloc_size
2503  hh = hloc_gauss_tot(n+rank*bloc_size)
2504 
2505  visco_entro(:,n) = inputs%LES_coeff2*hh* &
2506  max(abs(ru(:,1,n)*ru(:,7,n) + ru(:,2,n)*ru(:,8,n) + ru(:,3,n)*ru(:,9,n)), &
2507  abs(ru(:,10,n)*(ru(:,1,n)**2 + ru(:,2,n)**2 + ru(:,3,n)**2)))
2508  END DO
2509 
2510  !JLG Sept 21 2016
2511  !===Average entropy viscosity
2512  IF (2*m_max_pad - 1 .GE. 3) THEN
2513  DO l = 1, bloc_size/l_g
2514  DO i = 2, 2*m_max_pad - 2
2515  norm_vel_int(i,l) = maxval(visco_entro(i-1:i+1,(l-1)*l_g+1:l*l_g))
2516  END DO
2517  norm_vel_int(1,l) = max(maxval(visco_entro(1,(l-1)*l_g+1:l*l_g)),maxval(visco_entro(2,(l-1)*l_g+1:l*l_g)),&
2518  maxval(visco_entro(2*m_max_pad - 1,(l-1)*l_g+1:l*l_g)))
2519  norm_vel_int(2*m_max_pad - 1,l) = max(maxval(visco_entro(2*m_max_pad - 2,(l-1)*l_g+1:l*l_g)),&
2520  maxval(visco_entro(2*m_max_pad - 1,(l-1)*l_g+1:l*l_g)),maxval(visco_entro(1,(l-1)*l_g+1:l*l_g)))
2521  END DO
2522  ELSE
2523  DO l = 1, bloc_size/l_g
2524  norm_vel_int(1,l) = maxval(visco_entro(1,(l-1)*l_g+1:l*l_g))
2525  END DO
2526  END IF
2527  DO i = 1, 2*m_max_pad - 1
2528  DO l = 1, bloc_size/l_g
2529  visco_entro(i,(l-1)*l_g+1:l*l_g)=norm_vel_int(i,l)
2530  END DO
2531  END DO
2532  !===End Average entropy viscosity
2533  !End JLG Sept 21 2016
2534 
2535  DO n = 1, bloc_size
2536  IF (l_g==3) THEN !===Level set viscosity
2537  c1_real_out(:,n) = min(4*inputs%LES_coeff4*max_vel_tot, &
2538  16*visco_entro(:,n)/(norm_vel(:,n)*norm_mom(:,n)+1.d-14))
2539  ELSE !===Momentum viscosity
2540  c1_real_out(:,n) = min(inputs%LES_coeff4*norm_vel(:,n), &
2541  visco_entro(:,n)/(norm_vel(:,n)*norm_mom(:,n)+1.d-14))
2542  IF (present(opt_c2_real_out)) THEN
2543  opt_c2_real_out(:,n) = min(inputs%LES_coeff4*max_vel_tot, &
2544  visco_entro(:,n)/(norm_vel(:,n)*norm_mom(:,n)+1.d-14))
2545  END IF
2546  END IF
2547  END DO
2548  !===End compute entropy viscosities for momentum and level set equations
2549 
2550  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
2551 
2552  END SUBROUTINE fft_compute_entropy_visc_mom
2553 
2554  SUBROUTINE fft_compute_diffu_mom(communicator, V1_in, V2_in, V3_in, &
2555  v1_out, v2_out, nb_procs, bloc_size, m_max_pad, temps)
2556  !This a de-aliased version of the code, FEB 4, 2011, JLG
2557  USE my_util
2558  USE input_data
2559  IMPLICIT NONE
2560  include 'fftw3.f'
2561  ! Format: V_1in(1:np,1:6,1:m_max_c)
2562  ! INPUT ARE COSINE AND SINE COEFFICIENTS
2563  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
2564  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: v1_in, v2_in !vectors
2565  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: v3_in !scalar
2566  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: v1_out
2567  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: v2_out
2568  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
2569  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
2570  INTEGER :: np, np_tot, nb_field1, nb_field2, nb_field3
2571  INTEGER :: m_max, m_max_c, mpid, n_r_pad
2572  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
2573  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2)+SIZE(V3_in,2), bloc_size*nb_procs) :: dist_field
2574  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2)+SIZE(V3_in,2), bloc_size*nb_procs) :: combined_field
2575  COMPLEX(KIND=8), DIMENSION(m_max_pad, (2*SIZE(V1_in,2)+SIZE(V3_in,2))/2, bloc_size) :: cu
2576  REAL(KIND=8), DIMENSION(2*m_max_pad-1,(2*SIZE(V1_in,2)+SIZE(V3_in,2))/2,bloc_size) :: ru
2577  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru_1, prod_ru_2
2578  COMPLEX(KIND=8), DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu_1, prod_cu_2
2579  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size) :: intermediate_1, intermediate_2
2580  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu_1
2581  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu_2
2582  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu_1
2583  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu_2
2584  INTEGER :: i_field, rank
2585  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
2586  REAL(KIND=8) :: t
2587  ! FFTW parameters
2588  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
2589  INTEGER, DIMENSION(1) :: dim, inembed, onembed
2590  ! Recall complexes must be rescaled
2591  ! End FFTW parameters
2592 #include "petsc/finclude/petsc.h"
2593  mpi_comm :: communicator
2594  petscerrorcode :: ierr
2595 
2596  IF (present(temps)) temps = 0.d0
2597 
2598  CALL mpi_comm_rank(communicator, rank, ierr)
2599 
2600  np = SIZE(v1_in,1)
2601  nb_field1= SIZE(v1_in,2) ! Number of fields for vector
2602  nb_field2= SIZE(v2_in,2) ! Number of fields
2603  nb_field3= SIZE(v3_in,2) ! Number of fields
2604  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
2605  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
2606  n_r_pad = 2*m_max_pad-1
2607  np_tot = nb_procs*bloc_size
2608 
2609  IF (mod(nb_field1,2)/=0 .OR. mod(nb_field2,2)/=0 .OR. m_max_c==0) THEN
2610  WRITE(*,*) ' BUG '
2611  stop
2612  END IF
2613 
2614  ! Bloc_size is the number of points that are handled by one processor
2615  ! once the Fourier modes are all collected
2616  ! Computation of bloc_size and np_tot
2617  ! fin de la repartition des points
2618 
2619  ! Packing all 3 complex components of both v1 and v2 input fields
2620  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
2621  ! so that after distributing the data to the processes, each one will obtain a part
2622  ! on nodal points
2623  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
2624  t = mpi_wtime()
2625 
2626  DO i = 1, m_max_c
2627  dist_field(i,1:nb_field1,1:np) = transpose(v1_in(:,:,i))
2628  dist_field(i,nb_field1+1:2*nb_field1,1:np) = transpose(v2_in(:,:,i))
2629  dist_field(i,2*nb_field1+1:2*nb_field1+nb_field3,1:np) = transpose(v3_in(:,:,i))
2630  END DO
2631  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
2632 
2633  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 0.d0
2634 
2635  longueur_tranche=bloc_size*m_max_c*(nb_field1+nb_field2+nb_field3)
2636 
2637  t = mpi_wtime()
2638  mpid=mpi_double_precision
2639  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
2640  mpid, communicator, code)
2641  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
2642 
2643  t = mpi_wtime()
2644  !JLG, FEB 4, 2011
2645  cu = 0.d0
2646  !JLG, FEB 4, 2011
2647  DO n = 1, bloc_size
2648  DO nb = 1, nb_procs
2649  shiftc = (nb-1)*bloc_size
2650  shiftl = (nb-1)*m_max_c
2651  jindex = n + shiftc
2652  DO nf = 1, (nb_field1+nb_field2+nb_field3)/2
2653  ! Put real and imaginary parts in a complex
2654  ! nf=1,2,3 => V1_in
2655  ! nf=4 => V2_in
2656  ! INPUT ARE COSINE AND SINE COEFFICIENTS
2657  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
2658  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
2659  -combined_field(:,2*nf,jindex),kind=8)
2660  END DO
2661  END DO
2662  END DO
2663  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
2664  !JLG, FEB 4, 2011
2665  !Padding is done by initialization of cu: cu = 0
2666  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
2667  !JLG, FEB 4, 2011
2668 
2669  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
2670 
2671  ! Set the parameters for dfftw
2672  fft_dim=1; istride=1; ostride=1;
2673  !JLG, FEB 4, 2011
2674  !idist=N_r; inembed(1)=N_r; DIM(1)=N_r
2675  !odist=m_max; onembed(1)=m_max
2676  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
2677  odist=m_max_pad; onembed(1)=m_max_pad
2678  !JLG, FEB 4, 2011
2679 
2680  howmany=bloc_size*(nb_field1+nb_field2+nb_field3)/2
2681 
2682  t = mpi_wtime()
2683  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
2684  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
2685  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
2686  CALL dfftw_execute(fftw_plan_multi_c2r)
2687 
2688  ! clean up
2689  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
2690 
2691  !===ru(:,1:3,:) is first line of strain rate tensor
2692  !===ru(:,4:6,:) is 2nd line/column 2 and 3 of strain rate tensor + 3rd line/3rd column
2693  !===ru(:,7,:) is dynamical viscosity
2694 
2695  IF (nb_field1==6 .AND. nb_field2==6 .AND. nb_field3==2) THEN
2696  !===Compute visc_dyn times strain rate tensor
2697  prod_ru_1(:,1,:) = ru(:,7,:)*ru(:,1,:)
2698  prod_ru_1(:,2,:) = ru(:,7,:)*ru(:,2,:)
2699  prod_ru_1(:,3,:) = ru(:,7,:)*ru(:,3,:)
2700 
2701  prod_ru_2(:,1,:) = ru(:,7,:)*ru(:,4,:)
2702  prod_ru_2(:,2,:) = ru(:,7,:)*ru(:,5,:)
2703  prod_ru_2(:,3,:) = ru(:,7,:)*ru(:,6,:)
2704  !===End Compute visc_dyn times strain rate tensor
2705  ELSE
2706  CALL error_petsc('error on inputs while calling FFT_COMPUTE_DIFFU_MOM')
2707  END IF
2708 
2709  howmany = bloc_size*nb_field1/2 !vector
2710  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_1, &
2711  inembed, istride, idist, prod_cu_1, onembed, ostride, odist, fftw_estimate)
2712  CALL dfftw_execute(fftw_plan_multi_r2c)
2713  ! clean up
2714  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
2715 
2716  howmany = bloc_size*nb_field1/2 !vector
2717  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_2, &
2718  inembed, istride, idist, prod_cu_2, onembed, ostride, odist, fftw_estimate)
2719  CALL dfftw_execute(fftw_plan_multi_r2c)
2720  ! clean up
2721  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
2722 
2723  !JLG, FEB 4, 2011
2724  !prod_cu = prod_cu/N_r !Scaling
2725  prod_cu_1 = prod_cu_1*(1.d0/n_r_pad) !Scaling
2726  prod_cu_2 = prod_cu_2*(1.d0/n_r_pad) !Scaling
2727  !JLG, FEB 4, 2011
2728  IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -t
2729 
2730  !Now we need to redistribute the Fourier coefficients on each processor
2731 
2732  t = mpi_wtime()
2733  combined_prod_cu_1(:,:,1)=prod_cu_1(1,:,:)
2734  combined_prod_cu_2(:,:,1)=prod_cu_2(1,:,:)
2735  DO n=2, m_max
2736  combined_prod_cu_1(:,:,n)=2*conjg(prod_cu_1(n,:,:))
2737  combined_prod_cu_2(:,:,n)=2*conjg(prod_cu_2(n,:,:))
2738  END DO
2739 
2740  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
2741 
2742  t = mpi_wtime()
2743  longueur_tranche=bloc_size*m_max_c*nb_field1 !vectors
2744  mpid=mpi_double_precision
2745  CALL mpi_alltoall(combined_prod_cu_1,longueur_tranche,mpid, dist_prod_cu_1,longueur_tranche, &
2746  mpid,communicator,code)
2747  CALL mpi_alltoall(combined_prod_cu_2,longueur_tranche,mpid, dist_prod_cu_2,longueur_tranche, &
2748  mpid,communicator,code)
2749 
2750  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
2751 
2752  t = mpi_wtime()
2753  DO i = 1, m_max_c
2754  DO nb = 1, nb_procs
2755  shiftc = (nb-1)*bloc_size
2756  shiftl = (nb-1)*m_max_c
2757  intermediate_1 = dist_prod_cu_1(:,:,shiftl+i)
2758  intermediate_2 = dist_prod_cu_2(:,:,shiftl+i)
2759  DO n = 1, bloc_size
2760  IF (n+shiftc > np ) cycle
2761  DO i_field = 1, nb_field1/2
2762  v1_out(n+shiftc, i_field*2-1, i) = REAL (intermediate_1(i_field,n),kind=8)
2763  v1_out(n+shiftc, i_field*2 , i) = aimag(intermediate_1(i_field,n))
2764  v2_out(n+shiftc, i_field*2-1, i) = REAL (intermediate_2(i_field,n),kind=8)
2765  v2_out(n+shiftc, i_field*2 , i) = aimag(intermediate_2(i_field,n))
2766  END DO
2767  END DO
2768  END DO
2769  END DO
2770 
2771  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
2772 
2773  END SUBROUTINE fft_compute_diffu_mom
2774 
2775  SUBROUTINE fft_par_compr_entro_visc_dcl(communicator, V1_in, V2_in, c1_in, c_in_real, hloc_gauss, &
2776  coeff1_in_level, v_out, c_out, nb_procs, bloc_size, m_max_pad, temps)
2777  !This a de-aliased version of the code, FEB 4, 2011, JLG
2778  USE my_util
2779  USE input_data
2780  IMPLICIT NONE
2781  include 'fftw3.f'
2782  ! Format: V_1in(1:np,1:6,1:m_max_c)
2783  ! INPUT ARE COSINE AND SINE COEFFICIENTS
2784  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
2785  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: v1_in, v2_in !vector
2786  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: c1_in !scalar
2787  REAL(KIND=8), DIMENSION(:,:), INTENT(IN) :: c_in_real
2788  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: hloc_gauss
2789  REAL(KIND=8), INTENT(IN) :: coeff1_in_level
2790  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: v_out
2791  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: c_out
2792  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
2793  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
2794  INTEGER :: np, np_tot, nb_field1, nb_field2, nb_field3
2795  INTEGER :: m_max, m_max_c, mpid, n_r_pad
2796  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
2797  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2)+SIZE(c1_in,2),bloc_size*nb_procs) :: dist_field
2798  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2)+SIZE(c1_in,2),bloc_size*nb_procs) :: combined_field
2799  COMPLEX(KIND=8), DIMENSION(m_max_pad, (2*SIZE(V1_in,2)+SIZE(c1_in,2))/2, bloc_size) :: cu
2800  REAL(KIND=8), DIMENSION(2*m_max_pad-1,(2*SIZE(V1_in,2)+SIZE(c1_in,2))/2,bloc_size) :: ru
2801  REAL(KIND=8), DIMENSION(2*m_max_pad-1, bloc_size) :: norm_grad_phi
2802  REAL(KIND=8), DIMENSION(bloc_size*nb_procs) :: hloc_gauss_tot
2803  REAL(KIND=8), DIMENSION(2*m_max_pad-1, bloc_size) :: visc_comp, visc_entro
2804  COMPLEX(KIND=8), DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu
2805  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru
2806  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2, bloc_size) :: intermediate
2807  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
2808  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
2809  COMPLEX(KIND=8), DIMENSION(m_max_pad,bloc_size) :: prod_cu2
2810  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru2
2811  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu2
2812  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu2
2813  COMPLEX(KIND=8), DIMENSION(bloc_size) :: intermediate2
2814  INTEGER :: i_field
2815  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
2816  REAL(KIND=8) :: t
2817  INTEGER :: rank
2818  ! FFTW parameters
2819  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
2820  INTEGER, DIMENSION(1) :: dim, inembed, onembed
2821  ! Recall complexes must be rescaled
2822  ! End FFTW parameters
2823 #include "petsc/finclude/petsc.h"
2824  mpi_comm :: communicator
2825 
2826  CALL mpi_comm_rank(communicator, rank, code)
2827 
2828  IF (present(temps)) temps = 0.d0
2829 
2830  np = SIZE(v1_in,1)
2831  nb_field1= SIZE(v1_in,2) ! Number of fields
2832  nb_field2= SIZE(v2_in,2) ! Number of fields
2833  nb_field3= SIZE(c1_in,2) ! Number of fields
2834  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
2835  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
2836  n_r_pad = 2*m_max_pad-1
2837  np_tot = nb_procs*bloc_size
2838 
2839  IF (mod(nb_field1,2)/=0 .OR. mod(nb_field2,2)/=0 .OR. m_max_c==0) THEN
2840  WRITE(*,*) ' BUG '
2841  stop
2842  END IF
2843 
2844  ! Bloc_size is the number of points that are handled by one processor
2845  ! once the Fourier modes are all collected
2846  ! Computation of bloc_size and np_tot
2847  ! fin de la repartition des points
2848 
2849  ! Packing all 3 complex components of both v1 and v2 input fields
2850  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
2851  ! so that after distributing the data to the processes, each one will obtain a part
2852  ! on nodal points
2853  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
2854  t = mpi_wtime()
2855 
2856  DO i = 1, m_max_c
2857  dist_field(i,1:nb_field1,1:np) = transpose(v1_in(:,:,i))
2858  dist_field(i,nb_field1+1:2*nb_field1,1:np) = transpose(v2_in(:,:,i))
2859  dist_field(i,2*nb_field1+1:2*nb_field1+nb_field3,1:np) = transpose(c1_in(:,:,i))
2860  END DO
2861  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
2862 
2863  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
2864 
2865  hloc_gauss_tot(1:np) = hloc_gauss
2866  IF (np/=np_tot) hloc_gauss_tot(np+1:np_tot) = 1.d100
2867 
2868  longueur_tranche=bloc_size*m_max_c*(nb_field1+nb_field2+nb_field3)
2869 
2870  t = mpi_wtime()
2871  mpid=mpi_double_precision
2872  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
2873  mpid, communicator, code)
2874  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
2875 
2876  t = mpi_wtime()
2877  !JLG, FEB 4, 2011
2878  cu = 0.d0
2879  !JLG, FEB 4, 2011
2880  DO n = 1, bloc_size
2881  DO nb = 1, nb_procs
2882  shiftc = (nb-1)*bloc_size
2883  shiftl = (nb-1)*m_max_c
2884  jindex = n + shiftc
2885  DO nf = 1, (nb_field1+nb_field2+nb_field3)/2
2886  ! Put real and imaginary parts in a complex
2887  ! nf=1,2,3 => V1_in
2888  ! nf=4 => V2_in
2889  ! INPUT ARE COSINE AND SINE COEFFICIENTS
2890  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
2891  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
2892  -combined_field(:,2*nf,jindex),kind=8)
2893  END DO
2894  END DO
2895  END DO
2896  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
2897  !JLG, FEB 4, 2011
2898  !Padding is done by initialization of cu: cu = 0
2899  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
2900  !JLG, FEB 4, 2011
2901 
2902  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
2903 
2904  ! Set the parameters for dfftw
2905  fft_dim=1; istride=1; ostride=1;
2906  !JLG, FEB 4, 2011
2907  !idist=N_r; inembed(1)=N_r; DIM(1)=N_r
2908  !odist=m_max; onembed(1)=m_max
2909  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
2910  odist=m_max_pad; onembed(1)=m_max_pad
2911  !JLG, FEB 4, 2011
2912 
2913  howmany=bloc_size*(nb_field1+nb_field2+nb_field3)/2
2914 
2915  t = mpi_wtime()
2916  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
2917  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
2918  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
2919  CALL dfftw_execute(fftw_plan_multi_c2r)
2920 
2921  ! clean up
2922  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
2923 
2924  IF (nb_field1 == 6 .AND. nb_field2 == 6 .AND. nb_field3 == 2) THEN
2925  !===ru(:,1:3,:) = h*Grad(phi)
2926  !===ru(:,4:6,:) = h*Grad(phi_reg)
2927  !===ru(:,7,:) = phi
2928  !===c_in_real = visc_entro(:,:)
2929 
2930  !===Compute MAX(0,phi*(1-phi))
2931  prod_ru2(:,:) = max(0.d0, ru(:,7,:)*(1.d0 - ru(:,7,:)))
2932 
2933  !===Compute |Grad(phi_reg)|
2934  norm_grad_phi(:,:) = sqrt(ru(:,4,:)**2 + ru(:,5,:)**2 + ru(:,6,:)**2) + 1.d-14
2935 
2936  !===Compute visc_comp
2937  DO n = 1, bloc_size
2938  visc_entro(:,n) = -coeff1_in_level + c_in_real(:,n)
2939  visc_comp(:,n)= c_in_real(:,n)*inputs%level_set_comp_factor*prod_ru2(:,n)/norm_grad_phi(:,n)
2940  END DO
2941 
2942  !===Compute (coeff1_in_level-visc_entro)*Grad(phi) + visc_comp*Grad(phi_reg)
2943  prod_ru(:,1,:) = -visc_entro*ru(:,1,:) + visc_comp*ru(:,4,:)
2944  prod_ru(:,2,:) = -visc_entro*ru(:,2,:) + visc_comp*ru(:,5,:)
2945  prod_ru(:,3,:) = -visc_entro*ru(:,3,:) + visc_comp*ru(:,6,:)
2946 !TEST COUPEZ
2947 ! !===Compute MAX(0,phi*(1-phi))
2948 ! prod_ru2(:,:) = MAX(0.d0, ru(:,7,:)*(1.d0 - ru(:,7,:)))
2949 !
2950 ! !===Compute visc_comp
2951 ! DO n = 1, bloc_size
2952 ! visc_entro(:,n) = -coeff1_in_level + c_in_real(:,n)
2953 ! END DO
2954 !
2955 ! !===Compute (coeff1_in_level-visc_entro)*(h*Grad(phi))
2956 ! prod_ru(:,1,:) = -visc_entro*ru(:,1,:)
2957 ! prod_ru(:,2,:) = -visc_entro*ru(:,2,:)
2958 ! prod_ru(:,3,:) = -visc_entro*ru(:,3,:)
2959 !TEST COUPEZ
2960  ELSE
2961  CALL error_petsc('error in problem type while calling FFT_PAR_COMPR_ENTRO_VISC_DCL')
2962  END IF
2963 
2964  howmany = bloc_size*nb_field1/2
2965  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
2966  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
2967  CALL dfftw_execute(fftw_plan_multi_r2c)
2968  ! clean up
2969  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
2970 
2971  howmany = bloc_size*1
2972  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru2, &
2973  inembed, istride, idist, prod_cu2, onembed, ostride, odist, fftw_estimate)
2974  CALL dfftw_execute(fftw_plan_multi_r2c)
2975  ! clean up
2976  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
2977 
2978  !JLG, FEB 4, 2011
2979  !prod_cu = prod_cu/N_r !Scaling
2980  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
2981  prod_cu2 = prod_cu2*(1.d0/n_r_pad) !Scaling
2982  !JLG, FEB 4, 2011
2983  IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -t
2984 
2985  !Now we need to redistribute the Fourier coefficients on each processor
2986 
2987  t = mpi_wtime()
2988  combined_prod_cu(:,:,1)=prod_cu(1,:,:)
2989  combined_prod_cu2(:,1)=prod_cu2(1,:)
2990  DO n=2, m_max
2991  combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
2992  combined_prod_cu2(:,n)=2*conjg(prod_cu2(n,:))
2993  END DO
2994 
2995  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
2996 
2997  t = mpi_wtime()
2998  longueur_tranche=bloc_size*m_max_c*nb_field1 !vector
2999  mpid=mpi_double_precision
3000  CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
3001  mpid,communicator,code)
3002 
3003  longueur_tranche=bloc_size*m_max_c*2 !scalar
3004  mpid=mpi_double_precision
3005  CALL mpi_alltoall(combined_prod_cu2,longueur_tranche,mpid, dist_prod_cu2,longueur_tranche, &
3006  mpid,communicator,code)
3007 
3008  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
3009  ! dimensions:
3010  t = mpi_wtime()
3011 
3012  DO i = 1, m_max_c
3013  DO nb = 1, nb_procs
3014  shiftc = (nb-1)*bloc_size
3015  shiftl = (nb-1)*m_max_c
3016  intermediate = dist_prod_cu(:,:,shiftl+i)
3017  intermediate2 = dist_prod_cu2(:,shiftl+i)
3018  DO n = 1, bloc_size
3019  IF (n+shiftc > np ) cycle
3020  DO i_field = 1, nb_field1/2
3021  v_out(n+shiftc, i_field*2-1, i) = REAL (intermediate(i_field,n),kind=8)
3022  v_out(n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
3023  END DO
3024  c_out(n+shiftc, 1, i) = REAL (intermediate2(n),kind=8)
3025  c_out(n+shiftc, 2, i) = aimag(intermediate2(n))
3026  END DO
3027  END DO
3028  END DO
3029 
3030  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
3031 
3032  END SUBROUTINE fft_par_compr_entro_visc_dcl
3033 
3034  SUBROUTINE fft_compute_entropy_visc_grad_mom(communicator, V1_in, V2_in, V3_in, c_in_real, &
3035  v_out, nb_procs, bloc_size, m_max_pad, temps)
3036  !FFT (FFT(-1) V1 . FFT(-1) V2) = c_out
3037  !This a de-aliased version of the code, FEB 4, 2011, JLG
3038  USE my_util
3039  USE input_data
3040  IMPLICIT NONE
3041  include 'fftw3.f'
3042  ! Format: V_1in(1:np,1:6,1:m_max_c)
3043  ! INPUT ARE COSINE AND SINE COEFFICIENTS
3044  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
3045  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: v1_in, v2_in, v3_in !Gradient of momentum
3046  REAL(KIND=8), DIMENSION(:,:), INTENT(IN) :: c_in_real !entropy viscosity
3047  REAL(KIND=8), DIMENSION(:,:,:,:),INTENT(OUT) :: v_out
3048  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad
3049  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
3050  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(V1_in,2)*3/2, bloc_size) :: cu
3051  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)*3/2,bloc_size) :: ru
3052  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: visco_entro
3053  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru_1, prod_ru_2, prod_ru_3
3054  COMPLEX(KIND=8), DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu_1, prod_cu_2, prod_cu_3
3055 
3056  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size) :: intermediate_1, intermediate_2, intermediate_3
3057  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),3*SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field, combined_field
3058  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu_1
3059  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu_2
3060  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu_3
3061  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu_1
3062  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu_2
3063  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu_3
3064  INTEGER :: np, np_tot, nb_field, m_max, m_max_c, mpid, n_r_pad
3065  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
3066  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code, rank, i_field
3067  REAL(KIND=8) :: t
3068  ! FFTW parameters
3069  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
3070  INTEGER, DIMENSION(1) :: dim, inembed, onembed
3071  ! Recall complexes must be rescaled
3072  ! End FFTW parameters
3073 #include "petsc/finclude/petsc.h"
3074  mpi_comm :: communicator
3075  CALL mpi_comm_rank(communicator, rank, code)
3076 
3077  IF (present(temps)) temps = 0.d0
3078 
3079  np = SIZE(v1_in,1)
3080  nb_field= SIZE(v1_in,2) ! Number of fields
3081  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
3082  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
3083  np_tot = nb_procs*bloc_size
3084  n_r_pad=2*m_max_pad-1
3085 
3086  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
3087  WRITE(*,*) ' BUG '
3088  stop
3089  END IF
3090 
3091  ! Packing all 3 complex components of both v1 and v2 input fields
3092  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
3093  ! so that after distributing the data to the processes, each one will obtain a part
3094  ! on nodal points
3095  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
3096  t = mpi_wtime()
3097 
3098  DO i = 1, m_max_c
3099  dist_field(i, 1:nb_field, 1:np) = transpose(v1_in(:,:,i))
3100  dist_field(i, nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
3101  dist_field(i,2*nb_field+1:3*nb_field,1:np) = transpose(v3_in(:,:,i))
3102  END DO
3103  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
3104 
3105  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
3106 
3107  longueur_tranche=bloc_size*m_max_c*nb_field*3
3108 
3109  t = mpi_wtime()
3110  mpid=mpi_double_precision
3111  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
3112  mpid, communicator, code)
3113  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
3114 
3115  t = mpi_wtime()
3116  !JLG, FEB 4, 2011
3117  cu = 0.d0
3118  !JLG, FEB 4, 2011
3119  DO n = 1, bloc_size
3120  DO nb = 1, nb_procs
3121  shiftc = (nb-1)*bloc_size
3122  shiftl = (nb-1)*m_max_c
3123  jindex = n + shiftc
3124  DO nf = 1, nb_field*3/2
3125  ! Put real and imaginary parts in a complex
3126  ! nf=1,2,3 => V1_in
3127  ! nf=4,5,6 => V2_in
3128  ! INPUT ARE COSINE AND SINE COEFFICIENTS
3129  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
3130  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
3131  -combined_field(:,2*nf,jindex),kind=8)
3132  END DO
3133  END DO
3134  END DO
3135  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
3136  !JLG, FEB 4, 2011
3137  !Padding is done by initialization of cu: cu = 0
3138  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
3139  !JLG, FEB 4, 2011
3140 
3141  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
3142 
3143  ! Set the parameters for dfftw
3144  fft_dim=1; istride=1; ostride=1;
3145  !JLG, FEB 4, 2011
3146 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
3147 !!$ odist=m_max; onembed(1)=m_max
3148  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
3149  odist=m_max_pad; onembed(1)=m_max_pad
3150  !JLG, FEB 4, 2011
3151 
3152  howmany=bloc_size*nb_field*3/2
3153 
3154  t = mpi_wtime()
3155  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
3156  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
3157  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
3158  CALL dfftw_execute(fftw_plan_multi_c2r)
3159  ! clean up
3160  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
3161 
3162  !===ru(:,1:3,:) is first line of momentum gradient
3163  !===ru(:,4:6,:) is second line of momentum gradient
3164  !===ru(:,7:9,:) is third line of momentum gradient
3165 
3166  !===Compute -LES_coeff1 + entropy viscosity
3167  DO n = 1, bloc_size
3168  visco_entro(:,n) = -inputs%LES_coeff1 + c_in_real(:,n)
3169  END DO
3170  !===End compute -LES_coeff1 + entropy viscosity
3171 
3172  !===Compute (-LES_coeff1 + visco_entro) times gradient(momentum)
3173  prod_ru_1(:,1,:) = visco_entro*ru(:,1,:)
3174  prod_ru_1(:,2,:) = visco_entro*ru(:,2,:)
3175  prod_ru_1(:,3,:) = visco_entro*ru(:,3,:)
3176 
3177  prod_ru_2(:,1,:) = visco_entro*ru(:,4,:)
3178  prod_ru_2(:,2,:) = visco_entro*ru(:,5,:)
3179  prod_ru_2(:,3,:) = visco_entro*ru(:,6,:)
3180 
3181  prod_ru_3(:,1,:) = visco_entro*ru(:,7,:)
3182  prod_ru_3(:,2,:) = visco_entro*ru(:,8,:)
3183  prod_ru_3(:,3,:) = visco_entro*ru(:,9,:)
3184  !===End compute (-LES_coeff1 + visco_entro) times gradient(momentum)
3185 
3186  howmany = bloc_size*nb_field/2
3187  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_1, &
3188  inembed, istride, idist, prod_cu_1, onembed, ostride, odist, fftw_estimate)
3189  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
3190  CALL dfftw_execute(fftw_plan_multi_r2c)
3191  ! clean up
3192  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
3193 
3194  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_2, &
3195  inembed, istride, idist, prod_cu_2, onembed, ostride, odist, fftw_estimate)
3196  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
3197  CALL dfftw_execute(fftw_plan_multi_r2c)
3198  ! clean up
3199  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
3200 
3201  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_3, &
3202  inembed, istride, idist, prod_cu_3, onembed, ostride, odist, fftw_estimate)
3203  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
3204  CALL dfftw_execute(fftw_plan_multi_r2c)
3205  ! clean up
3206  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
3207 
3208  prod_cu_1 = prod_cu_1*(1.d0/n_r_pad) !Scaling
3209  prod_cu_2 = prod_cu_2*(1.d0/n_r_pad) !Scaling
3210  prod_cu_3 = prod_cu_3*(1.d0/n_r_pad) !Scaling
3211 
3212  IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -t
3213 
3214  !Now we need to redistribute the Fourier coefficients on each processor
3215  t = mpi_wtime()
3216  combined_prod_cu_1(:,:,1)=prod_cu_1(1,:,:)
3217  combined_prod_cu_2(:,:,1)=prod_cu_2(1,:,:)
3218  combined_prod_cu_3(:,:,1)=prod_cu_3(1,:,:)
3219 
3220  DO n=2, m_max
3221  combined_prod_cu_1(:,:,n)=2*conjg(prod_cu_1(n,:,:))
3222  combined_prod_cu_2(:,:,n)=2*conjg(prod_cu_2(n,:,:))
3223  combined_prod_cu_3(:,:,n)=2*conjg(prod_cu_3(n,:,:))
3224  END DO
3225 
3226  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
3227 
3228  t = mpi_wtime()
3229  longueur_tranche=bloc_size*m_max_c*nb_field
3230  mpid=mpi_double_precision
3231  CALL mpi_alltoall(combined_prod_cu_1,longueur_tranche,mpid, dist_prod_cu_1,longueur_tranche, &
3232  mpid,communicator,code)
3233  CALL mpi_alltoall(combined_prod_cu_2,longueur_tranche,mpid, dist_prod_cu_2,longueur_tranche, &
3234  mpid,communicator,code)
3235  CALL mpi_alltoall(combined_prod_cu_3,longueur_tranche,mpid, dist_prod_cu_3,longueur_tranche, &
3236  mpid,communicator,code)
3237  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
3238 
3239  t = mpi_wtime()
3240  DO i = 1, m_max_c
3241  DO nb = 1, nb_procs
3242  shiftc = (nb-1)*bloc_size
3243  shiftl = (nb-1)*m_max_c
3244  intermediate_1 = dist_prod_cu_1(:,:,shiftl+i)
3245  intermediate_2 = dist_prod_cu_2(:,:,shiftl+i)
3246  intermediate_3 = dist_prod_cu_3(:,:,shiftl+i)
3247  DO n = 1, bloc_size
3248  IF (n+shiftc > np ) cycle
3249  DO i_field = 1, nb_field/2
3250  v_out(1,n+shiftc, i_field*2-1, i) = REAL (intermediate_1(i_field,n),kind=8)
3251  v_out(1,n+shiftc, i_field*2 , i) = aimag(intermediate_1(i_field,n))
3252  v_out(2,n+shiftc, i_field*2-1, i) = REAL (intermediate_2(i_field,n),kind=8)
3253  v_out(2,n+shiftc, i_field*2 , i) = aimag(intermediate_2(i_field,n))
3254  v_out(3,n+shiftc, i_field*2-1, i) = REAL (intermediate_3(i_field,n),kind=8)
3255  v_out(3,n+shiftc, i_field*2 , i) = aimag(intermediate_3(i_field,n))
3256  END DO
3257  END DO
3258  END DO
3259  END DO
3260  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
3261 
3262  END SUBROUTINE fft_compute_entropy_visc_grad_mom
3263 
3264  SUBROUTINE fft_no_overshoot_level_set(communicator, c1_inout, nb_procs, bloc_size, m_max_pad, temps)
3265  !FFT (FFT(-1) V1 . FFT(-1) V2) = c_out
3266  !This a de-aliased version of the code, FEB 4, 2011, JLG
3267  USE my_util
3268  USE input_data
3269  IMPLICIT NONE
3270  include 'fftw3.f'
3271  ! Format: V_1in(1:np,1:6,1:m_max_c)
3272  ! INPUT ARE COSINE AND SINE COEFFICIENTS
3273  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
3274  REAL(KIND=8), DIMENSION(:,:,:), INTENT(INOUT) :: c1_inout
3275  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad
3276  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
3277  REAL(KIND=8), DIMENSION(SIZE(c1_inout,3),SIZE(c1_inout,2),bloc_size*nb_procs) :: dist_field
3278  REAL(KIND=8), DIMENSION(SIZE(c1_inout,3),SIZE(c1_inout,2),bloc_size*nb_procs) :: combined_field
3279  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(c1_inout,2)/2, bloc_size) :: cu
3280  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(c1_inout,2)/2,bloc_size) :: ru
3281  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru_1
3282  COMPLEX(KIND=8), DIMENSION(m_max_pad,bloc_size) :: prod_cu_1
3283  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(c1_inout,3)*nb_procs) :: combined_prod_cu_1
3284  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(c1_inout,3)*nb_procs) :: dist_prod_cu_1
3285  COMPLEX(KIND=8), DIMENSION(bloc_size) :: intermediate_1
3286  INTEGER :: np, np_tot, nb_field, m_max, m_max_c, mpid, n_r_pad
3287  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
3288  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code, rank
3289  REAL(KIND=8) :: t
3290  ! FFTW parameters
3291  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
3292  INTEGER, DIMENSION(1) :: dim, inembed, onembed
3293  ! Recall complexes must be rescaled
3294  ! End FFTW parameters
3295 #include "petsc/finclude/petsc.h"
3296  mpi_comm :: communicator
3297 
3298  CALL mpi_comm_rank(communicator, rank, code)
3299 
3300  IF (present(temps)) temps = 0.d0
3301 
3302  np = SIZE(c1_inout,1)
3303  nb_field = SIZE(c1_inout,2) ! Number of fields for scalar
3304  m_max_c = SIZE(c1_inout,3) ! Number of complex (cosines + sines) coefficients per point
3305  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
3306  np_tot = nb_procs*bloc_size
3307  n_r_pad = 2*m_max_pad-1
3308 
3309  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
3310  WRITE(*,*) ' BUG '
3311  stop
3312  END IF
3313 
3314  ! Packing all 3 complex components of both v1 and v2 input fields
3315  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
3316  ! so that after distributing the data to the processes, each one will obtain a part
3317  ! on nodal points
3318  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
3319  t = mpi_wtime()
3320 
3321  DO i = 1, m_max_c
3322  dist_field(i, 1:nb_field, 1:np) = transpose(c1_inout(:,:,i))
3323  END DO
3324  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
3325 
3326  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 0.d0
3327 
3328  longueur_tranche=bloc_size*m_max_c*nb_field
3329 
3330  t = mpi_wtime()
3331  mpid=mpi_double_precision
3332  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
3333  mpid, communicator, code)
3334  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
3335 
3336  t = mpi_wtime()
3337  !JLG, FEB 4, 2011
3338  cu = 0.d0
3339  !JLG, FEB 4, 2011
3340  DO n = 1, bloc_size
3341  DO nb = 1, nb_procs
3342  shiftc = (nb-1)*bloc_size
3343  shiftl = (nb-1)*m_max_c
3344  jindex = n + shiftc
3345  DO nf = 1, nb_field/2
3346  ! Put real and imaginary parts in a complex
3347  ! nf=1,2,3 => V1_in
3348  ! nf=4,5,6 => V2_in
3349  ! INPUT ARE COSINE AND SINE COEFFICIENTS
3350  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
3351  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
3352  -combined_field(:,2*nf,jindex),kind=8)
3353  END DO
3354  END DO
3355  END DO
3356  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
3357  !JLG, FEB 4, 2011
3358  !Padding is done by initialization of cu: cu = 0
3359  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
3360  !JLG, FEB 4, 2011
3361 
3362  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
3363 
3364  ! Set the parameters for dfftw
3365  fft_dim=1; istride=1; ostride=1;
3366  !JLG, FEB 4, 2011
3367 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
3368 !!$ odist=m_max; onembed(1)=m_max
3369  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
3370  odist=m_max_pad; onembed(1)=m_max_pad
3371  !JLG, FEB 4, 2011
3372 
3373  howmany=bloc_size*nb_field/2
3374 
3375  t = mpi_wtime()
3376  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
3377  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
3378  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
3379  CALL dfftw_execute(fftw_plan_multi_c2r)
3380 
3381  ! clean up
3382  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
3383 
3384  !===Compute MAX(0, MIN(1, c1_inout))
3385  DO n = 1, bloc_size
3386  prod_ru_1(:,n) = max(0.d0, min(1.d0, ru(:,1,n)))
3387  END DO
3388  !===End compute MAX(0, MIN(1, c1_inout))
3389 
3390  howmany = bloc_size*1
3391  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_1, &
3392  inembed, istride, idist, prod_cu_1, onembed, ostride, odist, fftw_estimate)
3393  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
3394  CALL dfftw_execute(fftw_plan_multi_r2c)
3395  ! clean up
3396  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
3397 
3398  prod_cu_1 = prod_cu_1*(1.d0/n_r_pad) !Scaling
3399 
3400  IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -t
3401 
3402  !Now we need to redistribute the Fourier coefficients on each processor
3403  t = mpi_wtime()
3404  combined_prod_cu_1(:,1)=prod_cu_1(1,:)
3405 
3406  DO n=2, m_max
3407  combined_prod_cu_1(:,n)=2*conjg(prod_cu_1(n,:))
3408  END DO
3409 
3410  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
3411 
3412  t = mpi_wtime()
3413  longueur_tranche=bloc_size*m_max_c*2
3414  mpid=mpi_double_precision
3415  CALL mpi_alltoall(combined_prod_cu_1,longueur_tranche,mpid, dist_prod_cu_1,longueur_tranche, &
3416  mpid,communicator,code)
3417  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
3418 
3419  t = mpi_wtime()
3420  DO i = 1, m_max_c
3421  DO nb = 1, nb_procs
3422  shiftc = (nb-1)*bloc_size
3423  shiftl = (nb-1)*m_max_c
3424  intermediate_1 = dist_prod_cu_1(:,shiftl+i)
3425  DO n = 1, bloc_size
3426  IF (n+shiftc > np ) cycle
3427  c1_inout(n+shiftc, 1, i) = REAL (intermediate_1(n),kind=8)
3428  c1_inout(n+shiftc, 2, i) = aimag(intermediate_1(n))
3429  END DO
3430  END DO
3431  END DO
3432  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
3433 
3434  END SUBROUTINE fft_no_overshoot_level_set
3435 
3436  SUBROUTINE fft_compression_level_set_dcl(communicator_F,communicator_S, V1_in, V2_in, c_in, c_out, &
3437  hloc_gauss, l_g, nb_procs, bloc_size, m_max_pad, temps)
3438  USE input_data
3439 !JLG 09/ 2016
3440 USE user_data
3441 !JLG 09/ 2016
3442  !FFT (FFT(-1) V1 . FFT(-1) V2) = c_out
3443  !This a de-aliased version of the code, FEB 4, 2011, JLG
3444  IMPLICIT NONE
3445  include 'fftw3.f'
3446  ! Format: V_1in(1:np,1:6,1:m_max_c)
3447  ! INPUT ARE COSINE AND SINE COEFFICIENTS
3448  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
3449  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: v1_in, v2_in
3450  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: c_in
3451  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: c_out
3452  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: hloc_gauss
3453  INTEGER, INTENT(IN) :: l_g
3454  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
3455  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad
3456  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(V1_in,2)+SIZE(c_in,2)/2, bloc_size) :: cu
3457  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)+SIZE(c_in,2)/2,bloc_size) :: ru
3458  COMPLEX(KIND=8), DIMENSION(m_max_pad,bloc_size) :: prod_cu
3459  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru
3460  COMPLEX(KIND=8), DIMENSION(bloc_size) :: intermediate
3461  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2)+SIZE(c_in,2),bloc_size*nb_procs) :: dist_field
3462  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2)+SIZE(c_in,2),bloc_size*nb_procs) :: combined_field
3463  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
3464  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
3465  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: norm_vel
3466  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size/l_G) :: norm_vel_int
3467  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: norm_grad_phi
3468  REAL(KIND=8), DIMENSION(bloc_size*nb_procs) :: hloc_gauss_tot
3469  REAL(KIND=8) :: hh, x, max_norm_vel_loc, max_norm_vel_loc_f, max_norm_vel_tot
3470  INTEGER :: rank, l
3471  INTEGER :: np, np_tot, nb_field, nb_field2, m_max, m_max_c, mpid, n_r_pad
3472  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
3473  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
3474  REAL(KIND=8) :: t
3475  ! FFTW parameters
3476  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
3477  INTEGER, DIMENSION(1) :: dim, inembed, onembed
3478  ! Recall complexes must be rescaled
3479  ! End FFTW parameters
3480 #include "petsc/finclude/petsc.h"
3481  mpi_comm :: communicator_f
3482  mpi_comm :: communicator_s
3483 
3484  IF (present(temps)) temps = 0.d0
3485 
3486  CALL mpi_comm_rank(communicator_f, rank, code)
3487 
3488  np = SIZE(v1_in,1)
3489  nb_field = SIZE(v1_in,2) ! Number of fields
3490  nb_field2= SIZE(c_in,2) ! Number of fields
3491  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
3492  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
3493  np_tot = nb_procs*bloc_size
3494  n_r_pad = 2*m_max_pad-1
3495 
3496  IF (mod(nb_field,2)/=0 .OR. mod(nb_field2,2)/=0 .OR. m_max_c==0) THEN
3497  WRITE(*,*) ' BUG '
3498  stop
3499  END IF
3500 
3501  ! Packing all 3 complex components of both v1 and v2 input fields
3502  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
3503  ! so that after distributing the data to the processes, each one will obtain a part
3504  ! on nodal points
3505  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
3506  t = mpi_wtime()
3507 
3508  DO i = 1, m_max_c
3509  dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
3510  dist_field(i,nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
3511  dist_field(i,2*nb_field+1:2*nb_field+nb_field2,1:np) = transpose(c_in(:,:,i))
3512  END DO
3513  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
3514 
3515  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 0.d0
3516 
3517  hloc_gauss_tot(1:np) = hloc_gauss
3518  IF (np/=np_tot) hloc_gauss_tot(np+1:np_tot) = 1.d100
3519 
3520  longueur_tranche=bloc_size*m_max_c*(nb_field*2+nb_field2)
3521 
3522  t = mpi_wtime()
3523  mpid=mpi_double_precision
3524  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
3525  mpid, communicator_f, code)
3526  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
3527 
3528  t = mpi_wtime()
3529  !JLG, FEB 4, 2011
3530  cu = 0.d0
3531  !JLG, FEB 4, 2011
3532  DO n = 1, bloc_size
3533  DO nb = 1, nb_procs
3534  shiftc = (nb-1)*bloc_size
3535  shiftl = (nb-1)*m_max_c
3536  jindex = n + shiftc
3537  DO nf = 1, (nb_field+nb_field2/2)
3538  ! Put real and imaginary parts in a complex
3539  ! nf=1,2,3 => V1_in
3540  ! nf=4,5,6 => V2_in
3541  ! INPUT ARE COSINE AND SINE COEFFICIENTS
3542  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
3543  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
3544  -combined_field(:,2*nf,jindex),kind=8)
3545  END DO
3546  END DO
3547  END DO
3548  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
3549  !JLG, FEB 4, 2011
3550  !Padding is done by initialization of cu: cu = 0
3551  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
3552  !JLG, FEB 4, 2011
3553 
3554  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
3555 
3556  ! Set the parameters for dfftw
3557  fft_dim=1; istride=1; ostride=1;
3558  !JLG, FEB 4, 2011
3559 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
3560 !!$ odist=m_max; onembed(1)=m_max
3561  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
3562  odist=m_max_pad; onembed(1)=m_max_pad
3563  !JLG, FEB 4, 2011
3564 
3565  howmany=bloc_size*(nb_field+nb_field2/2)
3566 
3567 
3568  t = mpi_wtime()
3569  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
3570  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
3571  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
3572  CALL dfftw_execute(fftw_plan_multi_c2r)
3573  ! clean up
3574  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
3575 
3576  !===ru(:,1:3,:) is gradient(level_set)
3577  !===ru(:,4:6,:) is velocity
3578  !===ru(:,7,:) is level set
3579 
3580  !===Compute local velocity
3581  norm_vel(:,:) = sqrt(ru(:,4,:)**2 + ru(:,5,:)**2 + ru(:,6,:)**2)
3582  DO i = 1, 2*m_max_pad - 1
3583  DO l = 1, bloc_size/l_g
3584  x = maxval(norm_vel(i,(l-1)*l_g+1:l*l_g))
3585  norm_vel_int(i,l) = x
3586  END DO
3587  END DO
3588  IF (2*m_max_pad - 1 .GE. 3) THEN
3589  DO l = 1, bloc_size/l_g
3590  DO i = 2, 2*m_max_pad - 2
3591  norm_vel(i,(l-1)*l_g+1:l*l_g) = maxval(norm_vel_int(i-1:i+1,l))
3592  END DO
3593  norm_vel(1,(l-1)*l_g+1:l*l_g) = max(norm_vel_int(1,l),norm_vel_int(2,l),norm_vel_int(2*m_max_pad - 1,l))
3594  norm_vel(2*m_max_pad - 1,(l-1)*l_g+1:l*l_g) = &
3595  max(norm_vel_int(2*m_max_pad - 2,l),norm_vel_int(2*m_max_pad - 1,l),norm_vel_int(1,l))
3596  END DO
3597  ELSE
3598  DO l = 1, bloc_size/l_g
3599  norm_vel(1,(l-1)*l_g+1:l*l_g) = norm_vel_int(1,l)
3600  END DO
3601  END IF
3602  !===End compute local velocity
3603 
3604  !==Compute maximum of velocity
3605  max_norm_vel_loc=maxval(norm_vel)
3606  CALL mpi_allreduce(max_norm_vel_loc,max_norm_vel_loc_f,1,mpi_double_precision, mpi_max, communicator_f, code)
3607  CALL mpi_allreduce(max_norm_vel_loc_f,max_norm_vel_tot,1,mpi_double_precision, mpi_max, communicator_s, code)
3608  !==End compute maximum of velocity
3609 
3610  !===Compute |Grad(level_set)|
3611  norm_grad_phi(:,:) = sqrt(ru(:,1,:)**2 + ru(:,2,:)**2 + ru(:,3,:)**2)
3612 
3613  !===Compute compression term
3614 !JLG Sept 30 2016
3615  hh = inputs%h_min_distance
3616 !JLG 09/2016
3617 ! hh = user%ep
3618 !JLG 09/2016
3619  !hh = 2.d0*h_min !does not work well with small reg.
3620  max_norm_vel_tot=min(1.d0,max_norm_vel_tot)
3621  DO n = 1, bloc_size
3622 !TEST JLG LC 2016
3623  prod_ru(:,n) = inputs%level_set_comp_factor*4*inputs%LES_coeff4* &
3624  max_norm_vel_tot*(2.d0*regul_tab(ru(:,7,n),0.01d0)-1.d0)* &
3625  (max((1.d0 - (2*ru(:,7,n)-1.d0)**2),0.d0)/(2*hh)-norm_grad_phi(:,n))
3626 !TEST JLG LC 2016
3627  END DO
3628  !=End compute compression term
3629 
3630  howmany = bloc_size*1
3631  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
3632  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
3633  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
3634  CALL dfftw_execute(fftw_plan_multi_r2c)
3635  ! clean up
3636  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
3637 
3638  !JLG, FEB 4, 2011
3639 !!$ prod_cu = prod_cu/N_r !Scaling
3640  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
3641  !JLG, FEB 4, 2011
3642  IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -t
3643 
3644  !Now we need to redistribute the Fourier coefficients on each processor
3645  t = mpi_wtime()
3646  combined_prod_cu(:,1)=prod_cu(1,:)
3647  DO n=2, m_max
3648  combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
3649  END DO
3650 
3651  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
3652 
3653  t = mpi_wtime()
3654  longueur_tranche=bloc_size*m_max_c*2
3655  mpid=mpi_double_precision
3656  CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
3657  mpid,communicator_f,code)
3658  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
3659 
3660  t = mpi_wtime()
3661  DO i = 1, m_max_c
3662  DO nb = 1, nb_procs
3663  shiftc = (nb-1)*bloc_size
3664  shiftl = (nb-1)*m_max_c
3665  intermediate = dist_prod_cu(:,shiftl+i)
3666  DO n = 1, bloc_size
3667  IF (n+shiftc > np ) cycle
3668  c_out(n+shiftc, 1, i) = REAL (intermediate(n),kind=8)
3669  c_out(n+shiftc, 2 , i) = aimag(intermediate(n))
3670  END DO
3671  END DO
3672  END DO
3673  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
3674 
3675  END SUBROUTINE fft_compression_level_set_dcl
3676 
3677  SUBROUTINE fft_par_entro_visc_dcl(communicator, V1_in, c1_in, c_in_real, hloc_gauss, &
3678  coeff1_in_level, v_out, c_out, nb_procs, bloc_size, m_max_pad, temps)
3679  !This a de-aliased version of the code, FEB 4, 2011, JLG
3680  USE my_util
3681  USE input_data
3682  IMPLICIT NONE
3683  include 'fftw3.f'
3684  ! Format: V_1in(1:np,1:6,1:m_max_c)
3685  ! INPUT ARE COSINE AND SINE COEFFICIENTS
3686  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
3687  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: v1_in !vector
3688  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: c1_in !scalar
3689  REAL(KIND=8), DIMENSION(:,:), INTENT(IN) :: c_in_real
3690  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: hloc_gauss
3691  REAL(KIND=8), INTENT(IN) :: coeff1_in_level
3692  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: v_out
3693  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: c_out
3694  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
3695  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
3696  INTEGER :: np, np_tot, nb_field1, nb_field2
3697  INTEGER :: m_max, m_max_c, mpid, n_r_pad
3698  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
3699 
3700  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),SIZE(V1_in,2)+SIZE(c1_in,2),bloc_size*nb_procs) :: dist_field
3701  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),SIZE(V1_in,2)+SIZE(c1_in,2),bloc_size*nb_procs) :: combined_field
3702  COMPLEX(KIND=8), DIMENSION(m_max_pad, (SIZE(V1_in,2)+SIZE(c1_in,2))/2, bloc_size) :: cu
3703  REAL(KIND=8), DIMENSION(2*m_max_pad-1,(SIZE(V1_in,2)+SIZE(c1_in,2))/2,bloc_size) :: ru
3704  REAL(KIND=8), DIMENSION(bloc_size*nb_procs) :: hloc_gauss_tot
3705  REAL(KIND=8), DIMENSION(2*m_max_pad-1, bloc_size) :: visc_entro
3706  COMPLEX(KIND=8), DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu
3707  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru
3708  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2, bloc_size) :: intermediate
3709  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
3710  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
3711  COMPLEX(KIND=8), DIMENSION(m_max_pad,bloc_size) :: prod_cu2
3712  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru2
3713  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu2
3714  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu2
3715  COMPLEX(KIND=8), DIMENSION(bloc_size) :: intermediate2
3716  INTEGER :: i_field
3717  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
3718  REAL(KIND=8) :: t
3719  INTEGER :: rank
3720 
3721  ! FFTW parameters
3722  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
3723  INTEGER, DIMENSION(1) :: dim, inembed, onembed
3724  ! Recall complexes must be rescaled
3725  ! End FFTW parameters
3726 #include "petsc/finclude/petsc.h"
3727  mpi_comm :: communicator
3728 
3729  CALL mpi_comm_rank(communicator, rank, code)
3730 
3731  IF (present(temps)) temps = 0.d0
3732 
3733  np = SIZE(v1_in,1)
3734  nb_field1= SIZE(v1_in,2) ! Number of fields
3735  nb_field2= SIZE(c1_in,2) ! Number of fields
3736  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
3737  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
3738  n_r_pad = 2*m_max_pad-1
3739  np_tot = nb_procs*bloc_size
3740 
3741  IF (mod(nb_field1,2)/=0 .OR. mod(nb_field2,2)/=0 .OR. m_max_c==0) THEN
3742  WRITE(*,*) ' BUG '
3743  stop
3744  END IF
3745 
3746  ! Bloc_size is the number of points that are handled by one processor
3747  ! once the Fourier modes are all collected
3748  ! Computation of bloc_size and np_tot
3749  ! fin de la repartition des points
3750 
3751  ! Packing all 3 complex components of both v1 and v2 input fields
3752  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
3753  ! so that after distributing the data to the processes, each one will obtain a part
3754  ! on nodal points
3755  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
3756  t = mpi_wtime()
3757 
3758  DO i = 1, m_max_c
3759  dist_field(i,1:nb_field1,1:np) = transpose(v1_in(:,:,i))
3760  dist_field(i,nb_field1+1:nb_field1+nb_field2,1:np) = transpose(c1_in(:,:,i))
3761  END DO
3762  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
3763 
3764  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
3765 
3766  hloc_gauss_tot(1:np) = hloc_gauss
3767  IF (np/=np_tot) hloc_gauss_tot(np+1:np_tot) = 1.d100
3768 
3769  longueur_tranche=bloc_size*m_max_c*(nb_field1+nb_field2)
3770 
3771  t = mpi_wtime()
3772  mpid=mpi_double_precision
3773  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
3774  mpid, communicator, code)
3775  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
3776 
3777  t = mpi_wtime()
3778  !JLG, FEB 4, 2011
3779  cu = 0.d0
3780  !JLG, FEB 4, 2011
3781  DO n = 1, bloc_size
3782  DO nb = 1, nb_procs
3783  shiftc = (nb-1)*bloc_size
3784  shiftl = (nb-1)*m_max_c
3785  jindex = n + shiftc
3786  DO nf = 1, (nb_field1+nb_field2)/2
3787  ! Put real and imaginary parts in a complex
3788  ! nf=1,2,3 => V1_in
3789  ! nf=4 => V2_in
3790  ! INPUT ARE COSINE AND SINE COEFFICIENTS
3791  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
3792  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
3793  -combined_field(:,2*nf,jindex),kind=8)
3794  END DO
3795  END DO
3796  END DO
3797  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
3798  !JLG, FEB 4, 2011
3799  !Padding is done by initialization of cu: cu = 0
3800  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
3801  !JLG, FEB 4, 2011
3802 
3803  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
3804 
3805  ! Set the parameters for dfftw
3806  fft_dim=1; istride=1; ostride=1;
3807  !JLG, FEB 4, 2011
3808  !idist=N_r; inembed(1)=N_r; DIM(1)=N_r
3809  !odist=m_max; onembed(1)=m_max
3810  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
3811  odist=m_max_pad; onembed(1)=m_max_pad
3812  !JLG, FEB 4, 2011
3813 
3814  howmany=bloc_size*(nb_field1+nb_field2)/2
3815 
3816  t = mpi_wtime()
3817  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
3818  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
3819  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
3820  CALL dfftw_execute(fftw_plan_multi_c2r)
3821 
3822  ! clean up
3823  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
3824 
3825  IF (nb_field1 == 6 .AND. nb_field2 == 2) THEN
3826  !===ru(:,1:3,:) = h*Grad(phi)
3827  !===ru(:,4,:) = phi
3828  !===c_in_real = visc_entro(:,:)
3829 
3830  !===Compute MAX(0,phi*(1-phi))
3831  prod_ru2(:,:) = max(0.d0, ru(:,4,:)*(1.d0 - ru(:,4,:)))
3832 
3833  !===Compute visc_entro
3834  DO n = 1, bloc_size
3835  visc_entro(:,n) = -coeff1_in_level + c_in_real(:,n)
3836  END DO
3837 
3838  !===Compute (coeff1_in_level-visc_entro)*(h*Grad(phi))
3839  prod_ru(:,1,:) = -visc_entro*ru(:,1,:)
3840  prod_ru(:,2,:) = -visc_entro*ru(:,2,:)
3841  prod_ru(:,3,:) = -visc_entro*ru(:,3,:)
3842  ELSE
3843  CALL error_petsc('error in problem type while calling FFT_PAR_ENTRO_VISC_DCL')
3844  END IF
3845 
3846  howmany = bloc_size*nb_field1/2
3847  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
3848  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
3849  CALL dfftw_execute(fftw_plan_multi_r2c)
3850  ! clean up
3851  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
3852 
3853  howmany = bloc_size*1
3854  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru2, &
3855  inembed, istride, idist, prod_cu2, onembed, ostride, odist, fftw_estimate)
3856  CALL dfftw_execute(fftw_plan_multi_r2c)
3857  ! clean up
3858  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
3859 
3860  !JLG, FEB 4, 2011
3861  !prod_cu = prod_cu/N_r !Scaling
3862  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
3863  prod_cu2 = prod_cu2*(1.d0/n_r_pad) !Scaling
3864  !JLG, FEB 4, 2011
3865  IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -t
3866 
3867  !Now we need to redistribute the Fourier coefficients on each processor
3868 
3869  t = mpi_wtime()
3870  combined_prod_cu(:,:,1)=prod_cu(1,:,:)
3871  combined_prod_cu2(:,1)=prod_cu2(1,:)
3872  DO n=2, m_max
3873  combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
3874  combined_prod_cu2(:,n)=2*conjg(prod_cu2(n,:))
3875  END DO
3876 
3877  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
3878 
3879  t = mpi_wtime()
3880  longueur_tranche=bloc_size*m_max_c*nb_field1 !vector
3881  mpid=mpi_double_precision
3882  CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
3883  mpid,communicator,code)
3884 
3885  longueur_tranche=bloc_size*m_max_c*2 !scalar
3886  mpid=mpi_double_precision
3887  CALL mpi_alltoall(combined_prod_cu2,longueur_tranche,mpid, dist_prod_cu2,longueur_tranche, &
3888  mpid,communicator,code)
3889 
3890  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
3891  ! dimensions:
3892  t = mpi_wtime()
3893 
3894  DO i = 1, m_max_c
3895  DO nb = 1, nb_procs
3896  shiftc = (nb-1)*bloc_size
3897  shiftl = (nb-1)*m_max_c
3898  intermediate = dist_prod_cu(:,:,shiftl+i)
3899  intermediate2 = dist_prod_cu2(:,shiftl+i)
3900  DO n = 1, bloc_size
3901  IF (n+shiftc > np ) cycle
3902  DO i_field = 1, nb_field1/2
3903  v_out(n+shiftc, i_field*2-1, i) = REAL (intermediate(i_field,n),kind=8)
3904  v_out(n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
3905  END DO
3906  c_out(n+shiftc, 1, i) = REAL (intermediate2(n),kind=8)
3907  c_out(n+shiftc, 2, i) = aimag(intermediate2(n))
3908  END DO
3909  END DO
3910  END DO
3911 
3912  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
3913 
3914  END SUBROUTINE fft_par_entro_visc_dcl
3915 
3916  SUBROUTINE fft_kelvin_force(communicator,V1_in, V2_in, V_out, nb_procs, &
3917  bloc_size, m_max_pad, temps)
3918  USE my_util
3919  USE boundary
3920  IMPLICIT NONE
3921  include 'fftw3.f'
3922  ! Format: V_1in(1:np,1:6,1:m_max_c)
3923  ! INPUT ARE COSINE AND SINE COEFFICIENTS
3924  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
3925  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: v1_in ! VECTOR
3926  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: v2_in ! SCALAR
3927  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: v_out
3928  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
3929  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
3930  INTEGER :: np, np_tot, nb_field1, nb_field2, m_max, m_max_c, mpid, n_r_pad
3931  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
3932 
3933  COMPLEX(KIND=8), DIMENSION(m_max_pad, (SIZE(V1_in,2)+SIZE(V2_in,2))/2, bloc_size) :: cu
3934  REAL(KIND=8), DIMENSION(2*m_max_pad-1,(SIZE(V1_in,2)+SIZE(V2_in,2))/2,bloc_size) :: ru
3935  COMPLEX(KIND=8), DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu
3936  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru
3937  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2, bloc_size) :: intermediate
3938  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),SIZE(V1_in,2)+SIZE(V2_in,2),bloc_size*nb_procs) :: dist_field, combined_field
3939  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
3940  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
3941 
3942  INTEGER :: i_field
3943  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
3944  REAL(KIND=8) :: t
3945 
3946  ! FFTW parameters
3947  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
3948  INTEGER, DIMENSION(1) :: dim, inembed, onembed
3949  ! Recall complexes must be rescaled
3950  ! End FFTW parameters
3951 #include "petsc/finclude/petsc.h"
3952  mpi_comm :: communicator
3953 
3954  IF (present(temps)) temps = 0.d0
3955 
3956  np = SIZE(v1_in,1)
3957  nb_field1= SIZE(v1_in,2) ! Number of fields
3958  nb_field2= SIZE(v2_in,2) ! Number of fields
3959  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
3960  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
3961  n_r_pad=2*m_max_pad-1
3962  np_tot = nb_procs*bloc_size
3963 
3964  IF (mod(nb_field1,2)/=0 .OR. mod(nb_field2,2)/=0 .OR. m_max_c==0) THEN
3965  WRITE(*,*) ' BUG '
3966  stop
3967  END IF
3968 
3969  ! Bloc_size is the number of points that are handled by one processor
3970  ! once the Fourier modes are all collected
3971  ! Computation of bloc_size and np_tot
3972  ! fin de la repartition des points
3973 
3974  ! Packing all 3 complex components of both v1 and v2 input fields
3975  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
3976  ! so that after distributing the data to the processes, each one will obtain a part
3977  ! on nodal points
3978  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
3979  t = mpi_wtime()
3980 
3981  DO i = 1, m_max_c
3982  dist_field(i,1:nb_field1,1:np) = transpose(v1_in(:,:,i))
3983  dist_field(i,nb_field1+1:nb_field1+nb_field2,1:np) = transpose(v2_in(:,:,i))
3984  END DO
3985  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
3986 
3987  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
3988 
3989  longueur_tranche=bloc_size*m_max_c*(nb_field1+nb_field2)
3990 
3991  t = mpi_wtime()
3992  mpid=mpi_double_precision
3993  CALL mpi_alltoall(dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
3994  mpid, communicator, code)
3995  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
3996 
3997  t = mpi_wtime()
3998  cu = 0.d0
3999  DO n = 1, bloc_size
4000  DO nb = 1, nb_procs
4001  shiftc = (nb-1)*bloc_size
4002  shiftl = (nb-1)*m_max_c
4003  jindex = n + shiftc
4004  DO nf = 1, (nb_field1+nb_field2)/2
4005  ! Put real and imaginary parts in a complex
4006  ! nf=1,2,3 => V1_in
4007  ! nf=4 => V2_in
4008  ! INPUT ARE COSINE AND SINE COEFFICIENTS
4009  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
4010  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
4011  -combined_field(:,2*nf,jindex),kind=8)
4012  END DO
4013  END DO
4014  END DO
4015  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,kind=8)
4016  !Padding is done by initialization of cu: cu = 0
4017  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
4018 
4019  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
4020 
4021  ! Set the parameters for dfftw
4022  fft_dim=1; istride=1; ostride=1;
4023  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
4024  odist=m_max_pad; onembed(1)=m_max_pad
4025 
4026  howmany=bloc_size*(nb_field1+nb_field2)/2
4027 
4028  t = mpi_wtime()
4029  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
4030  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
4031  CALL dfftw_execute(fftw_plan_multi_c2r)
4032 
4033  ! clean up
4034  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
4035 
4036  !===ru(:,1:3,:) is the gradient(H^2/2)
4037  !===ru(:,4,:) is the temperature
4038 
4039  IF (nb_field1 == 6 .AND. nb_field2 == 2) THEN
4040  DO i = 1, 2*m_max_pad-1
4041  DO n = 1, bloc_size
4042  prod_ru(i,1,n) = kelvin_force_coeff(ru(i,4,n))*ru(i,1,n)
4043  prod_ru(i,2,n) = kelvin_force_coeff(ru(i,4,n))*ru(i,2,n)
4044  prod_ru(i,3,n) = kelvin_force_coeff(ru(i,4,n))*ru(i,3,n)
4045  END DO
4046  END DO
4047  ELSE
4048  CALL error_petsc('error in dimension of INPUT variables while calling FFT_SCALAR_VECT_DCL ')
4049  END IF
4050 
4051  howmany = bloc_size*nb_field1/2
4052 
4053  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
4054  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
4055  CALL dfftw_execute(fftw_plan_multi_r2c)
4056  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
4057 
4058 
4059 
4060  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
4061  IF (present(temps)) temps(2) = temps(2) + mpi_wtime() -t
4062 
4063  !Now we need to redistribute the Fourier coefficients on each processor
4064 
4065  t = mpi_wtime()
4066  combined_prod_cu(:,:,1)=prod_cu(1,:,:)
4067  DO n=2, m_max
4068  combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
4069  END DO
4070 
4071  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
4072 
4073  longueur_tranche=bloc_size*m_max_c*nb_field1
4074 
4075  t = mpi_wtime()
4076  mpid=mpi_double_precision
4077  CALL mpi_alltoall(combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
4078  mpid,communicator,code)
4079 
4080  IF (present(temps)) temps(1) = temps(1) + mpi_wtime() -t
4081  ! dimensions:
4082  t = mpi_wtime()
4083 
4084  DO i = 1, m_max_c
4085  DO nb = 1, nb_procs
4086  shiftc = (nb-1)*bloc_size
4087  shiftl = (nb-1)*m_max_c
4088  intermediate = dist_prod_cu(:,:,shiftl+i)
4089  DO n = 1, bloc_size
4090  IF (n+shiftc > np ) cycle
4091  DO i_field = 1, nb_field1/2
4092  v_out(n+shiftc, i_field*2-1, i) = REAL (intermediate(i_field,n),kind=8)
4093  v_out(n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
4094  END DO
4095  END DO
4096  END DO
4097  END DO
4098  IF (present(temps)) temps(3) = temps(3) + mpi_wtime() -t
4099 
4100  END SUBROUTINE fft_kelvin_force
4101 
4102 END MODULE sft_parallele
section doc_intro_frame_work_num_app Numerical approximation subsection doc_intro_fram_work_num_app_Fourier_FEM Fourier Finite element representation The SFEMaNS code uses a hybrid Fourier Finite element formulation The Fourier decomposition allows to approximate the problem’s solutions for each Fourier mode modulo nonlinear terms that are made explicit The variables are then approximated on a meridian section of the domain with a finite element method The numerical approximation of a function f $f f is written in the following generic t
Definition: doc_intro.h:199
subroutine, public fft_par_dot_prod_dcl(communicator, V1_in, V2_in, c_out, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_kelvin_force(communicator, V1_in, V2_in, V_out, nb_procs, bloc_size, m_max_pad, temps)
real(kind=8) function, dimension(size(phi)) regul_tab(phi, eps)
subroutine, public fft_par_real(communicator, V1_in, V_out, opt_nb_plane)
subroutine, public fft_par_prod_dcl(communicator, c1_in, c2_in, c_out, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_compute_entropy_visc_mom(communicator, communicator_S, V1_in, V2_in, V3_in, c1_in, hloc_gauss, c1_real_out, nb_procs, bloc_size, m_max_pad, l_G, opt_c2_real_out, temps)
real(kind=8) function, public kelvin_force_coeff(temp)
subroutine, public fft_par_entro_visc_dcl(communicator, V1_in, c1_in, c_in_real, hloc_gauss, coeff1_in_level, V_out, c_out, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_max_vel_dcl(communicator, V1_in, V_out, nb_procs, bloc_size, m_max_pad)
subroutine, public fft_compute_entropy_visc_grad_mom(communicator, V1_in, V2_in, V3_in, c_in_real, V_out, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_compute_entropy_visc(communicator, communicator_S, V1_in, V2_in, V3_in, V4_in, V5_in, hloc_gauss, V1_out, V2_out, V3_out, nb_procs, bloc_size, m_max_pad, residual_normalization, l_G, temps)
subroutine, public fft_no_overshoot_level_set(communicator, c1_inout, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_par_compr_entro_visc_dcl(communicator, V1_in, V2_in, c1_in, c_in_real, hloc_gauss, coeff1_in_level, V_out, c_out, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_par_var_eta_prod_t_dcl(communicator, eta, H_mesh, c_in, c_out, nb_procs, bloc_size, m_max_pad, time, temps)
subroutine, public fft_compression_level_set_dcl(communicator_F, communicator_S, V1_in, V2_in, c_in, c_out, hloc_gauss, l_G, nb_procs, bloc_size, m_max_pad, temps)
real(kind=8) function, public regul(phi, eps)
subroutine, public fft_scalar_vect_dcl(communicator, V1_in, V2_in, V_out, pb, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_heaviside_dcl(communicator, V1_in, values, V_out, nb_procs, bloc_size, m_max_pad, coeff_tanh, temps)
subroutine, public fft_par_var_eta_prod_gauss_dcl(communicator, eta, H_mesh, c_in, c_out, nb_procs, bloc_size, m_max_pad, rr_gauss, time, temps)
subroutine, public fft_compute_diffu_mom(communicator, V1_in, V2_in, V3_in, V1_out, V2_out, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_tensor_dcl(communicator, V1_in, V2_in, V_out, nb_procs, bloc_size, m_max_pad, temps, opt_tension)
subroutine, public fft_check_interface(communicator, V1_in, nb_fluids, interface_ok, nb_procs, bloc_size, m_max_pad, temps)
subroutine error_petsc(string)
Definition: my_util.f90:15
subroutine, public fft_par_cross_prod_dcl(communicator, V1_in, V2_in, V_out, nb_procs, bloc_size, m_max_pad, temps)