From fa9b6f2b9bcb80778e63ef2aa4e17c7573de0015 Mon Sep 17 00:00:00 2001 From: Ian Jauslin Date: Tue, 24 May 2016 13:39:23 +0000 Subject: Initial commit --- src/ss_integral.c | 958 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 958 insertions(+) create mode 100644 src/ss_integral.c (limited to 'src/ss_integral.c') diff --git a/src/ss_integral.c b/src/ss_integral.c new file mode 100644 index 0000000..cb39ea8 --- /dev/null +++ b/src/ss_integral.c @@ -0,0 +1,958 @@ +/* +Copyright 2016 Ian Jauslin + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "ss_integral.h" +#include + +// compute the integral +// takes a pointer to an integrand function 'I' as an argument. 'I' must have the following arguments +// int I(mpfr_t out, mpfr_t p1, mpfr_t p2, mpfr_t q1, mpfr_t q2, mpfr_t F1, mpfr_t F2, mpfr_t t1, mpfr_t t2, mpfr_t phi, mpfr_t W, array_mpfr* tmps, struct ss_cache cache) +int ss_integrate(mpfr_t* out, TYPE_I, hh_params params, array_mpfr abcissa, array_mpfr weights, unsigned int threads){ + mpfr_t val; + array_2_mpfr tmpss; + struct ss_cache cache; + unsigned int i; + + // compute pi and sqrt3 + mpfr_inits(cache.pi, cache.sqrt3, NULL); + mpfr_const_pi(cache.pi, MPFR_RNDN); + mpfr_sqrt_ui(cache.sqrt3, 3, MPFR_RNDN); + + mpfr_init(val); + + // init tmps + array_2_mpfr_init(&tmpss, 8*threads); + tmpss.length=8*threads; + for(i=0;ithread_ids->length>1){ + thread_id=array_pthread_t_find(pthread_self(), *(argument->thread_ids)); + if(thread_id<0){ + fprintf(stderr, "error: thread not found\nIf you see this message, then you've uncovered a serious bug...\n"); + return(-1); + } + } + else{ + thread_id=0; + } + + // tmps used in this function (not passed to integrand_theta) + tmps1=argument->tmpss.values+8*thread_id; + tmps2=argument->tmpss.values+8*thread_id+1; + + // new argument + nargs.params=argument->params; + nargs.I=argument->I; + nargs.abcissa=argument->abcissa; + nargs.weights=argument->weights; + nargs.cache=argument->cache; + nargs.rho=rho; + + // copy tmps + // only pass tmps reserved for this thread + // warning: 'nargs.tmpss' must not be freed or resized + nargs.tmpss.values=argument->tmpss.values+8*thread_id+2; + // 6 tmps in integrand_theta + nargs.tmpss.length=6; + + // alloc tmps if needed + array_mpfr_alloc_tmps(2, tmps1); + + // bounds + mpfr_div_si(tmps1->values[0], nargs.cache.pi, -6, MPFR_RNDN); + mpfr_div_ui(tmps1->values[1], nargs.cache.pi, 2, MPFR_RNDN); + + ret=integrate_gauss_smarttmp_mpfr(out, &ssFF_integrand_theta, tmps1->values[0], tmps1->values[1], argument->abcissa, argument->weights, tmps2, &nargs); + + return(ret); +} + +// integrand of the integral over theta in A_FF +int ssFF_integrand_theta(mpfr_t* out, mpfr_t theta, void* args){ + ssFF_argsint_psi nargs; + int ret; + ssFF_argsint_theta* argument=(ssFF_argsint_theta*)args; + unsigned int a; + // tmps used in this function (not passed to integrand_psi) + array_mpfr* tmps1=argument->tmpss.values; + array_mpfr* tmps2=argument->tmpss.values+1; + + // new argument + nargs.params=argument->params; + 
nargs.I=argument->I; + nargs.abcissa=argument->abcissa; + nargs.weights=argument->weights; + nargs.cache=argument->cache; + nargs.rho=argument->rho; + nargs.theta=theta; + + // copy tmps + // warning: 'nargs.tmpss' must not be freed or resized + nargs.tmpss.values=argument->tmpss.values+2; + // 4 tmps in integrand_psi + nargs.tmpss.length=4; + + // alloc tmps if needed + array_mpfr_alloc_tmps(3, tmps1); + + // split the integral into four parts to improve the precision + mpfr_set_ui(*out, 0, MPFR_RNDN); + for(a=0;a<4;a++){ + // bounds + mpfr_set_ui(tmps1->values[0], a, MPFR_RNDN); + mpfr_sub_ui(tmps1->values[0], tmps1->values[0], 2, MPFR_RNDN); + mpfr_div_ui(tmps1->values[0], tmps1->values[0], 4, MPFR_RNDN); + mpfr_mul(tmps1->values[0], tmps1->values[0], nargs.cache.pi, MPFR_RNDN); + + mpfr_set_ui(tmps1->values[1], a, MPFR_RNDN); + mpfr_sub_ui(tmps1->values[1], tmps1->values[1], 1, MPFR_RNDN); + mpfr_div_ui(tmps1->values[1], tmps1->values[1], 4, MPFR_RNDN); + mpfr_mul(tmps1->values[1], tmps1->values[1], nargs.cache.pi, MPFR_RNDN); + + + ret=integrate_gauss_smarttmp_mpfr(tmps1->values+2, &ssFF_integrand_psi, tmps1->values[0], tmps1->values[1], argument->abcissa, argument->weights, tmps2, &nargs); + if(ret<0){ + break; + } + mpfr_add(*out, *out, tmps1->values[2], MPFR_RNDN); + } + + return(ret); +} + +// integrand of the integral over psi in A_FF +int ssFF_integrand_psi(mpfr_t* out, mpfr_t psi, void* args){ + ssFF_argsint_z nargs; + int ret; + ssFF_argsint_psi* argument=(ssFF_argsint_psi*)args; + // tmps used in this function (not passed to integrand_z) + array_mpfr* tmps1=argument->tmpss.values; + array_mpfr* tmps2=argument->tmpss.values+1; + + // new argument + nargs.params=argument->params; + nargs.I=argument->I; + nargs.cache=argument->cache; + nargs.rho=argument->rho; + nargs.theta=argument->theta; + nargs.psi=psi; + + // copy tmps + // warning: 'nargs.tmpss' must not be freed or resized + nargs.tmpss.values=argument->tmpss.values+2; + // 2 tmps in integrand_z + nargs.tmpss.length=2; + + // alloc tmps if needed + array_mpfr_alloc_tmps(2, tmps1); + + // bounds + mpfr_set_ui(tmps1->values[0], 0, MPFR_RNDN); + mpfr_set_ui(tmps1->values[1], 1, MPFR_RNDN); + + ret=integrate_gauss_smarttmp_mpfr(out, &ssFF_integrand_z, tmps1->values[0], tmps1->values[1], argument->abcissa, argument->weights, tmps2, &nargs); + + return(ret); +} + +// integrand of the integral over z in A_FF +int ssFF_integrand_z(mpfr_t* out, mpfr_t z, void* args){ + ssFF_argsint_z* a=(ssFF_argsint_z*)args; + mpfr_t* tmps; + // tmps used in this function + array_mpfr* tmps1=a->tmpss.values; + array_mpfr* tmps2=a->tmpss.values+1; + + // alloc tmps if needed + array_mpfr_alloc_tmps(7, tmps1); + + tmps=tmps1->values; + + // r (store in tmps[0]) + mpfr_cos(tmps[0], a->psi, MPFR_RNDN); + mpfr_add_ui(tmps[1], tmps[0], 1, MPFR_RNDN); + mpfr_ui_sub(tmps[0], 1, tmps[0], MPFR_RNDN); + mpfr_div(tmps[0], tmps[0], tmps[1], MPFR_RNDN); + mpfr_mul(tmps[0], tmps[0], z, MPFR_RNDN); + mpfr_add_ui(tmps[0], tmps[0], 1, MPFR_RNDN); + mpfr_ui_sub(tmps[1], 1, z, MPFR_RNDN); + mpfr_mul(tmps[0], tmps[0], tmps[1], MPFR_RNDN); + + // varphi (store in tmps[1]) + mpfr_mul_ui(tmps[1], a->psi, 2, MPFR_RNDN); + mpfr_sin(tmps[2], tmps[1], MPFR_RNDN); + mpfr_cos(tmps[1], tmps[1], MPFR_RNDN); + mpfr_pow_ui(tmps[2], tmps[2], 2, MPFR_RNDN); + mpfr_mul(tmps[2], tmps[2], tmps[0], MPFR_RNDN); + mpfr_div_ui(tmps[2], tmps[2], 2, MPFR_RNDN); + mpfr_sub(tmps[1], tmps[1], tmps[2], MPFR_RNDN); + mpfr_acos(tmps[1], tmps[1], MPFR_RNDN); + if(mpfr_cmp_ui(a->psi, 0)<0){ + 
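+ // mpfr_acos returns a value in [0,pi]; give varphi the sign of psi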
mpfr_neg(tmps[1], tmps[1], MPFR_RNDN); + } + + // jacobian + mpfr_mul_ui(*out, a->psi, 2, MPFR_RNDN); + mpfr_cos(*out, *out, MPFR_RNDN); + mpfr_mul(*out, *out, tmps[0], MPFR_RNDN); + mpfr_add_ui(*out, *out, 1, MPFR_RNDN); + mpfr_cos(tmps[2], a->psi, MPFR_RNDN); + mpfr_pow_ui(tmps[3], tmps[2], 2, MPFR_RNDN); + mpfr_mul(tmps[3], tmps[3], tmps[0], MPFR_RNDN); + mpfr_add_ui(tmps[3], tmps[3], 1, MPFR_RNDN); + mpfr_sqrt(tmps[3], tmps[3], MPFR_RNDN); + mpfr_div(*out, *out, tmps[3], MPFR_RNDN); + mpfr_add_ui(tmps[2], tmps[2], 1, MPFR_RNDN); + mpfr_div(*out, *out, tmps[2], MPFR_RNDN); + mpfr_mul(*out, *out, tmps[0], MPFR_RNDN); + mpfr_mul(*out, *out, a->rho, MPFR_RNDN); + mpfr_mul(*out, *out, a->rho, MPFR_RNDN); + mpfr_mul(*out, *out, a->rho, MPFR_RNDN); + mpfr_mul_ui(*out, *out, 4, MPFR_RNDN); + + // cutoffs + ss_cutoff(tmps[2], a->rho, tmps[3], tmps[4]); + mpfr_mul(*out, *out, tmps[2], MPFR_RNDN); + mpfr_mul(tmps[2], a->rho, tmps[0], MPFR_RNDN); + ss_cutoff(tmps[2], tmps[2], tmps[3], tmps[4]); + mpfr_mul(*out, *out, tmps[2], MPFR_RNDN); + + // q1 (store in tmps[2]) + mpfr_add(tmps[2], a->theta, tmps[1], MPFR_RNDN); + mpfr_cos(tmps[2], tmps[2], MPFR_RNDN); + mpfr_mul(tmps[2], tmps[2], tmps[0], MPFR_RNDN); + mpfr_mul(tmps[2], tmps[2], a->rho, MPFR_RNDN); + mpfr_add(tmps[2], tmps[2], a->cache.sqrt3, MPFR_RNDN); + // q2 (store in tmps[3]) + mpfr_add(tmps[3], a->theta, tmps[1], MPFR_RNDN); + mpfr_sin(tmps[3], tmps[3], MPFR_RNDN); + mpfr_mul(tmps[3], tmps[3], tmps[0], MPFR_RNDN); + mpfr_mul(tmps[3], tmps[3], a->rho, MPFR_RNDN); + mpfr_add_ui(tmps[3], tmps[3], 1, MPFR_RNDN); + if(a->params.omega==-1){ + mpfr_neg(tmps[3], tmps[3], MPFR_RNDN); + } + // p1 (store in tmps[0]) + mpfr_cos(tmps[0], a->theta, MPFR_RNDN); + mpfr_mul(tmps[0], tmps[0], a->rho, MPFR_RNDN); + mpfr_add(tmps[0], tmps[0], a->cache.sqrt3, MPFR_RNDN); + // p2 (store in tmps[1]) + mpfr_sin(tmps[1], a->theta, MPFR_RNDN); + mpfr_mul(tmps[1], tmps[1], a->rho, MPFR_RNDN); + mpfr_add_ui(tmps[1], tmps[1], 1, MPFR_RNDN); + if(a->params.omega==-1){ + mpfr_neg(tmps[1], tmps[1], MPFR_RNDN); + } + // F1 (store in tmps[4]) + mpfr_add(tmps[4], tmps[0], tmps[2], MPFR_RNDN); + mpfr_sub(tmps[4], tmps[4], a->cache.sqrt3, MPFR_RNDN); + // F2 (store in tmps[5]) + mpfr_add(tmps[5], tmps[1], tmps[3], MPFR_RNDN); + mpfr_sub_si(tmps[5], tmps[5], a->params.omega, MPFR_RNDN); + + // I + (*(a->I))(tmps[6], tmps[0], tmps[1], tmps[2], tmps[3], tmps[4], tmps[5], a->params.t1, a->params.t2, a->params.phi, a->params.W, tmps2, a->cache); + mpfr_mul(*out, *out, tmps[6], MPFR_RNDN); + + return(0); +} + + +// compute the integral A_RF +int ssRF_integrate(mpfr_t* out, TYPE_I, hh_params params, array_mpfr abcissa, array_mpfr weights, struct ss_cache cache, array_2_mpfr tmpss, unsigned int threads){ + ssRF_argsint_theta args; + int ret; + mpfr_t val; + mpfr_t lower, upper; + unsigned int a; + array_pthread_t thread_ids; + + // init + thread_ids.length=0; + + args.params=params; + args.I=I; + args.abcissa=abcissa; + args.weights=weights; + args.cache=cache; + args.tmpss=tmpss; + args.thread_ids=&thread_ids; + + mpfr_inits(val, lower, upper, NULL); + + // split the integral in three parts to improve the precision + mpfr_set_ui(*out, 0, MPFR_RNDN); + for(a=0;a<3;a++){ + // bounds + mpfr_set_ui(lower, a, MPFR_RNDN); + mpfr_mul_ui(lower, lower, 4, MPFR_RNDN); + mpfr_sub_ui(lower, lower, 1, MPFR_RNDN); + mpfr_div_ui(lower, lower, 6, MPFR_RNDN); + mpfr_mul(lower, lower, args.cache.pi, MPFR_RNDN); + + mpfr_set_ui(upper, a, MPFR_RNDN); + mpfr_mul_ui(upper, upper, 4, MPFR_RNDN); + 
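+ // bounds of the a-th sector: theta runs from (4*a-1)*pi/6 to (4*a+3)*pi/6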
mpfr_add_ui(upper, upper, 3, MPFR_RNDN); + mpfr_div_ui(upper, upper, 6, MPFR_RNDN); + mpfr_mul(upper, upper, args.cache.pi, MPFR_RNDN); + + if(threads==1){ + ret=integrate_gauss_mpfr(&val, &ssRF_integrand_theta, lower, upper, abcissa, weights, &args); + } + else{ + ret=integrate_gauss_multithread_mpfr(&val, &ssRF_integrand_theta, lower, upper, abcissa, weights, &args, threads, args.thread_ids); + array_pthread_t_free(*(args.thread_ids)); + } + if(ret<0){ + break; + } + + mpfr_add(*out, *out, val, MPFR_RNDN); + } + + // extra factor + mpfr_mul_ui(*out, *out, 3, MPFR_RNDN); + + mpfr_clears(val, lower, upper, NULL); + return(ret); +} + +// integrand of the integral over theta in A_RF +int ssRF_integrand_theta(mpfr_t* out, mpfr_t theta, void* args){ + ssRF_argsint_varphi nargs; + int ret; + ssRF_argsint_theta* argument=(ssRF_argsint_theta*)args; + // tmps used in this function (not passed to integrand_varphi) + array_mpfr* tmps1; + array_mpfr* tmps2; + int thread_id; + + // the number of the thread + if(argument->thread_ids->length>1){ + thread_id=array_pthread_t_find(pthread_self(), *(argument->thread_ids)); + if(thread_id<0){ + fprintf(stderr, "error: thread not found\nIf you see this message, then you've uncovered a serious bug...\n"); + return(-1); + } + } + else{ + thread_id=0; + } + + // tmps used in this function (not passed to integrand_varphi) + tmps1=argument->tmpss.values+8*thread_id; + tmps2=argument->tmpss.values+8*thread_id+1; + + // new argument + nargs.params=argument->params; + nargs.I=argument->I; + nargs.abcissa=argument->abcissa; + nargs.weights=argument->weights; + nargs.cache=argument->cache; + nargs.theta=theta; + + // copy tmps + // only pass tmps reserved for this thread + // warning: 'nargs.tmpss' must not be freed or resized + nargs.tmpss.values=argument->tmpss.values+8*thread_id+2; + // 6 tmps in integrand_varphi + nargs.tmpss.length=6; + + // alloc tmps if needed + array_mpfr_alloc_tmps(2, tmps1); + + // bounds + mpfr_div_si(tmps1->values[0], nargs.cache.pi, -6, MPFR_RNDN); + mpfr_div_ui(tmps1->values[1], nargs.cache.pi, 2, MPFR_RNDN); + + ret=integrate_gauss_smarttmp_mpfr(out, &ssRF_integrand_varphi, tmps1->values[0], tmps1->values[1], argument->abcissa, argument->weights, tmps2, &nargs); + + return(ret); +} + +// integrand of the integral over varphi in A_RF +int ssRF_integrand_varphi(mpfr_t* out, mpfr_t varphi, void* args){ + ssRF_argsint_r nargs; + int ret; + ssRF_argsint_varphi* argument=(ssRF_argsint_varphi*)args; + // tmps used in this function (not passed to integrand_r) + array_mpfr* tmps1=argument->tmpss.values; + array_mpfr* tmps2=argument->tmpss.values+1; + + // new argument + nargs.params=argument->params; + nargs.I=argument->I; + nargs.abcissa=argument->abcissa; + nargs.weights=argument->weights; + nargs.cache=argument->cache; + nargs.theta=argument->theta; + nargs.varphi=varphi; + + // copy tmps + // warning: 'nargs.tmpss' must not be freed or resized + nargs.tmpss.values=argument->tmpss.values+2; + // 4 tmps in integrand_r + nargs.tmpss.length=4; + + // alloc tmps if needed + array_mpfr_alloc_tmps(2, tmps1); + + // bounds + mpfr_set_ui(tmps1->values[0], 0, MPFR_RNDN); + mpfr_set_ui(tmps1->values[1], 1, MPFR_RNDN); + + ret=integrate_gauss_smarttmp_mpfr(out, &ssRF_integrand_r, tmps1->values[0], tmps1->values[1], argument->abcissa, argument->weights, tmps2, &nargs); + + return(ret); +} + +// integrand of the integral over r in A_RF +int ssRF_integrand_r(mpfr_t* out, mpfr_t r, void* args){ + ssRF_argsint_rho nargs; + int ret; + ssRF_argsint_r* 
argument=(ssRF_argsint_r*)args; + // tmps used in this function (not passed to integrand_rho) + array_mpfr* tmps1=argument->tmpss.values; + array_mpfr* tmps2=argument->tmpss.values+1; + + // new argument + nargs.params=argument->params; + nargs.I=argument->I; + nargs.cache=argument->cache; + nargs.theta=argument->theta; + nargs.varphi=argument->varphi; + nargs.r=r; + + // copy tmps + // warning: 'nargs.tmpss' must not be freed or resized + nargs.tmpss.values=argument->tmpss.values+2; + // 2 tmps in integrand_rho + nargs.tmpss.length=2; + + // alloc tmps if needed + array_mpfr_alloc_tmps(2, tmps1); + + // bounds + mpfr_set_ui(tmps1->values[0], 0, MPFR_RNDN); + ss_R(tmps1->values[1], nargs.varphi, 0, nargs.cache); + + ret=integrate_gauss_smarttmp_mpfr(out, &ssRF_integrand_rho, tmps1->values[0], tmps1->values[1], argument->abcissa, argument->weights, tmps2, &nargs); + + return(ret); +} + +// integrand of the integral over rho in A_RF +int ssRF_integrand_rho(mpfr_t* out, mpfr_t rho, void* args){ + int nu; + ssRF_argsint_rho* a=(ssRF_argsint_rho*)args; + mpfr_t* tmps; + // tmps used in this function + array_mpfr* tmps1=a->tmpss.values; + array_mpfr* tmps2=a->tmpss.values+1; + + // alloc tmps if needed + array_mpfr_alloc_tmps(11, tmps1); + + tmps=tmps1->values; + + mpfr_set_ui(*out, 0, MPFR_RNDN); + for(nu=-1;nu<=1;nu=nu+2){ + // q1 (store in tmps[2]) + mpfr_cos(tmps[2], a->theta, MPFR_RNDN); + mpfr_mul(tmps[2], tmps[2], a->r, MPFR_RNDN); + mpfr_add(tmps[2], tmps[2], a->cache.sqrt3, MPFR_RNDN); + // q2 (store in tmps[3]) + mpfr_sin(tmps[3], a->theta, MPFR_RNDN); + mpfr_mul(tmps[3], tmps[3], a->r, MPFR_RNDN); + mpfr_add_ui(tmps[3], tmps[3], 1, MPFR_RNDN); + if(a->params.omega==-1){ + mpfr_neg(tmps[3], tmps[3], MPFR_RNDN); + } + // F1 (store in tmps[4]) + mpfr_cos(tmps[4], a->varphi, MPFR_RNDN); + mpfr_mul(tmps[4], tmps[4], rho, MPFR_RNDN); + mpfr_add(tmps[4], tmps[4], a->cache.sqrt3, MPFR_RNDN); + // F2 (store in tmps[5]) + mpfr_sin(tmps[5], a->varphi, MPFR_RNDN); + mpfr_mul(tmps[5], tmps[5], rho, MPFR_RNDN); + mpfr_add_ui(tmps[5], tmps[5], 1, MPFR_RNDN); + if(nu==-1){ + mpfr_neg(tmps[5], tmps[5], MPFR_RNDN); + } + // p1 (store in tmps[0]) + mpfr_sub(tmps[0], tmps[4], tmps[2], MPFR_RNDN); + mpfr_add(tmps[0], tmps[0], a->cache.sqrt3, MPFR_RNDN); + // p2 (store in tmps[1]) + mpfr_sub(tmps[1], tmps[5], tmps[3], MPFR_RNDN); + mpfr_add_si(tmps[1], tmps[1], a->params.omega, MPFR_RNDN); + + // cutoff + // tmps[6]=(1-ss_cutoff(ss_norm(F1-q1,F2-q2)))*ss_cutoff(r) + mpfr_sub(tmps[6], tmps[4], tmps[2], MPFR_RNDN); + mpfr_sub(tmps[7], tmps[5], tmps[3], MPFR_RNDN); + ss_norm(tmps[8], tmps[6], tmps[7], a->cache, tmps[9], tmps[10]); + ss_cutoff(tmps[6], tmps[8], tmps[7], tmps[9]); + mpfr_ui_sub(tmps[6], 1, tmps[6], MPFR_RNDN); + ss_cutoff(tmps[7], a->r, tmps[8], tmps[9]); + mpfr_mul(tmps[6], tmps[6], tmps[7], MPFR_RNDN); + + (*(a->I))(tmps[7], tmps[0], tmps[1], tmps[2], tmps[3], tmps[4], tmps[5], a->params.t1, a->params.t2, a->params.phi, a->params.W, tmps2, a->cache); + + // r*rho*I*cutoffs + mpfr_mul(tmps[6], tmps[6], tmps[7], MPFR_RNDN); + mpfr_mul(tmps[6], tmps[6], a->r, MPFR_RNDN); + mpfr_mul(tmps[6], tmps[6], rho, MPFR_RNDN); + + // add to *out + mpfr_add(*out, *out, tmps[6], MPFR_RNDN); + } + + return(0); +} + + +// compute the integral A_RR +int ssRR_integrate(mpfr_t* out, TYPE_I, hh_params params, array_mpfr abcissa, array_mpfr weights, struct ss_cache cache, array_2_mpfr tmpss, unsigned int threads){ + ssRR_argsint_theta args; + int ret; + unsigned int a; + mpfr_t lower, upper; + mpfr_t val; + 
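+ // ids of the worker threads; the integrands use them to locate the tmps slice reserved for their thread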
array_pthread_t thread_ids; + + // init + thread_ids.length=0; + + args.params=params; + args.I=I; + args.abcissa=abcissa; + args.weights=weights; + args.cache=cache; + args.tmpss=tmpss; + args.thread_ids=&thread_ids; + + mpfr_inits(val, lower, upper, NULL); + + // split the integral in three parts + mpfr_set_ui(*out, 0, MPFR_RNDN); + for(a=0;a<3;a++){ + // bounds + mpfr_set_ui(lower, a, MPFR_RNDN); + mpfr_mul_ui(lower, lower, 4, MPFR_RNDN); + mpfr_sub_ui(lower, lower, 1, MPFR_RNDN); + mpfr_div_ui(lower, lower, 6, MPFR_RNDN); + mpfr_mul(lower, lower, args.cache.pi, MPFR_RNDN); + + mpfr_set_ui(upper, a, MPFR_RNDN); + mpfr_mul_ui(upper, upper, 4, MPFR_RNDN); + mpfr_add_ui(upper, upper, 3, MPFR_RNDN); + mpfr_div_ui(upper, upper, 6, MPFR_RNDN); + mpfr_mul(upper, upper, args.cache.pi, MPFR_RNDN); + + // save sector + args.sector_theta=a; + + if(threads==1){ + ret=integrate_gauss_mpfr(&val, &ssRR_integrand_theta, lower, upper, abcissa, weights, &args); + } + else{ + ret=integrate_gauss_multithread_mpfr(&val, &ssRR_integrand_theta, lower, upper, abcissa, weights, &args, threads, args.thread_ids); + array_pthread_t_free(*(args.thread_ids)); + } + if(ret<0){ + break; + } + + mpfr_add(*out, *out, val, MPFR_RNDN); + } + // extra factor + mpfr_mul_ui(*out, *out, 3, MPFR_RNDN); + + mpfr_clears(val, lower, upper, NULL); + + return(ret); +} + +// integrand of the integral over theta in A_RR +int ssRR_integrand_theta(mpfr_t* out, mpfr_t theta, void* args){ + ssRR_argsint_varphi nargs; + int ret; + ssRR_argsint_theta* argument=(ssRR_argsint_theta*)args; + array_mpfr* tmps1; + array_mpfr* tmps2; + int thread_id; + + // the number of the thread + if(argument->thread_ids->length>1){ + thread_id=array_pthread_t_find(pthread_self(), *(argument->thread_ids)); + if(thread_id<0){ + fprintf(stderr, "error: thread not found\nIf you see this message, then you've uncovered a serious bug...\n"); + return(-1); + } + } + else{ + thread_id=0; + } + + // tmps used in this function (not passed to integrand_varphi) + tmps1=argument->tmpss.values+8*thread_id; + tmps2=argument->tmpss.values+8*thread_id+1; + + // new argument + nargs.params=argument->params; + nargs.I=argument->I; + nargs.abcissa=argument->abcissa; + nargs.weights=argument->weights; + nargs.cache=argument->cache; + nargs.sector_theta=argument->sector_theta; + nargs.theta=theta; + + // copy tmps + // only pass tmps reserved for this thread + // warning: 'nargs.tmpss' must not be freed or resized + nargs.tmpss.values=argument->tmpss.values+8*thread_id+2; + // 6 tmps in integrand_varphi + nargs.tmpss.length=6; + + // alloc tmps if needed + array_mpfr_alloc_tmps(2, tmps1); + + // bounds + mpfr_div_si(tmps1->values[0], nargs.cache.pi, -6, MPFR_RNDN); + mpfr_div_ui(tmps1->values[1], nargs.cache.pi, 2, MPFR_RNDN); + + ret=integrate_gauss_smarttmp_mpfr(out, &ssRR_integrand_varphi, tmps1->values[0], tmps1->values[1], argument->abcissa, argument->weights, tmps2, &nargs); + + return(ret); +} + +// integrand of the integral over varphi in A_RR +int ssRR_integrand_varphi(mpfr_t* out, mpfr_t varphi, void* args){ + ssRR_argsint_r nargs; + int ret; + ssRR_argsint_varphi* argument=(ssRR_argsint_varphi*)args; + // tmps used in this function (not passed to integrand_r) + array_mpfr* tmps1=argument->tmpss.values; + array_mpfr* tmps2=argument->tmpss.values+1; + + // new argument + nargs.params=argument->params; + nargs.I=argument->I; + nargs.abcissa=argument->abcissa; + nargs.weights=argument->weights; + nargs.cache=argument->cache; + nargs.theta=argument->theta; + 
nargs.varphi=varphi; + + // copy tmps + // only pass tmps reserved for this thread + // warning: 'nargs.tmpss' must not be freed or resized + nargs.tmpss.values=argument->tmpss.values+2; + // 4 tmps in integrand_r + nargs.tmpss.length=4; + + // alloc tmps if needed + array_mpfr_alloc_tmps(2, tmps1); + + // bounds + mpfr_set_ui(tmps1->values[0], 0, MPFR_RNDN); + ss_R(tmps1->values[1], nargs.theta, argument->sector_theta, nargs.cache); + + ret=integrate_gauss_smarttmp_mpfr(out, &ssRR_integrand_r, tmps1->values[0], tmps1->values[1], argument->abcissa, argument->weights, tmps2, &nargs); + + return(ret); +} + +// integrand of the integral over r in A_RR +int ssRR_integrand_r(mpfr_t* out, mpfr_t r, void* args){ + ssRR_argsint_rho nargs; + int ret; + ssRR_argsint_r* argument=(ssRR_argsint_r*)args; + // tmps used in this function (not passed to integrand_rho) + array_mpfr* tmps1=argument->tmpss.values; + array_mpfr* tmps2=argument->tmpss.values+1; + + // new argument + nargs.params=argument->params; + nargs.I=argument->I; + nargs.cache=argument->cache; + nargs.theta=argument->theta; + nargs.varphi=argument->varphi; + nargs.r=r; + + // copy tmps + // only pass tmps reserved for this thread + // warning: 'nargs.tmpss' must not be freed or resized + nargs.tmpss.values=argument->tmpss.values+2; + // 2 tmps in integrand_rho + nargs.tmpss.length=2; + + // alloc tmps if needed + array_mpfr_alloc_tmps(2, tmps1); + + // bounds + mpfr_set_ui(tmps1->values[0], 0, MPFR_RNDN); + ss_R(tmps1->values[1], nargs.varphi, 0, nargs.cache); + + ret=integrate_gauss_smarttmp_mpfr(out, &ssRR_integrand_rho, tmps1->values[0], tmps1->values[1], argument->abcissa, argument->weights, tmps2, &nargs); + + return(ret); +} + +// integrand of the integral over rho in A_RR +int ssRR_integrand_rho(mpfr_t* out, mpfr_t rho, void* args){ + int eta, nu; + ssRR_argsint_rho* a=(ssRR_argsint_rho*)args; + mpfr_t* tmps; + // tmps used in this function (not passed to integrand_rho) + array_mpfr* tmps1=a->tmpss.values; + array_mpfr* tmps2=a->tmpss.values+1; + + // alloc tmps if needed + array_mpfr_alloc_tmps(12, tmps1); + + tmps=tmps1->values; + + mpfr_set_ui(*out, 0, MPFR_RNDN); + for(eta=-1;eta<=1;eta=eta+2){ + for(nu=-1;nu<=1;nu=nu+2){ + // q1 (store in tmps[2]) + mpfr_cos(tmps[2], a->theta, MPFR_RNDN); + mpfr_mul(tmps[2], tmps[2], a->r, MPFR_RNDN); + mpfr_add(tmps[2], tmps[2], a->cache.sqrt3, MPFR_RNDN); + // q2 (store in tmps[3]) + mpfr_sin(tmps[3], a->theta, MPFR_RNDN); + mpfr_mul(tmps[3], tmps[3], a->r, MPFR_RNDN); + mpfr_add_ui(tmps[3], tmps[3], 1, MPFR_RNDN); + if(eta==-1){ + mpfr_neg(tmps[3], tmps[3], MPFR_RNDN); + } + // F1 (store in tmps[4]) + mpfr_cos(tmps[4], a->varphi, MPFR_RNDN); + mpfr_mul(tmps[4], tmps[4], rho, MPFR_RNDN); + mpfr_add(tmps[4], tmps[4], a->cache.sqrt3, MPFR_RNDN); + // F2 (store in tmps[5]) + mpfr_sin(tmps[5], a->varphi, MPFR_RNDN); + mpfr_mul(tmps[5], tmps[5], rho, MPFR_RNDN); + mpfr_add_ui(tmps[5], tmps[5], 1, MPFR_RNDN); + if(nu==-1){ + mpfr_neg(tmps[5], tmps[5], MPFR_RNDN); + } + // p1 (store in tmps[0]) + mpfr_sub(tmps[0], tmps[4], tmps[2], MPFR_RNDN); + mpfr_add(tmps[0], tmps[0], a->cache.sqrt3, MPFR_RNDN); + // p2 (store in tmps[1]) + mpfr_sub(tmps[1], tmps[5], tmps[3], MPFR_RNDN); + mpfr_add_si(tmps[1], tmps[1], a->params.omega, MPFR_RNDN); + + // cutoff + // tmps[6]=(1-ss_cutoff(ss_norm(F1-q1,F2-q2)))*(1-ss_cutoff(ss_norm(q1-SQRT3,q2-omega))) + mpfr_sub(tmps[6], tmps[4], tmps[2], MPFR_RNDN); + mpfr_sub(tmps[7], tmps[5], tmps[3], MPFR_RNDN); + ss_norm(tmps[8], tmps[6], tmps[7], a->cache, tmps[9], 
tmps[10]); + ss_cutoff(tmps[6], tmps[8], tmps[7], tmps[9]); + mpfr_ui_sub(tmps[6], 1, tmps[6], MPFR_RNDN); + mpfr_sub(tmps[7], tmps[2], a->cache.sqrt3, MPFR_RNDN); + mpfr_sub_si(tmps[8], tmps[3], a->params.omega, MPFR_RNDN); + ss_norm(tmps[9], tmps[7], tmps[8], a->cache, tmps[10], tmps[11]); + ss_cutoff(tmps[7], tmps[9], tmps[8], tmps[10]); + mpfr_ui_sub(tmps[7], 1, tmps[7], MPFR_RNDN); + mpfr_mul(tmps[6], tmps[6], tmps[7], MPFR_RNDN); + + (*(a->I))(tmps[7], tmps[0], tmps[1], tmps[2], tmps[3], tmps[4], tmps[5], a->params.t1, a->params.t2, a->params.phi, a->params.W, tmps2, a->cache); + + // r*rho*I*cutoffs + mpfr_mul(tmps[6], tmps[6], tmps[7], MPFR_RNDN); + mpfr_mul(tmps[6], tmps[6], a->r, MPFR_RNDN); + mpfr_mul(tmps[6], tmps[6], rho, MPFR_RNDN); + + // add to *out + mpfr_add(*out, *out, tmps[6], MPFR_RNDN); + } + } + + return(0); +} + + +// R(theta) +int ss_R(mpfr_t out, mpfr_t theta, unsigned int sector, struct ss_cache cache){ + mpfr_set_ui(out, sector, MPFR_RNDN); + mpfr_mul_ui(out, out, 8, MPFR_RNDN); + mpfr_sub_ui(out, out, 1, MPFR_RNDN); + mpfr_mul(out, out, cache.pi, MPFR_RNDN); + mpfr_div_ui(out, out, 6, MPFR_RNDN); + mpfr_add(out, out, theta, MPFR_RNDN); + mpfr_cos(out, out, MPFR_RNDN); + mpfr_ui_div(out, 1, out, MPFR_RNDN); + return(0); +} + +// cutoff function +int ss_cutoff(mpfr_t out, mpfr_t x, mpfr_t tmp1, mpfr_t tmp2){ + + if(mpfr_cmp_d(x,0.5)<=0){ + mpfr_set_ui(out, 1, MPFR_RNDN); + return(0); + } + if(mpfr_cmp_ui(x,1)>=0){ + mpfr_set_ui(out, 0, MPFR_RNDN); + return(0); + } + + mpfr_ui_sub(tmp1, 1, x, MPFR_RNDN); + mpfr_d_div(tmp1, -0.5, tmp1, MPFR_RNDN); + mpfr_exp(tmp1, tmp1, MPFR_RNDN); + + mpfr_sub_d(tmp2, x, 0.5, MPFR_RNDN); + mpfr_d_div(tmp2, -0.5, tmp2, MPFR_RNDN); + mpfr_exp(tmp2, tmp2, MPFR_RNDN); + + mpfr_ui_sub(out, 1, tmp2, MPFR_RNDN); + mpfr_mul(out, out, tmp1, MPFR_RNDN); + mpfr_add(out, out, tmp2, MPFR_RNDN); + mpfr_div(out, tmp1, out, MPFR_RNDN); + + return(0); +} + +// periodic norm +int ss_norm(mpfr_t out, mpfr_t k1, mpfr_t k2, struct ss_cache cache, mpfr_t tmp1, mpfr_t tmp2){ + // split R^2 into equilateral triangles and find which triangle (k1,k2) is in + // tmp1=floor(k1/sqrt3)+1 + // tmp2=floor(k1/2/sqrt3-k2/2)+1 + // out=floor(k1/2/sqrt3+k2/2)+1 + mpfr_div(tmp1, k1, cache.sqrt3, MPFR_RNDN); + mpfr_sub(tmp2, tmp1, k2, MPFR_RNDN); + mpfr_add(out, tmp1, k2, MPFR_RNDN); + mpfr_floor(tmp1, tmp1); + mpfr_add_ui(tmp1, tmp1, 1, MPFR_RNDN); + mpfr_div_ui(tmp2, tmp2, 2, MPFR_RNDN); + mpfr_floor(tmp2, tmp2); + mpfr_add_ui(tmp2, tmp2, 1, MPFR_RNDN); + mpfr_div_ui(out, out, 2, MPFR_RNDN); + mpfr_floor(out, out); + mpfr_add_ui(out, out, 1, MPFR_RNDN); + + // translate (k1,k2) by -a*(SQRT3,3)-b(SQRT3,-3) + // a-b + // tmp2=b-a=floor((tmp2-out+1)/3); + mpfr_sub(tmp2, tmp2, out, MPFR_RNDN); + mpfr_add_ui(tmp2, tmp2, 1, MPFR_RNDN); + mpfr_div_ui(tmp2, tmp2, 3, MPFR_RNDN); + mpfr_floor(tmp2, tmp2); + // tmp1=b=floor((tmp1+b-a)/2) + mpfr_add(tmp1, tmp1, tmp2, MPFR_RNDN); + mpfr_div_ui(tmp1, tmp1, 2, MPFR_RNDN); + mpfr_floor(tmp1, tmp1); + // tmp2=a + mpfr_sub(tmp2, tmp1, tmp2, MPFR_RNDN); + + // out=(k1-sqrt3*(a+b))**2 + mpfr_add(out, tmp1, tmp2, MPFR_RNDN); + mpfr_mul(out, out, cache.sqrt3, MPFR_RNDN); + mpfr_sub(out, k1, out, MPFR_RNDN); + mpfr_pow_ui(out, out, 2, MPFR_RNDN); + // tmp1=(k2-3*(a-b))**2 + mpfr_sub(tmp1, tmp2, tmp1, MPFR_RNDN); + mpfr_mul_ui(tmp1, tmp1, 3, MPFR_RNDN); + mpfr_sub(tmp1, k2, tmp1, MPFR_RNDN); + mpfr_pow_ui(tmp1, tmp1, 2, MPFR_RNDN); + + // out=sqrt((k1-sqrt3*(a+b))**2+(k2-3*(a-b))**2) + mpfr_add(out, out, tmp1, MPFR_RNDN); + mpfr_sqrt(out, 
out, MPFR_RNDN); + + return(0); +}

-- cgit v1.2.3-54-g00ecf
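
For readers who only want the shape of the cutoff used throughout this patch, here is a minimal plain-double sketch of ss_cutoff as defined above. The MPFR version in the patch is the authoritative one; the name cutoff_sketch and the small driver below are illustrative only and assume nothing beyond the standard C math library.

    #include <math.h>
    #include <stdio.h>

    /* plain-double sketch of ss_cutoff: equal to 1 for x <= 1/2, 0 for x >= 1,
       and interpolating smoothly in between */
    static double cutoff_sketch(double x){
        if(x <= 0.5) return 1.;
        if(x >= 1.)  return 0.;
        double e1 = exp(-0.5/(1. - x));   /* corresponds to tmp1 in ss_cutoff */
        double e2 = exp(-0.5/(x - 0.5));  /* corresponds to tmp2 in ss_cutoff */
        return e1/((1. - e2)*e1 + e2);
    }

    int main(void){
        /* the cutoff goes from 1 at x=1/2 to 0 at x=1 */
        for(double x = 0.4; x <= 1.05; x += 0.1)
            printf("chi(%.1f) = %.6f\n", x, cutoff_sketch(x));
        return 0;
    }

In the integrands above, this cutoff enters through products such as ss_cutoff(rho)*ss_cutoff(rho*r) in ssFF_integrand_z and through the complementary factors 1-ss_cutoff(...) in the RF and RR integrands, which is how the integration domain is split into the FF, RF and RR contributions.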