buildtools/cloog/source/loop.c


   /**-------------------------------------------------------------------**
    **                              CLooG                                **
    **-------------------------------------------------------------------**
    **                             loop.c                                **
    **-------------------------------------------------------------------**
    **                  First version: october 26th 2001                 **
    **-------------------------------------------------------------------**/


/******************************************************************************
 *               CLooG : the Chunky Loop Generator (experimental)             *
 ******************************************************************************
 *                                                                            *
 * Copyright (C) 2001-2005 Cedric Bastoul                                     *
 *                                                                            *
 * This library is free software; you can redistribute it and/or              *
 * modify it under the terms of the GNU Lesser General Public                 *
 * License as published by the Free Software Foundation; either               *
 * version 2.1 of the License, or (at your option) any later version.         *
 *                                                                            *
 * This library is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU          *
 * Lesser General Public License for more details.                            *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public           *
 * License along with this library; if not, write to the Free Software        *
 * Foundation, Inc., 51 Franklin Street, Fifth Floor,                         *
 * Boston, MA  02110-1301  USA                                                *
 *                                                                            *
 * CLooG, the Chunky Loop Generator                                           *
 * Written by Cedric Bastoul, Cedric.Bastoul@inria.fr                         *
 *                                                                            *
 ******************************************************************************/
/* CAUTION: the english used for comments is probably the worst you ever read,
 *          please feel free to correct and improve it !
 */

# include <stdlib.h>
# include <stdio.h>
# include "../include/cloog/cloog.h"

#define ALLOC(type) (type*)malloc(sizeof(type))


/******************************************************************************
 *                             Memory leaks hunting                           *
 ******************************************************************************/


/**
 * These functions and global variables are devoted to memory leaks hunting: we
 * want to know at each moment how many CloogLoop structures had been allocated
 * (cloog_loop_allocated) and how many had been freed (cloog_loop_freed).
 * Each time a CloogLoog structure is allocated, a call to the function
 * cloog_loop_leak_up() must be carried out, and respectively
 * cloog_loop_leak_down() when a CloogLoop structure is freed. The special
 * variable cloog_loop_max gives the maximal number of CloogLoop structures
 * simultaneously alive (i.e. allocated and non-freed) in memory.
 * - July 3rd->11th 2003: first version (memory leaks hunt and correction).
 */


static void cloog_loop_leak_up(CloogState *state)
{
  state->loop_allocated++;
  if ((state->loop_allocated - state->loop_freed) > state->loop_max)
    state->loop_max = state->loop_allocated - state->loop_freed;
}


static void cloog_loop_leak_down(CloogState *state)
{
  state->loop_freed++;
}


/******************************************************************************
 *                          Structure display function                        *
 ******************************************************************************/


/**
 * cloog_loop_print_structure function:
 * Displays a loop structure in a way that trends to be understandable without
 * falling in a deep depression or, for the lucky ones, getting a headache...
 * Written by Olivier Chorier, Luc Marchaud, Pierre Martin and Romain Tartiere.
 * - April 24th 2005: Initial version.
 * - May   21rd 2005: - New parameter `F' for destination file (ie stdout),
 *                    - Minor tweaks.
 * - May   26th 2005: Memory leak hunt.
 * - June   2nd 2005: (Ced) Integration and minor fixes.
 * -June  22nd 2005: (Ced) Adaptation for GMP.
 */
void cloog_loop_print_structure(FILE * file, CloogLoop * loop, int level)
{ int i, j, first=1 ;

  if (loop)
  { /* Go to the right level. */
    for (i=0; i<level; i++)
    fprintf(file,"|\t") ;

    fprintf(file,"+-- CloogLoop\n") ;
  }

  /* For each loop. */
  while (loop)
  { if (!first)
    { /* Go to the right level. */
      for (i=0; i<level; i++)
      fprintf(file,"|\t") ;

      fprintf(file,"|   CloogLoop\n") ;
    }
    else
    first = 0 ;

    /* A blank line. */
    for(j=0; j<=level+1; j++)
    fprintf(file,"|\t") ;
    fprintf(file,"\n") ;

    /* Print the domain. */
    cloog_domain_print_structure(file, loop->domain, level+1, "CloogDomain");

    /* Print the stride. */
    for(j=0; j<=level; j++)
    fprintf(file,"|\t") ;
    if (loop->stride) {
	fprintf(file, "Stride: ");
	cloog_int_print(file, loop->stride->stride);
	fprintf(file, "\n");
	fprintf(file, "Offset: ");
	cloog_int_print(file, loop->stride->offset);
	fprintf(file, "\n");
    }

    /* A blank line. */
    for(j=0; j<=level+1; j++)
    fprintf(file,"|\t") ;
    fprintf(file,"\n") ;

    /* Print the block. */
    cloog_block_print_structure(file,loop->block,level+1) ;

    /* A blank line. */
    for (i=0; i<=level+1; i++)
    fprintf(file,"|\t") ;
    fprintf(file,"\n") ;

    /* Print inner if any. */
    if (loop->inner)
    cloog_loop_print_structure(file,loop->inner,level+1) ;

    /* And let's go for the next one. */
    loop = loop->next ;

    /* One more time something that is here only for a better look. */
    if (!loop)
    { /* Two blank lines if this is the end of the linked list. */
      for (j=0; j<2; j++)
      { for (i=0; i<=level; i++)
        fprintf(file,"|\t") ;

        fprintf(file,"\n") ;
      }
    }
    else
    { /* A special blank line if the is a next loop. */
      for (i=0; i<=level; i++)
      fprintf(file,"|\t") ;
      fprintf(file,"V\n") ;
    }
  }
}


/**
 * cloog_loop_print function:
 * This function prints the content of a CloogLoop structure (start) into a
 * file (file, possibly stdout).
 * - June 2nd 2005: Now this very old function (probably as old as CLooG) is
 *                  only a frontend to cloog_loop_print_structure, with a quite
 *                  better human-readable representation.
 */
void cloog_loop_print(FILE * file, CloogLoop * loop)
{ cloog_loop_print_structure(file,loop,0) ;
}


/******************************************************************************
 *                         Memory deallocation function                       *
 ******************************************************************************/


/**
 * cloog_loop_free function:
 * This function frees the allocated memory for a CloogLoop structure (loop),
 * and frees its inner loops and its next loops.
 * - June 22nd 2005: Adaptation for GMP.
 */
void cloog_loop_free(CloogLoop * loop)
{ CloogLoop * next ;

  while (loop != NULL) {
    cloog_loop_leak_down(loop->state);

    next = loop->next ;
    cloog_domain_free(loop->domain) ;
    cloog_domain_free(loop->unsimplified);
    cloog_block_free(loop->block) ;
    if (loop->inner != NULL)
    cloog_loop_free(loop->inner) ;

    cloog_stride_free(loop->stride);
    free(loop) ;
    loop = next ;
  }
}


/**
 * cloog_loop_free_parts function:
 * This function frees the allocated memory for some parts of a CloogLoop
 * structure (loop), each other argument is a boolean having to be set to 1 if
 * we want to free the corresponding part, 0 otherwise. This function applies
 * the same freeing policy to its inner ans next loops recursively.
 * - July  3rd 2003: first version.
 * - June 22nd 2005: Adaptation for GMP.
 */
void cloog_loop_free_parts(loop, domain, block, inner, next)
CloogLoop * loop ;
int domain, block, inner, next ;
{ CloogLoop * follow ;

  while (loop != NULL) {
    cloog_loop_leak_down(loop->state);
    follow = loop->next ;

    if (domain)
    cloog_domain_free(loop->domain) ;

    if (block)
    cloog_block_free(loop->block) ;

    if ((inner) && (loop->inner != NULL))
    cloog_loop_free_parts(loop->inner,domain,block,inner,1) ;

    cloog_domain_free(loop->unsimplified);
    cloog_stride_free(loop->stride);
    free(loop) ;
    if (next)
    loop = follow ;
    else
    loop = NULL ;
  }
}


/******************************************************************************
 *                              Reading functions                             *
 ******************************************************************************/


/**
 * Construct a CloogLoop structure from a given iteration domain
 * and statement number.
 */
CloogLoop *cloog_loop_from_domain(CloogState *state, CloogDomain *domain,
	int number)
{
  int nb_iterators;
  CloogLoop * loop ;
  CloogStatement * statement ;

  /* Memory allocation and information reading for the first domain: */
  loop = cloog_loop_malloc(state);
  /* domain. */
  loop->domain = domain;
  if (loop->domain != NULL)
    nb_iterators = cloog_domain_dimension(loop->domain);
  else
  nb_iterators = 0 ;
  /* included statement block. */
  statement = cloog_statement_alloc(state, number + 1);
  loop->block = cloog_block_alloc(statement, 0, NULL, nb_iterators);

  return loop ;
}


/**
 * cloog_loop_read function:
 * This function reads loop data from a file (foo, possibly stdin) and
 * returns a pointer to a CloogLoop structure containing the read information.
 * This function can be used only for input file reading, when one loop is
 * associated with one statement.
 * - number is the statement block number carried by the loop (-1 if none).
 * - nb_parameters is the number of parameters.
 **
 * - September 9th 2002: first version.
 * - April    16th 2005: adaptation to new CloogStatement struct (with number).
 * - June     11th 2005: adaptation to new CloogBlock structure.
 * - June     22nd 2005: Adaptation for GMP.
 */
CloogLoop *cloog_loop_read(CloogState *state,
			    FILE *foo, int number, int nb_parameters)
{
  int op1, op2, op3;
  char s[MAX_STRING];
  CloogDomain *domain;

  domain = cloog_domain_union_read(state, foo, nb_parameters);

  /* To read that stupid "0 0 0" line. */
  while (fgets(s,MAX_STRING,foo) == 0) ;
  while ((*s=='#' || *s=='\n') || (sscanf(s," %d %d %d",&op1,&op2,&op3)<3))
  fgets(s,MAX_STRING,foo) ;

  return cloog_loop_from_domain(state, domain, number);
}


/******************************************************************************
 *                            Processing functions                            *
 ******************************************************************************/


/**
 * cloog_loop_malloc function:
 * This function allocates the memory space for a CloogLoop structure and
 * sets its fields with default values. Then it returns a pointer to the
 * allocated space.
 * - November 21th 2005: first version.
 */
CloogLoop *cloog_loop_malloc(CloogState *state)
{ CloogLoop * loop ;

  /* Memory allocation for the CloogLoop structure. */
  loop = (CloogLoop *)malloc(sizeof(CloogLoop)) ;
  if (loop == NULL)
    cloog_die("memory overflow.\n");
  cloog_loop_leak_up(state);


  /* We set the various fields with default values. */
  loop->state    = state;
  loop->domain = NULL ;
  loop->unsimplified = NULL;
  loop->block  = NULL ;
  loop->usr    = NULL;
  loop->inner  = NULL ;
  loop->next   = NULL ;
  loop->otl = 0;
  loop->stride = NULL;

  return loop ;
}


/**
 * cloog_loop_alloc function:
 * This function allocates the memory space for a CloogLoop structure and
 * sets its fields with those given as input. Then it returns a pointer to the
 * allocated space.
 * - October  27th 2001: first version.
 * - June     22nd 2005: Adaptation for GMP.
 * - November 21th 2005: use of cloog_loop_malloc.
 */
CloogLoop *cloog_loop_alloc(CloogState *state,
	CloogDomain *domain, int otl, CloogStride *stride,
	CloogBlock *block, CloogLoop *inner, CloogLoop *next)
{ CloogLoop * loop ;

  loop = cloog_loop_malloc(state);

  loop->domain = domain ;
  loop->block  = block ;
  loop->inner  = inner ;
  loop->next   = next ;
  loop->otl = otl;
  loop->stride = cloog_stride_copy(stride);

  return(loop) ;
}


/**
 * cloog_loop_add function:
 * This function adds a CloogLoop structure (loop) at a given place (now) of a
 * NULL terminated list of CloogLoop structures. The beginning of this list
 * is (start). This function updates (now) to (loop), and updates (start) if the
 * added element is the first one -that is when (start) is NULL-.
 * - October 28th 2001: first version.
 */
void cloog_loop_add(CloogLoop ** start, CloogLoop ** now, CloogLoop * loop)
{ if (*start == NULL)
  { *start = loop ;
    *now = *start ;
  }
  else
  { (*now)->next = loop ;
    *now = (*now)->next ;
  }
}


/**
 * cloog_loop_add function:
 * This function adds a CloogLoop structure (loop) at a given place (now) of a
 * NULL terminated list of CloogLoop structures. The beginning of this list
 * is (start). This function updates (now) to the end of the loop list (loop),
 * and updates (start) if the added element is the first one -that is when
 * (start) is NULL-.
 * - September 9th 2005: first version.
 */
void cloog_loop_add_list(CloogLoop ** start, CloogLoop ** now, CloogLoop * loop)
{ if (*start == NULL)
  { *start = loop ;
    *now = *start ;
  }
  else
  { (*now)->next = loop ;
    *now = (*now)->next ;
  }

  while ((*now)->next != NULL)
  *now = (*now)->next ;
}


/**
 * cloog_loop_copy function:
 * This function returns a copy of the CloogLoop structure given as input. In
 * fact, there is just new allocations for the CloogLoop structures, but their
 * contents are the same.
 * - October 28th 2001: first version.
 * - July 3rd->11th 2003: memory leaks hunt and correction.
 */
CloogLoop * cloog_loop_copy(CloogLoop * source)
{ CloogLoop * loop ;
  CloogBlock * block ;
  CloogDomain * domain ;

  loop = NULL ;
  if (source != NULL)
  { domain = cloog_domain_copy(source->domain) ;
    block  = cloog_block_copy(source->block) ;
    loop   = cloog_loop_alloc(source->state, domain, source->otl,
		source->stride, block, NULL,  NULL);
    loop->usr = source->usr;
    loop->inner = cloog_loop_copy(source->inner) ;
    loop->next = cloog_loop_copy(source->next) ;
  }
  return(loop) ;
}


/**
 * cloog_loop_add_disjoint function:
 * This function adds some CloogLoop structures at a given place (now) of a
 * NULL terminated list of CloogLoop structures. The beginning of this list
 * is (start). (loop) can be an union of polyhedra, this function separates the
 * union into a list of *disjoint* polyhedra then adds the list. This function
 * updates (now) to the end of the list and updates (start) if first added
 * element is the first of the principal list -that is when (start) is NULL-.
 * (loop) can be freed by this function, basically when its domain is actually
 * a union of polyhedra, but don't worry, all the useful data are now stored
 * inside the list (start). We do not use PolyLib's Domain_Disjoint function,
 * since the number of union components is often higher (thus code size too).
 * - October   28th 2001: first version.
 * - November  14th 2001: bug correction (this one was hard to find !).
 * - July 3rd->11th 2003: memory leaks hunt and correction.
 * - June      22nd 2005: Adaptation for GMP.
 * - October   27th 2005: (debug) included blocks were not copied for new loops.
 */
void cloog_loop_add_disjoint(start, now, loop)
CloogLoop ** start, ** now, * loop ;
{
  CloogLoop * sep, * inner ;
  CloogDomain *domain, *seen, *temp, *rest;
  CloogBlock * block ;

  if (cloog_domain_isconvex(loop->domain))
  cloog_loop_add(start,now,loop) ;
  else {
    domain = cloog_domain_simplify_union(loop->domain);
    loop->domain = NULL ;

    /* We separate the first element of the rest of the union. */
    domain = cloog_domain_cut_first(domain, &rest);

    /* This first element is the first of the list of disjoint polyhedra. */
    sep = cloog_loop_alloc(loop->state, domain, 0, NULL,
			   loop->block, loop->inner, NULL);
    cloog_loop_add(start,now,sep) ;

    seen = cloog_domain_copy(domain);
    while (!cloog_domain_isempty(domain = rest)) {
      temp = cloog_domain_cut_first(domain, &rest);
      domain = cloog_domain_difference(temp, seen);
      cloog_domain_free(temp);

      if (cloog_domain_isempty(domain)) {
	cloog_domain_free(domain);
	continue;
      }

      /* Each new loop will have its own life, for instance we can free its
       * inner loop and included block. Then each one must have its own copy
       * of both 'inner' and 'block'.
       */
      inner = cloog_loop_copy(loop->inner) ;
      block = cloog_block_copy(loop->block) ;

      sep = cloog_loop_alloc(loop->state, cloog_domain_copy(domain),
			     0, NULL, block, inner, NULL);
      /* domain can be an union too. If so: recursion. */
      if (cloog_domain_isconvex(domain))
	cloog_loop_add(start,now,sep) ;
      else
	cloog_loop_add_disjoint(start,now,sep) ;

      if (cloog_domain_isempty(rest)) {
	cloog_domain_free(domain);
	break;
      }

      seen = cloog_domain_union(seen, domain);
    }
    cloog_domain_free(rest);
    cloog_domain_free(seen);
    cloog_loop_free_parts(loop,0,0,0,0) ;
  }
}


/**
 * cloog_loop_disjoint function:
 * This function returns a list of loops such that each loop with non-convex
 * domain in the input list (loop) is separated into several loops where the
 * domains are the components of the union of *disjoint* polyhedra equivalent
 * to the original non-convex domain. See cloog_loop_add_disjoint comments
 * for more details.
 * - September 16th 2005: first version.
 */
CloogLoop * cloog_loop_disjoint(CloogLoop * loop)
{ CloogLoop *res=NULL, * now=NULL, * next ;

  /* Because this is often the case, don't waste time ! */
  if (loop && !loop->next && cloog_domain_isconvex(loop->domain))
  return loop ;

  while (loop != NULL)
  { next = loop->next ;
    loop->next = NULL ;
    cloog_loop_add_disjoint(&res,&now,loop) ;
    loop = next ;
  }

  return res ;
}


/**
 * cloog_loop_restrict function:
 * This function returns the (loop) in the context of (context): it makes the
 * intersection between the (loop) domain and the (context), then it returns
 * a pointer to a new loop, with this intersection as domain.
 **
 * - October 27th 2001: first version.
 * - June    15th 2005: a memory leak fixed (domain was not freed when empty).
 * - June    22nd 2005: Adaptation for GMP.
 */
CloogLoop *cloog_loop_restrict(CloogLoop *loop, CloogDomain *context)
{ int new_dimension ;
  CloogDomain * domain, * extended_context, * new_domain ;
  CloogLoop * new_loop ;

  domain = loop->domain ;
  if (cloog_domain_dimension(domain) > cloog_domain_dimension(context))
  {
    new_dimension = cloog_domain_dimension(domain);
    extended_context = cloog_domain_extend(context, new_dimension);
    new_domain = cloog_domain_intersection(extended_context,loop->domain) ;
    cloog_domain_free(extended_context) ;
  }
  else
  new_domain = cloog_domain_intersection(context,loop->domain) ;

  if (cloog_domain_isempty(new_domain))
  { cloog_domain_free(new_domain) ;
    return(NULL) ;
  }
  else {
    new_loop = cloog_loop_alloc(loop->state, new_domain,
				0, NULL, loop->block, loop->inner, NULL);
    return(new_loop) ;
  }
}


/**
 * Call cloog_loop_restrict on each loop in the list "loop" and return
 * the concatenated result.
 */
CloogLoop *cloog_loop_restrict_all(CloogLoop *loop, CloogDomain *context)
{
    CloogLoop *next;
    CloogLoop *res = NULL;
    CloogLoop **res_next = &res;

    for (; loop; loop = next) {
	next = loop->next;

	*res_next = cloog_loop_restrict(loop, context);
	if (*res_next) {
	    res_next = &(*res_next)->next;
	    cloog_loop_free_parts(loop, 1, 0, 0, 0);
	} else {
	    loop->next = NULL;
	    cloog_loop_free(loop);
	}
    }

    return res;
}


/**
 * Restrict the domains of the inner loops of each loop l in the given
 * list of loops to the domain of the loop l.  If the domains of all
 * inner loops of a given loop l turn out to be empty, then remove l
 * from the list.
 */
CloogLoop *cloog_loop_restrict_inner(CloogLoop *loop)
{
    CloogLoop *next;
    CloogLoop *res;
    CloogLoop **res_next = &res;

    for (; loop; loop = next) {
	next = loop->next;

	loop->inner = cloog_loop_restrict_all(loop->inner, loop->domain);
	if (loop->inner) {
	    *res_next = loop;
	    res_next = &(*res_next)->next;
	} else {
	    loop->next = NULL;
	    cloog_loop_free(loop);
	}
    }

    *res_next = NULL;

    return res;
}

/**
 * cloog_loop_project function:
 * This function returns the projection of (loop) on the (level) first
 * dimensions (outer loops). It makes the projection of the (loop) domain,
 * then it returns a pointer to a new loop, with this projection as domain.
 **
 * - October   27th 2001: first version.
 * - July 3rd->11th 2003: memory leaks hunt and correction.
 * - June      22nd 2005: Adaptation for GMP.
 */
CloogLoop * cloog_loop_project(CloogLoop * loop, int level)
{
  CloogDomain * new_domain ;
  CloogLoop * new_loop, * copy ;

  copy = cloog_loop_alloc(loop->state, loop->domain, loop->otl, loop->stride,
			  loop->block, loop->inner, NULL);

  if (cloog_domain_dimension(loop->domain) == level)
  new_domain = cloog_domain_copy(loop->domain) ;
  else
    new_domain = cloog_domain_project(loop->domain, level);

  new_loop = cloog_loop_alloc(loop->state, new_domain, 0, NULL,
			      NULL, copy, NULL);

  return(new_loop) ;
}


/**
 * Call cloog_loop_project on each loop in the list "loop" and return
 * the concatenated result.
 */
CloogLoop *cloog_loop_project_all(CloogLoop *loop, int level)
{
    CloogLoop *next;
    CloogLoop *res = NULL;
    CloogLoop **res_next = &res;

    for (; loop; loop = next) {
	next = loop->next;

	*res_next = cloog_loop_project(loop, level);
	res_next = &(*res_next)->next;
	cloog_loop_free_parts(loop, 0, 0, 0, 0);
    }

    return res;
}


/**
 * cloog_loop_concat function:
 * This function returns a pointer to the concatenation of the
 * CloogLoop lists given as input.
 * - October 28th 2001: first version.
 */
CloogLoop * cloog_loop_concat(CloogLoop * a, CloogLoop * b)
{ CloogLoop * loop, * temp ;

  loop = a  ;
  temp = loop ;
  if (loop != NULL)
  { while (temp->next != NULL)
    temp = temp->next ;
    temp->next = b ;
  }
  else
  loop = b ;

  return(loop) ;
}


/**
 * cloog_loop_combine:
 * Combine consecutive loops with identical domains into
 * a single loop with the concatenation of their inner loops
 * as inner loop.
 */
CloogLoop *cloog_loop_combine(CloogLoop *loop)
{
    CloogLoop *first, *second;

    for (first = loop; first; first = first->next) {
	while (first->next) {
	    if (!cloog_domain_lazy_equal(first->domain, first->next->domain))
		break;
	    second = first->next;
	    first->inner = cloog_loop_concat(first->inner, second->inner);
	    first->next = second->next;
	    cloog_loop_free_parts(second, 1, 0, 0, 0);
	}
    }

    return loop;
}

/**
 * Remove loops from list that have an empty domain.
 */
CloogLoop *cloog_loop_remove_empty_domain_loops(CloogLoop *loop)
{
    CloogLoop *l, *res, *next, **res_next;

    res = NULL;
    res_next = &res;
    for (l = loop; l; l = next) {
	next = l->next;
	if (cloog_domain_isempty(l->domain))
	    cloog_loop_free_parts(l, 1, 1, 1, 0);
	else {
	    *res_next = l;
	    res_next = &(*res_next)->next;
	}
    }
    *res_next = NULL;

    return res;
}

CloogLoop *cloog_loop_decompose_inner(CloogLoop *loop,
	int level, int scalar, int *scaldims, int nb_scattdims);

/* For each loop with only one inner loop, replace the domain
 * of the loop with the projection of the domain of the inner
 * loop.  To increase the number of loops with a single inner
 * we first decompose the inner loops into strongly connected
 * components.
 */
CloogLoop *cloog_loop_specialize(CloogLoop *loop,
	int level, int scalar, int *scaldims, int nb_scattdims)
{
    int dim;
    CloogDomain *domain;
    CloogLoop *l;

    loop = cloog_loop_decompose_inner(loop, level, scalar,
					scaldims, nb_scattdims);

    for (l = loop; l; l = l->next) {
	if (l->inner->next)
	    continue;
	if (!cloog_domain_isconvex(l->inner->domain))
	    continue;

	dim = cloog_domain_dimension(l->domain);
	domain = cloog_domain_project(l->inner->domain, dim);
	if (cloog_domain_isconvex(domain)) {
	    cloog_domain_free(l->domain);
	    l->domain = domain;
	} else {
	    cloog_domain_free(domain);
	}
    }

    return cloog_loop_remove_empty_domain_loops(loop);
}

/* For each loop with only one inner loop, propagate the bounds from
 * the inner loop domain to the outer loop domain.  This is especially
 * useful if the inner loop domain has a non-trivial stride which
 * results in an update of the lower bound.
 */
CloogLoop *cloog_loop_propagate_lower_bound(CloogLoop *loop, int level)
{
    int dim;
    CloogDomain *domain, *t;
    CloogLoop *l;

    for (l = loop; l; l = l->next) {
	if (l->inner->next)
	    continue;
	if (!cloog_domain_isconvex(l->inner->domain))
	    continue;

	dim = cloog_domain_dimension(l->domain);
	domain = cloog_domain_project(l->inner->domain, dim);
	if (cloog_domain_isconvex(domain)) {
	    t = cloog_domain_intersection(domain, l->domain);
	    cloog_domain_free(l->domain);
	    l->domain = t;
	}
	cloog_domain_free(domain);
    }

    return loop;
}

/**
 * cloog_loop_separate function:
 * This function implements the Quillere algorithm for separation of multiple
 * loops: for a given set of polyhedra (loop), it computes a set of disjoint
 * polyhedra such that the unions of these sets are equal, and returns this set.
 * - October   28th 2001: first version.
 * - November  14th 2001: elimination of some unused blocks.
 * - August    13th 2002: (debug) in the case of union of polyhedra for one
 *                        loop, redundant constraints are fired.
 * - July 3rd->11th 2003: memory leaks hunt and correction.
 * - June      22nd 2005: Adaptation for GMP.
 * - October   16th 2005: Removal of the non-shared constraint elimination when
 *                        there is only one loop in the list (seems to work
 *                        without now, DomainSimplify may have been improved).
 *                        The problem was visible with test/iftest2.cloog.
 */
CloogLoop * cloog_loop_separate(CloogLoop * loop)
{ int lazy_equal=0, disjoint = 0;
  CloogLoop * new_loop, * new_inner, * res, * now, * temp, * Q,
            * inner, * old /*, * previous, * next*/  ;
  CloogDomain *UQ, *domain;

  if (loop == NULL)
  return NULL ;

  loop = cloog_loop_combine(loop);

  if (loop->next == NULL)
  return cloog_loop_disjoint(loop) ;

  UQ     = cloog_domain_copy(loop->domain) ;
  domain = cloog_domain_copy(loop->domain) ;
  res    = cloog_loop_alloc(loop->state, domain, 0, NULL,
			    loop->block, loop->inner, NULL);

  old = loop ;
  while((loop = loop->next) != NULL)
  { temp = NULL ;

    /* For all Q, add Q-loop associated with the blocks of Q alone,
     * and Q inter loop associated with the blocks of Q and loop.
     */
    for (Q = res; Q; Q = Q->next) {
        /* Add (Q inter loop). */
        if ((disjoint = cloog_domain_lazy_disjoint(Q->domain,loop->domain)))
	domain = NULL ;
	else
	{ if ((lazy_equal = cloog_domain_lazy_equal(Q->domain,loop->domain)))
	  domain = cloog_domain_copy(Q->domain) ;
          else
	  domain = cloog_domain_intersection(Q->domain,loop->domain) ;

	  if (!cloog_domain_isempty(domain))
          { new_inner = cloog_loop_concat(cloog_loop_copy(Q->inner),
                                          cloog_loop_copy(loop->inner)) ;
	    new_loop = cloog_loop_alloc(loop->state, domain, 0, NULL,
					NULL, new_inner, NULL);
            cloog_loop_add_disjoint(&temp,&now,new_loop) ;
          }
          else {
	    disjoint = 1;
	    cloog_domain_free(domain);
	  }
        }

	/* Add (Q - loop). */
        if (disjoint)
	domain = cloog_domain_copy(Q->domain) ;
	else
	{ if (lazy_equal)
	  domain = cloog_domain_empty(Q->domain);
	  else
	  domain = cloog_domain_difference(Q->domain,loop->domain) ;
	}

	if (!cloog_domain_isempty(domain)) {
          new_loop = cloog_loop_alloc(loop->state, domain, 0, NULL,
				      NULL, Q->inner, NULL);
          cloog_loop_add_disjoint(&temp,&now,new_loop) ;
        }
        else
        { cloog_domain_free(domain) ;
	  /* If Q->inner is no more useful, we can free it. */
          inner = Q->inner ;
          Q->inner = NULL ;
          cloog_loop_free(inner) ;
        }
    }

    /* Add loop-UQ associated with the blocks of loop alone.*/
    if (cloog_domain_lazy_disjoint(loop->domain,UQ))
    domain = cloog_domain_copy(loop->domain) ;
    else
    { if (cloog_domain_lazy_equal(loop->domain,UQ))
      domain = cloog_domain_empty(UQ);
      else
      domain = cloog_domain_difference(loop->domain,UQ) ;
    }

    if (!cloog_domain_isempty(domain)) {
      new_loop = cloog_loop_alloc(loop->state, domain, 0, NULL,
				  NULL, loop->inner, NULL);
      cloog_loop_add_disjoint(&temp,&now,new_loop) ;
    }
    else
    { cloog_domain_free(domain) ;
      /* If loop->inner is no more useful, we can free it. */
      cloog_loop_free(loop->inner) ;
    }

    loop->inner = NULL ;

    if (loop->next != NULL)
      UQ = cloog_domain_union(UQ, cloog_domain_copy(loop->domain));
    else
      cloog_domain_free(UQ);

    cloog_loop_free_parts(res,1,0,0,1) ;

    res = temp ;
  }
  cloog_loop_free_parts(old,1,0,0,1) ;

  return(res) ;
}


static CloogDomain *bounding_domain(CloogDomain *dom, CloogOptions *options)
{
    if (options->sh)
	return cloog_domain_simple_convex(dom);
    else
	return cloog_domain_convex(dom);
}


/**
 * cloog_loop_merge function:
 * This function is the 'soft' version of loop_separate if we are looking for
 * a code much simpler (and less efficicient).  This function returns the new
 * CloogLoop list.
 * - October 29th 2001: first version.
 * - July 3rd->11th 2003: memory leaks hunt and correction.
 * - June      22nd 2005: Adaptation for GMP.
 */
CloogLoop *cloog_loop_merge(CloogLoop *loop, int level, CloogOptions *options)
{
    CloogLoop *res, *new_inner, *old;
    CloogDomain *new_domain, *temp;

    if (loop == NULL)
	return loop;

    if (loop->next == NULL && cloog_domain_isconvex(loop->domain))
	return loop;

    old = loop;
    temp = loop->domain;
    loop->domain = NULL;
    new_inner = loop->inner;

    for (loop = loop->next; loop; loop = loop->next) {
	temp = cloog_domain_union(temp, loop->domain);
	loop->domain = NULL;
	new_inner = cloog_loop_concat(new_inner, loop->inner);
    }

    new_domain = bounding_domain(temp, options);

    if (level > 0 && !cloog_domain_is_bounded(new_domain, level) &&
	    	     cloog_domain_is_bounded(temp, level)) {
	CloogDomain *splitter, *t2;

	cloog_domain_free(new_domain);
	splitter = cloog_domain_bound_splitter(temp, level);

	res = NULL;
	while (!cloog_domain_isconvex(splitter)) {
	    CloogDomain *first, *rest;
	    first = cloog_domain_cut_first(splitter, &rest);
	    splitter = rest;
	    t2 = cloog_domain_intersection(first, temp);
	    cloog_domain_free(first);

	    new_domain = bounding_domain(t2, options);
	    cloog_domain_free(t2);

	    if (cloog_domain_isempty(new_domain)) {
		cloog_domain_free(new_domain);
		continue;
	    }
	    res = cloog_loop_alloc(old->state, new_domain, 0, NULL,
				   NULL, cloog_loop_copy(new_inner), res);
	}

	t2 = cloog_domain_intersection(splitter, temp);
	cloog_domain_free(splitter);

	new_domain = bounding_domain(t2, options);
	cloog_domain_free(t2);

	if (cloog_domain_isempty(new_domain)) {
	    cloog_domain_free(new_domain);
	    cloog_loop_free(new_inner);
	} else
	    res = cloog_loop_alloc(old->state, new_domain, 0, NULL,
				   NULL, new_inner, res);
    } else {
	res = cloog_loop_alloc(old->state, new_domain, 0, NULL,
			       NULL, new_inner, NULL);
    }
    cloog_domain_free(temp);

    cloog_loop_free_parts(old, 0, 0, 0, 1);

    return res;
}


static int cloog_loop_count(CloogLoop *loop)
{
    int nb_loops;

    for (nb_loops = 0; loop; loop = loop->next)
	nb_loops++;

    return  nb_loops;
}


/**
 * cloog_loop_sort function:
 * Adaptation from LoopGen 0.4 by F. Quillere. This function sorts a list of
 * parameterized disjoint polyhedra, in order to not have lexicographic order
 * violation (see Quillere paper).
 * - September 16th 2005: inclusion of cloog_loop_number (October 29th 2001).
 */
CloogLoop *cloog_loop_sort(CloogLoop *loop, int level)
{
  CloogLoop *res, *now, **loop_array;
  CloogDomain **doms;
  int i, nb_loops=0, * permut ;

  /* There is no need to sort the parameter domains. */
  if (!level)
    return loop;

  /* We will need to know how many loops are in the list. */
  nb_loops = cloog_loop_count(loop);

  /* If there is only one loop, it's the end. */
  if (nb_loops == 1)
  return(loop) ;

  /* We have to allocate memory for some useful components:
   * - loop_array: the loop array,
   * - doms: the array of domains to sort,
   * - permut: will give us a possible sort (maybe not the only one).
   */
  loop_array = (CloogLoop **)malloc(nb_loops*sizeof(CloogLoop *)) ;
  doms = (CloogDomain **)malloc(nb_loops*sizeof(CloogDomain *));
  permut = (int *)malloc(nb_loops*sizeof(int)) ;

  /* We fill up the loop and domain arrays. */
  for (i=0;i<nb_loops;i++,loop=loop->next)
  { loop_array[i] = loop ;
    doms[i] = loop_array[i]->domain;
  }

  /* cloog_domain_sort will fill up permut. */
  cloog_domain_sort(doms, nb_loops, level, permut);

  /* With permut and loop_array we build the sorted list. */
  res = NULL ;
  for (i=0;i<nb_loops;i++)
  { /* To avoid pointer looping... loop_add will rebuild the list. */
    loop_array[permut[i]-1]->next = NULL ;
    cloog_loop_add(&res,&now,loop_array[permut[i]-1]) ;
  }

  free(permut) ;
  free(doms);
  free(loop_array) ;

  return res;
}


/**
 * cloog_loop_nest function:
 * This function changes the loop list in such a way that we have no more than
 * one dimension added by level. It returns an equivalent loop list with
 * this property.
 * - October 29th 2001: first version.
 * - July 3rd->11th 2003: memory leaks hunt and correction.
 * - June      22nd 2005: Adaptation for GMP.
 * - November  21th 2005: (debug) now OK when cloog_loop_restrict returns NULL.
 */
CloogLoop *cloog_loop_nest(CloogLoop *loop, CloogDomain *context, int level)
{ int l ;
  CloogLoop * p, * temp, * res, * now, * next ;
  CloogDomain * new_domain ;

  loop = cloog_loop_disjoint(loop);

  res = NULL ;
  /* Each domain is changed by its intersection with the context. */
  while (loop != NULL)
  { p = cloog_loop_restrict(loop, context);
    next = loop->next ;

    if (p != NULL)
    { cloog_loop_free_parts(loop,1,0,0,0) ;

      temp = cloog_loop_alloc(p->state, p->domain, 0, NULL,
			      p->block, p->inner, NULL);

      /* If the intersection dimension is too big, we make projections smaller
       * and smaller, and each projection includes the preceding projection
       * (thus, in the target list, dimensions are added one by one).
       */
      if (cloog_domain_dimension(p->domain) >= level)
	for (l = cloog_domain_dimension(p->domain); l >= level; l--) {
	  new_domain = cloog_domain_project(p->domain, l);
	  temp = cloog_loop_alloc(p->state, new_domain, 0, NULL,
				  NULL, temp, NULL);
	}

       /* p is no more useful (but its content yes !). */
      cloog_loop_free_parts(p,0,0,0,0) ;

      cloog_loop_add(&res,&now,temp) ;
    }
    else
    cloog_loop_free_parts(loop,1,1,1,0) ;

    loop = next ;
  }

  return(res) ;
}


/* Check if the domains of the inner loops impose a stride constraint
 * on the given level.
 * The core of the search is implemented in cloog_domain_list_stride.
 * Here, we simply construct a list of domains to pass to this function
 * and if a stride is found, we adjust the lower bounds by calling
 * cloog_domain_stride_lower_bound.
 */
static int cloog_loop_variable_offset_stride(CloogLoop *loop, int level)
{
    CloogDomainList *list = NULL;
    CloogLoop *inner;
    CloogStride *stride;

    for (inner = loop->inner; inner; inner = inner->next) {
	CloogDomainList *entry = ALLOC(CloogDomainList);
	entry->domain = cloog_domain_copy(inner->domain);
	entry->next = list;
	list = entry;
    }

    stride = cloog_domain_list_stride(list, level);

    cloog_domain_list_free(list);

    if (!stride)
	return 0;

    loop->stride = stride;
    loop->domain = cloog_domain_stride_lower_bound(loop->domain, level, stride);

    return 1;
}


/**
 * cloog_loop_stride function:
 * This function will find the stride of a loop for the iterator at the column
 * number 'level' in the constraint matrix. It will update the lower bound of
 * the iterator accordingly. Basically, the function will try to find in the
 * inner loops a common condition on this iterator for the inner loop iterators
 * to be integral. For instance, let us consider a loop with the iterator i,
 * the iteration domain -4<=i<=n, and its two inner loops with the iterator j.
 * The first inner loop has the constraint 3j=i, and the second one has the
 * constraint 6j=i. Then the common constraint on i for j to be integral is
 * i%3=0, the stride for i is 3. Lastly, we have to find the new lower bound
 * for i: the first value satisfying the common constraint: -3. At the end, the
 * iteration domain for i is -3<=i<=n and the stride for i is 3.
 *
 * The algorithm implemented in this function only allows for strides
 * on loops with a lower bound that has a constant remainder on division
 * by the stride.  Before initiating this procedure, we first check
 * if we can find a stride with a lower bound with a variable offset in
 * cloog_loop_variable_offset_stride.
 *
 * - loop is the loop including the iteration domain of the considered iterator,
 * - level is the column number of the iterator in the matrix of contraints.
 **
 * - June 29th 2003: first version (work in progress since June 26th 2003).
 * - July 14th 2003: simpler version.
 * - June 22nd 2005: Adaptation for GMP (from S. Verdoolaege's 0.12.1 version).
 */
void cloog_loop_stride(CloogLoop * loop, int level)
{ int first_search ;
  cloog_int_t stride, ref_offset, offset, potential;
  CloogLoop * inner ;

  if (!cloog_domain_can_stride(loop->domain, level))
    return;

  if (cloog_loop_variable_offset_stride(loop, level))
    return;

  cloog_int_init(stride);
  cloog_int_init(ref_offset);
  cloog_int_init(offset);
  cloog_int_init(potential);

  cloog_int_set_si(ref_offset, 0);
  cloog_int_set_si(offset, 0);

  /* Default stride. */
  cloog_int_set_si(stride, 1);
  first_search = 1 ;
  inner = loop->inner ;

  while (inner != NULL)
  { /* If the minimun stride has not been found yet, find the stride. */
    if ((first_search) || (!cloog_int_is_one(stride)))
    {
      cloog_domain_stride(inner->domain, level, &potential, &offset);
      if (!cloog_int_is_one(potential) && (!first_search))
      { /* Offsets must be the same for common stride. */
	cloog_int_gcd(stride, potential, stride);
	if (!cloog_int_is_zero(stride)) {
	    cloog_int_fdiv_r(offset, offset, stride);
	    cloog_int_fdiv_r(ref_offset, ref_offset, stride);
	}
        if (cloog_int_ne(offset,ref_offset))
	    cloog_int_set_si(stride, 1);
      }
      else {
        cloog_int_set(stride, potential);
        cloog_int_set(ref_offset, offset);
      }

      first_search = 0 ;
    }

    inner = inner->next ;
  }

  if (cloog_int_is_zero(stride))
    cloog_int_set_si(stride, 1);

  /* Update the values if necessary. */
  if (!cloog_int_is_one(stride))
  { /* Update the stride value. */
    if (!cloog_int_is_zero(offset))
      cloog_int_sub(offset, stride, offset);
    loop->stride = cloog_stride_alloc(stride, offset);
    loop->domain = cloog_domain_stride_lower_bound(loop->domain, level,
						    loop->stride);
  }

  cloog_int_clear(stride);
  cloog_int_clear(ref_offset);
  cloog_int_clear(offset);
  cloog_int_clear(potential);
}


void cloog_loop_otl(CloogLoop *loop, int level)
{
    if (cloog_domain_is_otl(loop->domain, level))
	loop->otl = 1;
}


/**
 * cloog_loop_stop function:
 * This function implements the 'stop' option : each domain of each loop
 * in the list 'loop' is replaced by 'context'. 'context' should be the
 * domain of the outer loop. By using this method, there are no more dimensions
 * to scan and the simplification step will automaticaly remove the domains
 * since they are the same as the corresponding contexts. The effect of this
 * function is to stop the code generation at the level this function is called,
 * the resulting code do not consider the next dimensions.
 * - January 11th 2005: first version.
 */
CloogLoop * cloog_loop_stop(CloogLoop * loop, CloogDomain * context)
{ if (loop == NULL)
  return NULL ;
  else
  { cloog_domain_free(loop->domain) ;
    loop->domain = cloog_domain_copy(context) ;
    loop->next = cloog_loop_stop(loop->next, context) ;
  }

  return loop ;
}


static int level_is_constant(int level, int scalar, int *scaldims, int nb_scattdims)
{
  return level && (level+scalar <= nb_scattdims) && (scaldims[level+scalar-1]);
}


/**
 * Compare the constant dimensions of loops 'l1' and 'l2' starting at 'scalar'
 * and return -1 if the vector of constant dimensions of 'l1' is smaller
 * than that of 'l2', 0 if they are the same and +1 if that of 'l1' is
 * greater than that of 'l2'.
 * This function should be called on the innermost loop (the loop
 * containing a block).
 * \param l1 Loop to be compared with l2.
 * \param l2 Loop to be compared with l1.
 * \param level Current non-scalar dimension.
 * \param scaldims Boolean array saying whether a dimension is scalar or not.
 * \param nb_scattdims Size of the scaldims array.
 * \param scalar Current scalar dimension.
 * \return -1 if (l1 < l2), 0 if (l1 == l2) and +1 if (l1 > l2)
 */
int cloog_loop_constant_cmp(CloogLoop *l1, CloogLoop *l2, int level,
	int *scaldims, int nb_scattdims, int scalar)
{
  CloogBlock *b1, *b2;
  b1 = l1->block;
  b2 = l2->block;
  while (level_is_constant(level, scalar, scaldims, nb_scattdims)) {
    int cmp = cloog_int_cmp(b1->scaldims[scalar], b2->scaldims[scalar]);
    if (cmp)
	return cmp;
    scalar++;
  }
  return 0;
}


/**
 * cloog_loop_scalar_gt function:
 * This function returns 1 if loop 'l1' is greater than loop 'l2' for the
 * scalar dimension vector that begins at dimension 'scalar', 0 otherwise. What
 * we want to know is whether a loop is scheduled before another one or not.
 * This function solves the problem when the considered dimension for scheduling
 * is a scalar dimension. Since there may be a succession of scalar dimensions,
 * this function will reason about the vector of scalar dimension that begins
 * at dimension 'level+scalar' and finish to the first non-scalar dimension.
 * \param l1 Loop to be compared with l2.
 * \param l2 Loop to be compared with l1.
 * \param level Current non-scalar dimension.
 * \param scaldims Boolean array saying whether a dimension is scalar or not.
 * \param nb_scattdims Size of the scaldims array.
 * \param scalar Current scalar dimension.
 * \return 1 if (l1 > l2), 0 otherwise.
 **
 * - September 9th 2005: first version.
 * - October  15nd 2007: now "greater than" instead of "greater or equal".
 */
int cloog_loop_scalar_gt(l1, l2, level, scaldims, nb_scattdims, scalar)
CloogLoop * l1, * l2 ;
int level, * scaldims, nb_scattdims, scalar ;
{
  return cloog_loop_constant_cmp(l1, l2, level, scaldims, nb_scattdims, scalar) > 0;
}


/**
 * cloog_loop_scalar_eq function:
 * This function returns 1 if loop 'l1' is equal to loop 'l2' for the scalar
 * dimension vector that begins at dimension 'scalar', 0 otherwise. What we want
 * to know is whether two loops are scheduled for the same time or not.
 * This function solves the problem when the considered dimension for scheduling
 * is a scalar dimension. Since there may be a succession of scalar dimensions,
 * this function will reason about the vector of scalar dimension that begins
 * at dimension 'level+scalar' and finish to the first non-scalar dimension.
 * - l1 and l2 are the loops to compare,
 * - level is the current non-scalar dimension,
 * - scaldims is the boolean array saying whether a dimension is scalar or not,
 * - nb_scattdims is the size of the scaldims array,
 * - scalar is the current scalar dimension.
 **
 * - September 9th 2005 : first version.
 */
int cloog_loop_scalar_eq(l1, l2, level, scaldims, nb_scattdims, scalar)
CloogLoop * l1, * l2 ;
int level, * scaldims, nb_scattdims, scalar ;
{
  return cloog_loop_constant_cmp(l1, l2, level, scaldims, nb_scattdims, scalar) == 0;
}


/**
 * cloog_loop_scalar_sort function:
 * This function sorts a linked list of loops (loop) with respect to the
 * scalar dimension vector that begins at dimension 'scalar'. Since there may
 * be a succession of scalar dimensions, this function will reason about the
 * vector of scalar dimension that begins at dimension 'level+scalar' and
 * finish to the first non-scalar dimension.
 * \param loop Loop list to sort.
 * \param level Current non-scalar dimension.
 * \param scaldims Boolean array saying whether a dimension is scalar or not.
 * \param nb_scattdims Size of the scaldims array.
 * \param scalar Current scalar dimension.
 * \return A pointer to the sorted list.
 **
 * - July      2nd 2005: first developments.
 * - September 2nd 2005: first version.
 * - October  15nd 2007: complete rewrite to remove bugs, now a bubble sort.
 */
CloogLoop * cloog_loop_scalar_sort(loop, level, scaldims, nb_scattdims, scalar)
CloogLoop * loop ;
int level, * scaldims, nb_scattdims, scalar ;
{ int ok ;
  CloogLoop **current;

  do {
    ok = 1;
    for (current = &loop; (*current)->next; current = &(*current)->next) {
      CloogLoop *next = (*current)->next;
      if (cloog_loop_scalar_gt(*current,next,level,scaldims,nb_scattdims,scalar)) {
        ok = 0;
	(*current)->next = next->next;
	next->next = *current;
	*current = next;
      }
    }
  } while (!ok);

  return loop ;
}


/**
 * cloog_loop_generate_backtrack function:
 * adaptation from LoopGen 0.4 by F. Quillere. This function implements the
 * backtrack of the Quillere et al. algorithm (see the Quillere paper).
 * It eliminates unused iterations of the current level for the new one. See the
 * example called linearity-1-1 example with and without this part for an idea.
 * - October 26th 2001: first version in cloog_loop_generate_general.
 * - July    31th 2002: (debug) no more parasite loops (REALLY hard !).
 * - October 30th 2005: extraction from cloog_loop_generate_general.
 */
CloogLoop *cloog_loop_generate_backtrack(CloogLoop *loop,
	int level, CloogOptions *options)
{
  CloogDomain * domain ;
  CloogLoop * now, * now2, * next, * next2, * end, * temp, * l, * inner,
            * new_loop ;

  temp = loop ;
  loop = NULL ;

  while (temp != NULL)
  { l = NULL ;
    inner = temp->inner ;

    while (inner != NULL)
    { next = inner->next ;
      /* This 'if' and its first part is the debug of july 31th 2002. */
      if (inner->block != NULL) {
        end = cloog_loop_alloc(temp->state, inner->domain, 0, NULL,
			       inner->block, NULL, NULL);
        domain = cloog_domain_copy(temp->domain) ;
        new_loop = cloog_loop_alloc(temp->state, domain, 0, NULL,
				    NULL, end, NULL);
      }
      else
      new_loop = cloog_loop_project(inner, level);

      cloog_loop_free_parts(inner,0,0,0,0) ;
      cloog_loop_add(&l,&now2,new_loop) ;
      inner = next ;
    }

    temp->inner = NULL ;

    if (l != NULL)
    { l = cloog_loop_separate(l) ;
      l = cloog_loop_sort(l, level);
      while (l != NULL) {
	l->stride = cloog_stride_copy(l->stride);
        cloog_loop_add(&loop,&now,l) ;
        l = l->next ;
      }
    }
    next2 = temp->next ;
    cloog_loop_free_parts(temp,1,0,0,0) ;
    temp = next2 ;
  }

  return loop ;
}


/**
 * Return 1 if we need to continue recursing to the specified level.
 */
int cloog_loop_more(CloogLoop *loop, int level, int scalar, int nb_scattdims)
{
  return level + scalar <= nb_scattdims ||
	 cloog_domain_dimension(loop->domain) >= level;
}

/**
 * Return 1 if the domains of all loops in the given linked list
 * have a fixed value at the given level.
 * In principle, there would be no need to check that the fixed value is
 * the same for each of these loops because this function is only
 * called on a component.  However, not all backends perform a proper
 * decomposition into components.
 */
int cloog_loop_is_constant(CloogLoop *loop, int level)
{
    cloog_int_t c1, c2;
    int r = 1;

    cloog_int_init(c1);
    cloog_int_init(c2);

    if (!cloog_domain_lazy_isconstant(loop->domain, level - 1, &c1))
	r = 0;

    for (loop = loop->next; r && loop; loop = loop->next) {
	if (!cloog_domain_lazy_isconstant(loop->domain, level - 1, &c2))
	    r = 0;
	else if (cloog_int_ne(c1, c2))
	    r = 0;
    }

    cloog_int_clear(c1);
    cloog_int_clear(c2);

    return r;
}

/**
 * Assuming all domains in the given linked list of loop
 * have a fixed values at level, return a single loop with
 * a domain corresponding to this fixed value and with as
 * list of inner loops the concatenation of all inner loops
 * in the original list.
 */
CloogLoop *cloog_loop_constant(CloogLoop *loop, int level)
{
    CloogLoop *res, *inner, *tmp;
    CloogDomain *domain, *t;

    if (!loop)
	return loop;

    inner = loop->inner;
    domain = loop->domain;
    for (tmp = loop->next; tmp; tmp = tmp->next) {
	inner = cloog_loop_concat(inner, tmp->inner);
	domain = cloog_domain_union(domain, tmp->domain);
    }

    domain = cloog_domain_simple_convex(t = domain);
    cloog_domain_free(t);

    res = cloog_loop_alloc(loop->state, domain, 0, NULL, NULL, inner, NULL);

    cloog_loop_free_parts(loop, 0, 0, 0, 1);

    return res;
}


/* Unroll the given loop at the given level, provided it is allowed
 * by cloog_domain_can_unroll.
 * If so, we return a list of loops, one for each iteration of the original
 * loop.  Otherwise, we simply return the original loop.
 */
static CloogLoop *loop_unroll(CloogLoop *loop, int level)
{
    int can_unroll;
    cloog_int_t i;
    cloog_int_t n;
    CloogConstraint *lb;
    CloogLoop *res = NULL;
    CloogLoop **next_res = &res;
    CloogDomain *domain;
    CloogLoop *inner;

    cloog_int_init(n);
    can_unroll = cloog_domain_can_unroll(loop->domain, level, &n, &lb);
    if (!can_unroll) {
	cloog_int_clear(n);
	return loop;
    }

    cloog_int_init(i);

    for (cloog_int_set_si(i, 0); cloog_int_lt(i, n); cloog_int_add_ui(i, i, 1)) {
	domain = cloog_domain_copy(loop->domain);
	domain = cloog_domain_fixed_offset(domain, level, lb, i);
	inner = cloog_loop_copy(loop->inner);
	inner = cloog_loop_restrict_all(inner, domain);
	if (!inner) {
	    cloog_domain_free(domain);
	    continue;
	}
	*next_res = cloog_loop_alloc(loop->state, domain, 1, NULL, NULL,
					inner, NULL);
	next_res = &(*next_res)->next;
    }

    cloog_int_clear(i);
    cloog_int_clear(n);
    cloog_constraint_release(lb);

    cloog_loop_free(loop);

    return res;
}


/* Unroll all loops in the given list at the given level, provided
 * they can be unrolled.
 */
CloogLoop *cloog_loop_unroll(CloogLoop *loop, int level)
{
    CloogLoop *now, *next;
    CloogLoop *res = NULL;
    CloogLoop **next_res = &res;

    for (now = loop; now; now = next) {
	next = now->next;
	now->next = NULL;

	*next_res = loop_unroll(now, level);

	while (*next_res)
	    next_res = &(*next_res)->next;
    }

    return res;
}

CloogLoop *cloog_loop_generate_restricted_or_stop(CloogLoop *loop,
	CloogDomain *context,
	int level, int scalar, int *scaldims, int nb_scattdims,
	CloogOptions *options);

CloogLoop *cloog_loop_recurse(CloogLoop *loop,
	int level, int scalar, int *scaldims, int nb_scattdims,
	int constant, CloogOptions *options);


/**
 * Recurse on the inner loops of the given single loop.
 *
 * - loop is the loop for which we have to generate scanning code,
 * - level is the current non-scalar dimension,
 * - scalar is the current scalar dimension,
 * - scaldims is the boolean array saying whether a dimension is scalar or not,
 * - nb_scattdims is the size of the scaldims array,
 * - constant is true if the loop is known to be executed at most once
 * - options are the general code generation options.
 */
static CloogLoop *loop_recurse(CloogLoop *loop,
	int level, int scalar, int *scaldims, int nb_scattdims,
	int constant, CloogOptions *options)
{
    CloogLoop *inner, *into, *end, *next, *l, *now;
    CloogDomain *domain;

    if (level && options->strides && !constant)
      cloog_loop_stride(loop, level);

    if (!constant &&
	options->first_unroll >= 0 && level + scalar >= options->first_unroll) {
	loop = cloog_loop_unroll(loop, level);
	if (loop->next)
	    return cloog_loop_recurse(loop, level, scalar, scaldims,
					nb_scattdims, 1, options);
    }

    if (level && options->otl)
      cloog_loop_otl(loop, level);
    inner = loop->inner;
    domain = cloog_domain_copy(loop->domain);
    domain = cloog_domain_add_stride_constraint(domain, loop->stride);
    into = NULL ;
    while (inner != NULL)
    { /* 4b. -ced- recurse for each sub-list of non terminal loops. */
      if (cloog_loop_more(inner, level + 1, scalar, nb_scattdims)) {
	end = inner;
        while ((end->next != NULL) &&
               cloog_loop_more(end->next, level + 1, scalar, nb_scattdims))
        end = end->next ;

	next = end->next ;
        end->next = NULL ;

        l = cloog_loop_generate_restricted_or_stop(inner, domain,
			level + 1, scalar, scaldims, nb_scattdims, options);

	if (l != NULL)
        cloog_loop_add_list(&into,&now,l) ;

        inner = next ;
      }
      else
      { cloog_loop_add(&into,&now,inner) ;
        inner = inner->next ;
      }
    }

    cloog_domain_free(domain);
    loop->inner = into;
    return loop;
}


/**
 * Recurse on the inner loops of each of the loops in the loop list.
 *
 * - loop is the loop list for which we have to generate scanning code,
 * - level is the current non-scalar dimension,
 * - scalar is the current scalar dimension,
 * - scaldims is the boolean array saying whether a dimension is scalar or not,
 * - nb_scattdims is the size of the scaldims array,
 * - constant is true if the loop is known to be executed at most once
 * - options are the general code generation options.
 */
CloogLoop *cloog_loop_recurse(CloogLoop *loop,
	int level, int scalar, int *scaldims, int nb_scattdims,
	int constant, CloogOptions *options)
{
    CloogLoop *now, *next;
    CloogLoop *res = NULL;
    CloogLoop **next_res = &res;

    for (now = loop; now; now = next) {
	next = now->next;
	now->next = NULL;

	*next_res = loop_recurse(now, level, scalar, scaldims, nb_scattdims,
				 constant, options);

	while (*next_res)
	    next_res = &(*next_res)->next;
    }

    return res;
}


/* Get the max across all 'first' depths for statements in this
 * stmt list, and the max across all 'last' depths */
void cloog_statement_get_fl(CloogStatement *s, int *f, int *l,
        CloogOptions *options)
{
    if (s == NULL) return;

    int fs, ls;

    if (options->fs != NULL && options->ls != NULL) {
        fs = options->fs[s->number-1];
        ls = options->ls[s->number-1];
        *f = (fs > *f)? fs: *f;
        *l = (ls > *l)? ls: *l;
    }else{
        *f = -1;
        *l = -1;
    }

    cloog_statement_get_fl(s->next, f, l, options);
}

/* Get the max across all 'first' depths for statements under
 * this loop, and the max across all 'last' depths */
void cloog_loop_get_fl(CloogLoop *loop, int *f, int *l,
        CloogOptions *options)
{
    if (loop == NULL)   return;

    CloogBlock *block = loop->block;

    if (block != NULL && block->statement != NULL) {
        cloog_statement_get_fl(block->statement, f, l, options);
    }

    cloog_loop_get_fl(loop->inner, f, l, options);
    cloog_loop_get_fl(loop->next, f, l, options);
}

/**
 * cloog_loop_generate_general function:
 * Adaptation from LoopGen 0.4 by F. Quillere. This function implements the
 * Quillere algorithm for polyhedron scanning from step 3 to 5.
 * (see the Quillere paper).
 * - loop is the loop for which we have to generate a scanning code,
 * - level is the current non-scalar dimension,
 * - scalar is the current scalar dimension,
 * - scaldims is the boolean array saying whether a dimension is scalar or not,
 * - nb_scattdims is the size of the scaldims array,
 * - options are the general code generation options.
 **
 * - October 26th 2001: first version.
 * - July 3rd->11th 2003: memory leaks hunt and correction.
 * - June      22nd 2005: Adaptation for GMP.
 * - September  2nd 2005: The function have been cutted out in two pieces:
 *                        cloog_loop_generate and this one, in order to handle
 *                        the scalar dimension case more efficiently with
 *                        cloog_loop_generate_scalar.
 * - November  15th 2005: (debug) the result of the cloog_loop_generate call may
 *                        be a list of polyhedra (especially if stop option is
 *                        used): cloog_loop_add_list instead of cloog_loop_add.
 * - May 31, 2012: statement-wise first and last depth for loop separation
 *   if options->fs and option->ls arrays are set, it will override what has
 *   been supplied via "-f/-l"
 *
 */
CloogLoop *cloog_loop_generate_general(CloogLoop *loop,
	int level, int scalar, int *scaldims, int nb_scattdims,
	CloogOptions *options)
{
  CloogLoop *res, *now, *temp, *l, *new_loop, *next;
  int separate = 0;
  int constant = 0;

    int first = -1;
    int last = -1;

    now = NULL;

    /* Get the -f and -l for each statement */
    cloog_loop_get_fl(loop, &first, &last, options);

    /* If stmt-wise options are not set or set inconsistently, use -f/-l ones globally */
    if (first <= 0 || last < first) {
        first = options->f;
        last = options->l;
    }

  /* 3. Separate all projections into disjoint polyhedra. */
  if (level > 0 && cloog_loop_is_constant(loop, level)) {
    res = cloog_loop_constant(loop, level);
    constant = 1;
    }else if ((first > level+scalar) || (first < 0)) {
    res = cloog_loop_merge(loop, level, options);
    }else{
    res = cloog_loop_separate(loop);
    separate = 1;
  }

  /* 3b. -correction- sort the loops to determine their textual order. */
  res = cloog_loop_sort(res, level);

  res = cloog_loop_restrict_inner(res);

  if (separate)
    res = cloog_loop_specialize(res, level, scalar, scaldims, nb_scattdims);

  /* 4. Recurse for each loop with the current domain as context. */
  temp = res ;
  res = NULL ;
    if (!level || (level+scalar < last) || (last < 0))
    res = cloog_loop_recurse(temp, level, scalar, scaldims, nb_scattdims,
			     constant, options);
  else
  while (temp != NULL)
  { next = temp->next ;
    l = cloog_loop_nest(temp->inner, temp->domain, level+1);
    new_loop = cloog_loop_alloc(temp->state, temp->domain, 0, NULL,
				NULL, l, NULL);
    temp->inner = NULL ;
    temp->next = NULL ;
    cloog_loop_free_parts(temp,0,0,0,0) ;
    cloog_loop_add(&res,&now,new_loop) ;
    temp = next ;
  }

  if (options->strides)
    res = cloog_loop_propagate_lower_bound(res, level);

  /* 5. eliminate unused iterations of the current level for the new one. See
   *    the example called linearity-1-1 example with and without this part
   *    for an idea.
   */
  if (options->backtrack && level &&
            ((level+scalar < last) || (last < 0)) &&
            ((first <= level+scalar) && !(first < 0)))
  res = cloog_loop_generate_backtrack(res, level, options);

  /* Pray for my new paper to be accepted somewhere since the following stuff
   * is really amazing :-) !
   * Far long later: The paper has been accepted to PACT 2004 :-))). But there
   * are still some bugs and I have no time to fix them. Thus now you have to
   * pray for me to get an academic position for that really amazing stuff :-) !
   * Later again: OK, I get my academic position, but still I have not enough
   * time to fix and clean this part... Pray again :-) !!!
   */
  /* res = cloog_loop_unisolate(res,level) ;*/

  return(res) ;
}


CloogLoop *cloog_loop_generate_restricted(CloogLoop *loop,
	int level, int scalar, int *scaldims, int nb_scattdims,
	CloogOptions *options);


/**
 * cloog_loop_generate_scalar function:
 * This function applies the simplified code generation scheme in the trivial
 * case of scalar dimensions. When dealing with scalar dimensions, there is
 * no need of costly polyhedral operations for separation or sorting: sorting
 * is a question of comparing scalar vectors and separation amounts to consider
 * only loops with the same scalar vector for the next step of the code
 * generation process. This function achieves the separation/sorting process
 * for the vector of scalar dimension that begins at dimension 'level+scalar'
 * and finish to the first non-scalar dimension.
 * - loop is the loop for which we have to generate a scanning code,
 * - level is the current non-scalar dimension,
 * - scalar is the current scalar dimension,
 * - scaldims is the boolean array saying whether a dimension is scalar or not,
 * - nb_scattdims is the size of the scaldims array,
 * - options are the general code generation options.
 **
 * - September  2nd 2005: First version.
 */
CloogLoop *cloog_loop_generate_scalar(CloogLoop *loop,
	int level, int scalar, int *scaldims, int nb_scattdims,
	CloogOptions *options)
{ CloogLoop * res, * now, * temp, * l, * end, * next, * ref ;
  int scalar_new;

  /* We sort the loop list with respect to the current scalar vector. */
  res = cloog_loop_scalar_sort(loop,level,scaldims,nb_scattdims,scalar) ;

  scalar_new = scalar + scaldims[level + scalar - 1];

  temp = res ;
  res = NULL ;
  while (temp != NULL)
  { /* Then we will appy the general code generation process to each sub-list
     * of loops with the same scalar vector.
     */
    end = temp ;
    ref = temp ;

    while((end->next != NULL) &&
	 cloog_loop_more(end->next, level, scalar_new, nb_scattdims) &&
         cloog_loop_scalar_eq(ref,end->next,level,scaldims,nb_scattdims,scalar))
    end = end->next ;

    next = end->next ;
    end->next = NULL ;

    /* For the next dimension, scalar value is updated by adding the scalar
     * vector size, which is stored at scaldims[level+scalar-1].
     */
    if (cloog_loop_more(temp, level, scalar_new, nb_scattdims)) {
      l = cloog_loop_generate_restricted(temp, level, scalar_new,
				      scaldims, nb_scattdims, options);

      if (l != NULL)
	cloog_loop_add_list(&res, &now, l);
    } else
      cloog_loop_add(&res, &now, temp);

    temp = next ;
  }

  return res ;
}


/* Compare loop with the next loop based on their constant dimensions.
 * The result is < 0, == 0 or > 0 depending on whether the constant
 * dimensions of loop are lexicographically smaller, equal or greater
 * than those of loop->next.
 * If loop is the last in the list, then it is assumed to be smaller
 * than the "next" one.
 */
static int cloog_loop_next_scal_cmp(CloogLoop *loop)
{
    int i;
    int nb_scaldims;

    if (!loop->next)
	return -1;

    nb_scaldims = loop->block->nb_scaldims;
    if (loop->next->block->nb_scaldims < nb_scaldims)
	nb_scaldims = loop->next->block->nb_scaldims;

    for (i = 0; i < nb_scaldims; ++i) {
	int cmp = cloog_int_cmp(loop->block->scaldims[i],
				loop->next->block->scaldims[i]);
	if (cmp)
	    return cmp;
    }
    return loop->block->nb_scaldims - loop->next->block->nb_scaldims;
}


/* Check whether the globally constant dimensions of a and b
 * have the same value for all globally constant dimensions
 * that are situated before any (locally) non-constant dimension.
 */
static int cloog_loop_equal_prefix(CloogLoop *a, CloogLoop *b,
				    int *scaldims, int nb_scattdims)
{
    int i;
    int cst = 0;
    int dim = 0;

    for (i = 0; i < nb_scattdims; ++i) {
	if (!scaldims[i]) {
	    dim++;
	    continue;
	}
	if (!cloog_int_eq(a->block->scaldims[cst], b->block->scaldims[cst]))
	    break;
	cst++;
    }
    for (i = i + 1; i < nb_scattdims; ++i) {
	if (scaldims[i])
	    continue;
	if (!cloog_domain_lazy_isconstant(a->domain, dim, NULL))
	    return 0;
	/* No need to check that dim is also constant in b and that the
	 * constant values are equal.  That will happen during the check
	 * whether the two domains are equal.
	 */
	dim++;
    }
    return 1;
}


/* Try to block adjacent loops in the loop list "loop".
 * We only attempt blocking if the constant dimensions of the loops
 * in the least are (not necessarily strictly) increasing.
 * Then we look for a sublist such that the first (begin) has constant
 * dimensions strictly larger than the previous loop in the complete
 * list and such that the loop (end) after the last loop in the sublist
 * has constant dimensions strictly larger than the last loop in the sublist.
 * Furthermore, all loops in the sublist should have the same domain
 * (with globally constant dimensions removed) and the difference
 * (if any) in constant dimensions may only occur after all the
 * (locally) constant dimensions.
 * If we find such a sublist, then the blocks of all but the first
 * are merged into the block of the first.
 *
 * Note that this function can only be called before the global
 * blocklist has been created because it may otherwise modify and destroy
 * elements on that list.
 */
CloogLoop *cloog_loop_block(CloogLoop *loop, int *scaldims, int nb_scattdims)
{
    CloogLoop *begin, *end, *l;
    int begin_after_previous;
    int end_after_previous;

    if (!loop->next)
	return loop;
    for (begin = loop; begin; begin = begin->next) {
	if (!begin->block || !begin->block->scaldims)
	    return loop;
	if (cloog_loop_next_scal_cmp(begin) > 0)
	    return loop;
    }

    begin_after_previous = 1;
    for (begin = loop; begin; begin = begin->next) {
	if (!begin_after_previous) {
	    begin_after_previous = cloog_loop_next_scal_cmp(begin) < 0;
	    continue;
	}

	end_after_previous = cloog_loop_next_scal_cmp(begin) < 0;
	for (end = begin->next; end; end = end->next) {
	    if (!cloog_loop_equal_prefix(begin, end, scaldims, nb_scattdims))
		break;
	    if (!cloog_domain_lazy_equal(begin->domain, end->domain))
		break;
	    end_after_previous = cloog_loop_next_scal_cmp(end) < 0;
	}
	if (end != begin->next && end_after_previous) {
	    for (l = begin->next; l != end; l = begin->next) {
		cloog_block_merge(begin->block, l->block);
		begin->next = l->next;
		cloog_loop_free_parts(l, 1, 0, 1, 0);
	    }
	}

	begin_after_previous = cloog_loop_next_scal_cmp(begin) < 0;
    }

    return loop;
}


/**
 * Check whether for any fixed iteration of the outer loops,
 * there is an iteration of loop1 that is lexicographically greater
 * than an iteration of loop2.
 * Return 1 if there exists (or may exist) such a pair.
 * Return 0 if all iterations of loop1 are lexicographically smaller
 * than the iterations of loop2.
 * If no iteration is lexicographically greater, but if there are
 * iterations that are equal to iterations of loop2, then return "def".
 * This is useful for ensuring that such statements are not reordered.
 * Some users, including the test_run target in test, expect
 * the statements at a given point to be run in the original order.
 * Passing the value "0" for "def" would allow such statements to be reordered
 * and would allow for the detection of more components.
 */
int cloog_loop_follows(CloogLoop *loop1, CloogLoop *loop2,
	int level, int scalar, int *scaldims, int nb_scattdims, int def)
{
    int dim1, dim2;

    dim1 = cloog_domain_dimension(loop1->domain);
    dim2 = cloog_domain_dimension(loop2->domain);
    while ((level <= dim1 && level <= dim2) ||
	   level_is_constant(level, scalar, scaldims, nb_scattdims)) {
	if (level_is_constant(level, scalar, scaldims, nb_scattdims)) {
	    int cmp = cloog_loop_constant_cmp(loop1, loop2, level, scaldims,
					    nb_scattdims, scalar);
	    if (cmp > 0)
		return 1;
	    if (cmp < 0)
		return 0;
	    scalar += scaldims[level + scalar - 1];
	} else {
	    int follows = cloog_domain_follows(loop1->domain, loop2->domain,
						level);
	    if (follows > 0)
		return 1;
	    if (follows < 0)
		return 0;
	    level++;
	}
    }

    return def;
}


/* Structure for representing the nodes in the graph being traversed
 * using Tarjan's algorithm.
 * index represents the order in which nodes are visited.
 * min_index is the index of the root of a (sub)component.
 * on_stack indicates whether the node is currently on the stack.
 */
struct cloog_loop_sort_node {
    int index;
    int min_index;
    int on_stack;
};
/* Structure for representing the graph being traversed
 * using Tarjan's algorithm.
 * len is the number of nodes
 * node is an array of nodes
 * stack contains the nodes on the path from the root to the current node
 * sp is the stack pointer
 * index is the index of the last node visited
 * order contains the elements of the components separated by -1
 * op represents the current position in order
 */
struct cloog_loop_sort {
    int len;
    struct cloog_loop_sort_node *node;
    int *stack;
    int sp;
    int index;
    int *order;
    int op;
};

/* Allocate and initialize cloog_loop_sort structure.
 */
static struct cloog_loop_sort *cloog_loop_sort_alloc(int len)
{
    struct cloog_loop_sort *s;
    int i;

    s = (struct cloog_loop_sort *)malloc(sizeof(struct cloog_loop_sort));
    assert(s);
    s->len = len;
    s->node = (struct cloog_loop_sort_node *)
			malloc(len * sizeof(struct cloog_loop_sort_node));
    assert(s->node);
    for (i = 0; i < len; ++i)
	s->node[i].index = -1;
    s->stack = (int *)malloc(len * sizeof(int));
    assert(s->stack);
    s->order = (int *)malloc(2 * len * sizeof(int));
    assert(s->order);

    s->sp = 0;
    s->index = 0;
    s->op = 0;

    return s;
}

/* Free cloog_loop_sort structure.
 */
static void cloog_loop_sort_free(struct cloog_loop_sort *s)
{
    free(s->node);
    free(s->stack);
    free(s->order);
    free(s);
}


/* Check whether for any fixed iteration of the outer loops,
 * there is an iteration of loop1 that is lexicographically greater
 * than an iteration of loop2, where the iteration domains are
 * available in the inner loops of the arguments.
 *
 * By using this functions to detect components, we ensure that
 * two CloogLoops appear in the same component if some iterations of
 * each loop should be executed before some iterations of the other loop.
 * Since we also want two CloogLoops that have exactly the same
 * iteration domain at the current level to be placed in the same component,
 * we first check if these domains are indeed the same.
 */
static int inner_loop_follows(CloogLoop *loop1, CloogLoop *loop2,
	int level, int scalar, int *scaldims, int nb_scattdims, int def)
{
    int f;

    f = cloog_domain_lazy_equal(loop1->domain, loop2->domain);
    if (!f)
	f = cloog_loop_follows(loop1->inner, loop2->inner,
				level, scalar, scaldims, nb_scattdims, def);

    return f;
}


/* Perform Tarjan's algorithm for computing the strongly connected components
 * in the graph with the individual CloogLoops as vertices.
 * Two CloopLoops appear in the same component if they both (indirectly)
 * "follow" each other, where the following relation is determined
 * by the follows function.
 */
static void cloog_loop_components_tarjan(struct cloog_loop_sort *s,
	CloogLoop **loop_array, int i, int level, int scalar, int *scaldims,
	int nb_scattdims,
	int (*follows)(CloogLoop *loop1, CloogLoop *loop2,
	    int level, int scalar, int *scaldims, int nb_scattdims, int def))
{
    int j;

    s->node[i].index = s->index;
    s->node[i].min_index = s->index;
    s->node[i].on_stack = 1;
    s->index++;
    s->stack[s->sp++] = i;

    for (j = s->len - 1; j >= 0; --j) {
	int f;

	if (j == i)
	    continue;
	if (s->node[j].index >= 0 &&
		(!s->node[j].on_stack ||
		 s->node[j].index > s->node[i].min_index))
	    continue;

	f = follows(loop_array[i], loop_array[j],
				level, scalar, scaldims, nb_scattdims, i > j);
	if (!f)
	    continue;

	if (s->node[j].index < 0) {
	    cloog_loop_components_tarjan(s, loop_array, j, level, scalar,
					 scaldims, nb_scattdims, follows);
	    if (s->node[j].min_index < s->node[i].min_index)
		s->node[i].min_index = s->node[j].min_index;
	} else if (s->node[j].index < s->node[i].min_index)
		s->node[i].min_index = s->node[j].index;
    }

    if (s->node[i].index != s->node[i].min_index)
	return;

    do {
	j = s->stack[--s->sp];
	s->node[j].on_stack = 0;
	s->order[s->op++] = j;
    } while (j != i);
    s->order[s->op++] = -1;
}


static int qsort_index_cmp(const void *p1, const void *p2)
{
    return *(int *)p1 - *(int *)p2;
}

/* Sort the elements of the component starting at list.
 * The list is terminated by a -1.
 */
static void sort_component(int *list)
{
    int len;

    for (len = 0; list[len] != -1; ++len)
	;

    qsort(list, len, sizeof(int), qsort_index_cmp);
}

/* Given an array of indices "list" into the "loop_array" array,
 * terminated by -1, construct a linked list of the corresponding
 * entries and put the result in *res.
 * The value returned is the number of CloogLoops in the (linked) list
 */
static int extract_component(CloogLoop **loop_array, int *list, CloogLoop **res)
{
    int i = 0;

    sort_component(list);
    while (list[i] != -1) {
	*res = loop_array[list[i]];
	res = &(*res)->next;
	++i;
    }
    *res = NULL;

    return i;
}


/**
 * Call cloog_loop_generate_scalar or cloog_loop_generate_general
 * on each of the strongly connected components in the list of CloogLoops
 * pointed to by "loop".
 *
 * We use Tarjan's algorithm to find the strongly connected components.
 * Note that this algorithm also topologically sorts the components.
 *
 * The components are treated separately to avoid spurious separations.
 * The concatentation of the results may contain successive loops
 * with the same bounds, so we try to combine such loops.
 */
CloogLoop *cloog_loop_generate_components(CloogLoop *loop,
	int level, int scalar, int *scaldims, int nb_scattdims,
	CloogOptions *options)
{
    int i, nb_loops;
    CloogLoop *tmp;
    CloogLoop *res, **res_next;
    CloogLoop **loop_array;
    struct cloog_loop_sort *s;

    if (level == 0 || !loop->next)
	return cloog_loop_generate_general(loop, level, scalar,
					     scaldims, nb_scattdims, options);

    nb_loops = cloog_loop_count(loop);

    loop_array = (CloogLoop **)malloc(nb_loops * sizeof(CloogLoop *));
    assert(loop_array);

    for (i = 0, tmp = loop; i < nb_loops; i++, tmp = tmp->next)
	loop_array[i] = tmp;

    s = cloog_loop_sort_alloc(nb_loops);
    for (i = nb_loops - 1; i >= 0; --i) {
	if (s->node[i].index >= 0)
	    continue;
	cloog_loop_components_tarjan(s, loop_array, i, level, scalar, scaldims,
					nb_scattdims, &inner_loop_follows);
    }

    i = 0;
    res = NULL;
    res_next = &res;
    while (nb_loops) {
	int n = extract_component(loop_array, &s->order[i], &tmp);
	i += n + 1;
	nb_loops -= n;
	*res_next = cloog_loop_generate_general(tmp, level, scalar,
					     scaldims, nb_scattdims, options);
    	while (*res_next)
	    res_next = &(*res_next)->next;
    }

    cloog_loop_sort_free(s);

    free(loop_array);

    res = cloog_loop_combine(res);

    return res;
}


/* For each loop in the list "loop", decompose the list of
 * inner loops into strongly connected components and put
 * the components into separate loops at the top level.
 */
CloogLoop *cloog_loop_decompose_inner(CloogLoop *loop,
	int level, int scalar, int *scaldims, int nb_scattdims)
{
    CloogLoop *l, *tmp;
    CloogLoop **loop_array;
    int i, n_loops, max_loops = 0;
    struct cloog_loop_sort *s;

    for (l = loop; l; l = l->next) {
	n_loops = cloog_loop_count(l->inner);
	if (max_loops < n_loops)
	    max_loops = n_loops;
    }

    if (max_loops <= 1)
	return loop;

    loop_array = (CloogLoop **)malloc(max_loops * sizeof(CloogLoop *));
    assert(loop_array);

    for (l = loop; l; l = l->next) {
	int n;

	for (i = 0, tmp = l->inner; tmp; i++, tmp = tmp->next)
	    loop_array[i] = tmp;
	n_loops = i;
	if (n_loops <= 1)
	    continue;

	s = cloog_loop_sort_alloc(n_loops);
	for (i = n_loops - 1; i >= 0; --i) {
	    if (s->node[i].index >= 0)
		continue;
	    cloog_loop_components_tarjan(s, loop_array, i, level, scalar,
				scaldims, nb_scattdims, &cloog_loop_follows);
	}

	n = extract_component(loop_array, s->order, &l->inner);
	n_loops -= n;
	i = n + 1;
	while (n_loops) {
	    CloogLoop *inner;

	    n = extract_component(loop_array, &s->order[i], &inner);
	    n_loops -= n;
	    i += n + 1;
	    tmp = cloog_loop_alloc(l->state, cloog_domain_copy(l->domain),
			l->otl, l->stride, l->block, inner, l->next);
	    l->next = tmp;
	    l = tmp;
	}

	cloog_loop_sort_free(s);
    }

    free(loop_array);

    return loop;
}


CloogLoop *cloog_loop_generate_restricted(CloogLoop *loop,
	int level, int scalar, int *scaldims, int nb_scattdims,
	CloogOptions *options)
{
  /* To save both time and memory, we switch here depending on whether the
   * current dimension is scalar (simplified processing) or not (general
   * processing).
   */
  if (level_is_constant(level, scalar, scaldims, nb_scattdims))
    return cloog_loop_generate_scalar(loop, level, scalar,
                                   scaldims, nb_scattdims, options);
  /*
   * 2. Compute the projection of each polyhedron onto the outermost
   *    loop variable and the parameters.
   */
  loop = cloog_loop_project_all(loop, level);

  return cloog_loop_generate_components(loop, level, scalar, scaldims,
					nb_scattdims, options);
}


CloogLoop *cloog_loop_generate_restricted_or_stop(CloogLoop *loop,
	CloogDomain *context,
	int level, int scalar, int *scaldims, int nb_scattdims,
	CloogOptions *options)
{
  /* If the user asked to stop code generation at this level, let's stop. */
  if ((options->stop >= 0) && (level+scalar >= options->stop+1))
    return cloog_loop_stop(loop,context) ;

  return cloog_loop_generate_restricted(loop, level, scalar, scaldims,
  				nb_scattdims, options);
}


/**
 * cloog_loop_generate function:
 * Adaptation from LoopGen 0.4 by F. Quillere. This function implements the
 * Quillere algorithm for polyhedron scanning from step 1 to 2.
 * (see the Quillere paper).
 * - loop is the loop for which we have to generate a scanning code,
 * - context is the context of the current loop (constraints on parameter and/or
 *   on outer loop counters),
 * - level is the current non-scalar dimension,
 * - scalar is the current scalar dimension,
 * - scaldims is the boolean array saying whether a dimension is scalar or not,
 * - nb_scattdims is the size of the scaldims array,
 * - options are the general code generation options.
 **
 * - October 26th 2001: first version.
 * - July 3rd->11th 2003: memory leaks hunt and correction.
 * - June      15th 2005: a memory leak fixed (loop was not entirely freed when
 *                        the result of cloog_loop_restrict was NULL).
 * - June      22nd 2005: Adaptation for GMP.
 * - September  2nd 2005: The function have been cutted out in two pieces:
 *                        cloog_loop_generate and this one, in order to handle
 *                        the scalar dimension case more efficiently with
 *                        cloog_loop_generate_scalar.
 * - November  15th 2005: (debug) Condition for stop option no more take care of
 *                        further scalar dimensions.
 */
CloogLoop *cloog_loop_generate(CloogLoop *loop, CloogDomain *context,
	int level, int scalar, int *scaldims, int nb_scattdims,
	CloogOptions *options)
{
  /* 1. Replace each polyhedron by its intersection with the context.
   */
  loop = cloog_loop_restrict_all(loop, context);
  if (!loop)
    return NULL;

  return cloog_loop_generate_restricted_or_stop(loop, context,
			      level, scalar, scaldims, nb_scattdims, options);
}


/*
 * Internal function for simplifying a single loop in a list of loops.
 * See cloog_loop_simplify.
 */
static CloogLoop *loop_simplify(CloogLoop *loop, CloogDomain *context,
	int level, int nb_scattdims, CloogOptions *options)
{
  int domain_dim;
  CloogBlock * new_block ;
  CloogLoop *simplified, *inner;
  CloogDomain * domain, * simp, * inter, * extended_context ;

  domain = loop->domain ;

  domain_dim = cloog_domain_dimension(domain);
  extended_context = cloog_domain_extend(context, domain_dim);
  inter = cloog_domain_intersection(domain,extended_context) ;
  simp = cloog_domain_simplify(domain, extended_context);
  cloog_domain_free(extended_context) ;

  /* If the constraint system is never true, go to the next one. */
  if (cloog_domain_never_integral(simp)) {
    cloog_loop_free(loop->inner);
    cloog_domain_free(inter);
    cloog_domain_free(simp);
    return NULL;
  }

  inner = cloog_loop_simplify(loop->inner, inter, level+1, nb_scattdims,
                              options);

  if ((inner == NULL) && (loop->block == NULL)) {
    cloog_domain_free(inter);
    cloog_domain_free(simp);
    return NULL;
  }

  new_block = cloog_block_copy(loop->block) ;

  simplified = cloog_loop_alloc(loop->state, simp, loop->otl, loop->stride,
				new_block, inner, NULL);

  if (options->save_domains) {
    inter = cloog_domain_add_stride_constraint(inter, loop->stride);
    if (domain_dim > nb_scattdims) {
      CloogDomain *t;
      inter = cloog_domain_project(t = inter, nb_scattdims);
      cloog_domain_free(t);
    }
    simplified->unsimplified = inter;
  } else
    cloog_domain_free(inter);

  return(simplified) ;
}


/**
 * cloog_loop_simplify function:
 * This function implements the part 6. of the Quillere algorithm, it
 * recursively simplifies each loop in the context of the preceding loop domain.
 * It returns a pointer to the simplified loop list.
 * The cloog_domain_simplify (DomainSimplify) behaviour is really bad with
 * polyhedra union and some really awful sidesteppings were written, I plan
 * to solve that...
 * - October   31th 2001: first version.
 * - July 3rd->11th 2003: memory leaks hunt and correction.
 * - April     16th 2005: a memory leak fixed (extended_context was not freed).
 * - June      15th 2005: a memory leak fixed (loop was not conveniently freed
 *                        when the constraint system is never true).
 * - October   27th 2005: - this function called before cloog_loop_fast_simplify
 *                          is now the official cloog_loop_simplify function in
 *                          replacement of a slower and more complex one (after
 *                          deep changes in the pretty printer).
 *                        - we use cloog_loop_disjoint to fix the problem when
 *                          simplifying gives a union of polyhedra (before, it
 *                          was under the responsibility of the pretty printer).
 */
CloogLoop *cloog_loop_simplify(CloogLoop *loop, CloogDomain *context, int level,
	                       int nb_scattdims, CloogOptions *options)
{
  CloogLoop *now;
  CloogLoop *res = NULL;
  CloogLoop **next = &res;
  int need_split = 0;

  for (now = loop; now; now = now->next)
    if (!cloog_domain_isconvex(now->domain)) {
      now->domain = cloog_domain_simplify_union(now->domain);
      if (!cloog_domain_isconvex(now->domain))
	need_split = 1;
    }

  /* If the input of CLooG contains any union domains, then they
   * may not have been split yet at this point.  Do so now as the
   * clast construction assumes there are no union domains.
   */
  if (need_split)
    loop = cloog_loop_disjoint(loop);

  for (now = loop; now; now = now->next) {
    *next = loop_simplify(now, context, level, nb_scattdims, options);

    now->inner = NULL; /* For loop integrity. */
    cloog_domain_free(now->domain);
    now->domain = NULL;

    if (*next)
      next = &(*next)->next;
  }
  cloog_loop_free(loop);

  return res;
}


/**
 * cloog_loop_scatter function:
 * This function add the scattering (scheduling) informations in a loop.
 */
void cloog_loop_scatter(CloogLoop * loop, CloogScattering *scatt)
{
  loop->domain = cloog_domain_scatter(loop->domain, scatt);
}