Tiramisu Compiler
A class that represents computations. More...
#include <core.h>
Inherited by tiramisu::communicator, tiramisu::constant, tiramisu::input, and tiramisu::view.
Public Member Functions | |
computation (std::string iteration_domain, tiramisu::expr e, bool schedule_this_computation, tiramisu::primitive_t t, tiramisu::function *fct) | |
Constructor for computations. More... | |
computation (std::string name, std::vector< var > iterator_variables, tiramisu::expr e, bool schedule_this_computation) | |
Constructor for computations. More... | |
computation (std::vector< var > iterator_variables, tiramisu::expr e) | |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. More... | |
computation (std::string name, std::vector< var > iterator_variables, tiramisu::expr e) | |
Constructor for computations. More... | |
computation (std::vector< var > iterator_variables, tiramisu::expr e, bool schedule_this_computation) | |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. More... | |
computation (std::string name, std::vector< var > iterator_variables, primitive_t t) | |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. t is the type of the computation. More... | |
computation (std::vector< var > iterator_variables, primitive_t t) | |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. More... | |
virtual bool | is_send () const |
virtual bool | is_recv () const |
virtual bool | is_send_recv () const |
virtual bool | is_wait () const |
void | add_associated_let_stmt (std::string access_name, tiramisu::expr e) |
Add a let statement that is associated to this computation. More... | |
void | unschedule_this_computation () |
Don't schedule a previously scheduled computation. More... | |
virtual void | add_definitions (std::string iteration_domain_str, tiramisu::expr e, bool schedule_this_computation, tiramisu::primitive_t t, tiramisu::function *fct) |
Add definitions of computations that have the same name as this computation. More... | |
void | add_predicate (tiramisu::expr predicate) |
Add a predicate (condition) on the computation. More... | |
void | after (computation &comp, tiramisu::var iterator) |
Schedule this computation to run after the computation comp . More... | |
void | after (computation &comp, int level) |
This function is equivalent to void after(computation &comp, tiramisu::var iterator); except that it uses loop level numbers (0, 1, 2, ...) instead of using loop variables (tiramisu::var). More... | |
void | allocate_and_map_buffer_automatically (tiramisu::argument_t type=tiramisu::a_temporary) |
void | apply_transformation_on_schedule (std::string map_str) |
Apply a transformation on the schedule. More... | |
void | between (computation &before_comp, tiramisu::var before_l, computation &after_comp, tiramisu::var after_l) |
Schedule this computation to run after before_comp at the loop level before_l , and before after_comp at loop level after_l . More... | |
tiramisu::buffer * | get_automatically_allocated_buffer () |
Return the buffer that was allocated automatically using high level data mapping functions. More... | |
void | interchange (tiramisu::var L0, tiramisu::var L1) |
Interchange (swap) the two loop levels L0 and L1 . More... | |
void | interchange (int L0, int L1) |
Identical to void interchange(tiramisu::var L0, tiramisu::var L1);. More... | |
void | mark_as_let_statement () |
Mark this statement as a let statement. More... | |
void | mark_as_library_call () |
Mark this statement as a library call. More... | |
void | parallelize (tiramisu::var L) |
Tag the loop level L to be parallelized. More... | |
void | set_wait_access (std::string access_str) |
void | set_wait_access (isl_map *access) |
void | set_expression (const tiramisu::expr &e) |
Set the expression of the computation. More... | |
void | set_inline (bool is_inline=true) |
Sets whether the computation is inline or not, based on the value of is_inline . More... | |
const bool | is_inline_computation () const |
Returns true if and only if the computation is inline. More... | |
void | shift (tiramisu::var L0, int n) |
Shift the loop level L0 of the iteration space by n iterations. More... | |
void | skew (tiramisu::var i, tiramisu::var j, int f, tiramisu::var ni, tiramisu::var nj) |
Apply loop skewing on the loop levels i and j with a skewing factor of f . More... | |
void | skew (tiramisu::var i, tiramisu::var j, tiramisu::var k, int factor, tiramisu::var ni, tiramisu::var nj, tiramisu::var nk) |
Apply loop skewing on the loop levels i , j and k with a skewing factor of factor . More... | |
void | skew (tiramisu::var i, tiramisu::var j, tiramisu::var k, tiramisu::var l, int factor, tiramisu::var ni, tiramisu::var nj, tiramisu::var nk, tiramisu::var nl) |
Apply loop skewing on the loop levels i , j , k , l with a skewing factor of factor . More... | |
void | skew (tiramisu::var i, tiramisu::var j, int factor) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. More... | |
void | skew (tiramisu::var i, tiramisu::var j, tiramisu::var k, int factor) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. More... | |
void | skew (tiramisu::var i, tiramisu::var j, tiramisu::var k, tiramisu::var l, int factor) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. More... | |
void | skew (int i, int j, int factor) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. More... | |
void | skew (int i, int j, int k, int factor) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. More... | |
void | skew (int i, int j, int k, int l, int factor) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. More... | |
void | split (int L0, int sizeX) |
Identical to void split(tiramisu::var L0, int sizeX);. More... | |
void | storage_fold (tiramisu::var dim, int f) |
Fold the storage of the computation. More... | |
tiramisu::computation * | store_at (tiramisu::computation &comp, tiramisu::var L0) |
Allocate the storage of this computation in the loop level L0 . More... | |
void | tag_parallel_level (tiramisu::var L) |
Tag the loop level L to be parallelized. More... | |
void | tag_parallel_level (int L) |
Identical to void tag_parallel_level(tiramisu::var L);. More... | |
void | tag_vector_level (tiramisu::var L, int len) |
Tag the loop level L to be vectorized. More... | |
void | tag_vector_level (int L, int len) |
Identical to void tag_vector_level(tiramisu::var L, int len);. More... | |
void | tag_unroll_level (tiramisu::var L) |
Tag the loop level L to be unrolled. More... | |
void | tag_unroll_level (int L) |
Identical to void tag_unroll_level(tiramisu::var L);. More... | |
template<typename... Args> | |
tiramisu::expr | operator() (Args...args) |
Access operator: C0(i,j) represents an access to the element (i,j) of the computation C0. More... | |
operator expr () | |
void | after_low_level (computation &comp, int level) |
Schedule this computation to run after the computation comp . More... | |
void | after_low_level (computation &comp, std::vector< int > levels) |
Schedule this computation to run after the computation comp . More... | |
void | before (computation &consumer, tiramisu::var L) |
Schedule this computation to run before the computation consumer at the loop level L . More... | |
void | store_in (buffer *buff) |
Store this computation in buff . More... | |
void | store_in (buffer *buff, std::vector< expr > iterators) |
Store this computation in buff . More... | |
void | compute_at (computation &consumer, tiramisu::var L) |
This function assumes that consumer consumes values produced by this computation (which is the producer). More... | |
void | compute_at (computation &consumer, int L) |
Identical to void compute_at(computation &consumer, tiramisu::var L);. More... | |
int | compute_maximal_AST_depth () |
Generate the time-space domain, construct an AST that scans that time-space domain, and then compute the depth of this AST. More... | |
void | dump_iteration_domain () const |
Dump the iteration domain of the computation. More... | |
void | dump_schedule () const |
Dump the schedule of the computation. More... | |
void | dump () const |
Dump the computation on stdout. More... | |
void | fuse_after (tiramisu::var lev, computation &comp) |
Fuse this computation with the computation passed as argument in the same loop. More... | |
void | gen_time_space_domain () |
Generate the time-space domain of the computation. More... | |
void | drop_rank_iter (var level) |
Specify that the rank loop iterator should be removed from linearization. More... | |
tiramisu::primitive_t | get_data_type () const |
Get the data type of the computation. More... | |
const tiramisu::expr & | get_expr () const |
Return the Tiramisu expression associated with the computation. More... | |
isl_set * | get_iteration_domain () const |
Return the iteration domain of the computation. More... | |
tiramisu::computation & | get_last_update () |
Get the last update of a computation. More... | |
int | get_loop_level_number_from_dimension_name (std::string dim_name) |
Search the time-space domain (the range of the schedule) and return the loop level number that corresponds to the dimension named dim_name . More... | |
const std::string & | get_name () const |
Return the name of the computation. More... | |
computation * | get_predecessor () |
Returns a pointer to the computation scheduled immediately before this computation, or a null pointer if none exist. More... | |
tiramisu::computation & | get_update (int index) |
Return the update of this computation indicated by index . More... | |
isl_map * | get_schedule () const |
Get the schedule of the computation. More... | |
void | gpu_tile (tiramisu::var L0, tiramisu::var L1, int sizeX, int sizeY) |
Tile the computation and then tag the outermost tile dimension to be mapped to GPU blocks and tag the innermost tile dimensions to be mapped to GPU threads. More... | |
void | gpu_tile (tiramisu::var L0, tiramisu::var L1, int sizeX, int sizeY, tiramisu::var L0_outer, tiramisu::var L1_outer, tiramisu::var L0_inner, tiramisu::var L1_inner) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. More... | |
void | gpu_tile (tiramisu::var L0, tiramisu::var L1, tiramisu::var L2, int sizeX, int sizeY, int sizeZ) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. More... | |
void | gpu_tile (tiramisu::var L0, tiramisu::var L1, tiramisu::var L2, int sizeX, int sizeY, int sizeZ, tiramisu::var L0_outer, tiramisu::var L1_outer, tiramisu::var L2_outer, tiramisu::var L0_inner, tiramisu::var L1_inner, tiramisu::var L2_inner) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. More... | |
void | set_access (std::string access_str) |
Set the access relation of the computation. More... | |
void | set_access (isl_map *access) |
Set the access relation of the computation. More... | |
void | set_low_level_schedule (isl_map *map) |
Set the schedule indicated by map . More... | |
void | set_low_level_schedule (std::string map_str) |
Set the schedule indicated by map . More... | |
void | split (tiramisu::var L0, int sizeX) |
Split the loop level L0 of the iteration space into two new loop levels. More... | |
void | split (tiramisu::var L0, int sizeX, tiramisu::var L0_outer, tiramisu::var L0_inner) |
Split the loop level L0 of the iteration space into two new loop levels. More... | |
void | tag_gpu_level (tiramisu::var L0, tiramisu::var L1) |
Tag the loop levels L0 and L1 to be mapped to GPU. More... | |
void | tag_gpu_level (tiramisu::var L0, tiramisu::var L1, tiramisu::var L2, tiramisu::var L3) |
Tag the loop levels L0 , L1 , L2 and L3 to be mapped to GPU. More... | |
void | tag_gpu_level (tiramisu::var L0, tiramisu::var L1, tiramisu::var L2, tiramisu::var L3, tiramisu::var L4, tiramisu::var L5) |
Tag the loop levels L0 to L5 to be mapped to GPU. More... | |
void | tag_distribute_level (tiramisu::var L) |
Tag the loop level L to be distributed. More... | |
void | tag_distribute_level (int L) |
Tag the loop level L to be distributed. More... | |
computation & | then (computation &next_computation, tiramisu::var L) |
Schedule this computation to run before the computation next_computation at the loop level L and return next_computation . More... | |
void | tile (tiramisu::var L0, tiramisu::var L1, int sizeX, int sizeY) |
Tile the two loop levels L0 and L1 with rectangular tiling. More... | |
void | tile (tiramisu::var L0, tiramisu::var L1, int sizeX, int sizeY, tiramisu::var L0_outer, tiramisu::var L1_outer, tiramisu::var L0_inner, tiramisu::var L1_inner) |
Tile the two loop levels L0 and L1 with rectangular tiling. More... | |
void | tile (tiramisu::var L0, tiramisu::var L1, tiramisu::var L2, int sizeX, int sizeY, int sizeZ) |
Tile the two loop levels L0 and L1 with rectangular tiling. More... | |
void | tile (tiramisu::var L0, tiramisu::var L1, tiramisu::var L2, int sizeX, int sizeY, int sizeZ, tiramisu::var L0_outer, tiramisu::var L1_outer, tiramisu::var L2_outer, tiramisu::var L0_inner, tiramisu::var L1_inner, tiramisu::var L2_inner) |
Tile the two loop levels L0 and L1 with rectangular tiling. More... | |
void | tile (int L0, int L1, int sizeX, int sizeY) |
Tile the two loop levels L0 and L1 with rectangular tiling. More... | |
void | tile (int L0, int L1, int L2, int sizeX, int sizeY, int sizeZ) |
Tile the two loop levels L0 and L1 with rectangular tiling. More... | |
void | unroll (tiramisu::var L, int fac) |
Unroll the loop level L with an unrolling factor fac . More... | |
void | unroll (tiramisu::var L, int fac, tiramisu::var L_outer, tiramisu::var L_inner) |
Unroll the loop level L with an unrolling factor fac . More... | |
void | vectorize (tiramisu::var L, int v) |
Vectorize the loop level L . More... | |
void | vectorize (tiramisu::var L, int v, tiramisu::var L_outer, tiramisu::var L_inner) |
Vectorize the loop level L . More... | |
Static Public Member Functions | |
static xfer | create_xfer (std::string send_iter_domain, std::string recv_iter_domain, tiramisu::expr send_dest, tiramisu::expr recv_src, xfer_prop send_prop, xfer_prop recv_prop, tiramisu::expr send_expr, tiramisu::function *fct) |
static xfer | create_xfer (std::string iter_domain, xfer_prop prop, tiramisu::expr expr, tiramisu::function *fct) |
Static Public Attributes | |
static const var | root |
root is a variable used to specify the dimension level known as root (the level outside any loop nest). More... | |
static const int | root_dimension = -1 |
Equivalent of computation::root but to be used with scheduling functions that take loop level (integers) as input instead of tiramisu::var. More... | |
Protected Member Functions | |
computation () | |
Dummy constructor for derived classes. More... | |
std::vector< tiramisu::expr > * | compute_buffer_size () |
Compute the size of the buffer allocated automatically to hold the results of this computation. More... | |
isl_ctx * | get_ctx () const |
Return the context of the computations. More... | |
tiramisu::expr | get_predicate () |
Return the predicate around this computation if a predicate was added using add_predicate(). More... | |
const std::string | get_unique_name () const |
Return a unique name of computation; made of the following pattern: [computation name]@[computation address in memory]. More... | |
bool | has_multiple_definitions () |
Return true if the computation has multiple definitions. More... | |
void | init_computation (std::string iteration_space_str, tiramisu::function *fct, const tiramisu::expr &e, bool schedule_this_computation, tiramisu::primitive_t t) |
Initialize a computation. More... | |
void | set_name (const std::string &n) |
Set the name of the computation. More... | |
void | full_loop_level_collapse (int level, tiramisu::expr collapse_from_iter) |
Collapse all the iterations of a loop into one single iteration. More... | |
computation (std::string name, std::vector< var > iterator_variables, tiramisu::expr e, bool schedule_this_computation, primitive_t t) | |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. More... | |
void | set_schedule (isl_map *map) |
Set the schedule indicated by map . More... | |
void | set_schedule (std::string map_str) |
Set the schedule indicated by map . More... | |
Protected Attributes | |
bool | _is_library_call |
True if this computation represents a library call. More... | |
std::string | library_call_name |
If the computation represents a library call, this is the name of the function. More... | |
isl_ast_expr * | wait_index_expr |
An index expression just for the request buffer. More... | |
A class that represents computations.
A computation is an expression associated with an iteration domain. A computation indicates what needs to be computed (the expression that should be computed). A computation has three representations: (1) the iteration domain, which specifies the set of points to compute but neither their order of execution nor their mapping to memory; (2) the time-space domain, which specifies the logical execution time and the processor of each point; and (3) the data-layout representation, which specifies where the results of the computation are stored in memory (buffers).
tiramisu::computation::computation ( )  [protected]
Dummy constructor for derived classes.
tiramisu::computation::computation (std::string name, std::vector< var > iterator_variables, tiramisu::expr e, bool schedule_this_computation, primitive_t t)  [protected]
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
tiramisu::computation::computation | ( | std::string | iteration_domain, |
tiramisu::expr | e, | ||
bool | schedule_this_computation, | ||
tiramisu::primitive_t | t, | ||
tiramisu::function * | fct | ||
) |
Constructor for computations.
iteration_domain
is a string that represents the iteration domain of the computation. The iteration domain should be written in the ISL format (http://barvinok.gforge.inria.fr/barvinok.pdf Section 1.2.1).
The iteration domain of a statement is a set that contains all of the execution instances of the statement (a statement in a loop has an execution instance for each loop iteration in which it executes). Each execution instance of a statement in a loop nest is uniquely represented by an identifier and a tuple of integers (typically, the values of the outer loop iterators).
For example, consider the statement S0 in the following loop nest (a C-style sketch):
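for (int i = 0; i < 2; i++)
    for (int j = 0; j < 3; j++)
        S0;      // S0 executes once for every pair (i, j)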
The iteration space of S0 is then {S0[0,0], S0[0,1], S0[0,2], S0[1,0], S0[1,1], S0[1,2]}.
S0[0,0] is the execution instance of S0 in the iteration [0,0].
The previous set of integer tuples can be compactly described by affine constraints as follows
{S0[i,j]: 0<=i<2 and 0<=j<3}
In general, the loop nest
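for (int i = 0; i < N; i++)
    for (int j = 0; j < M; j++)
        S0;      // generic loop nest (sketch)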
has the following iteration domain
{S0[i,j]: 0<=i<N and 0<=j<M}
This should be read as: the set of points [i,j] such that 0<=i<N and 0<=j<M.
The name of the computation in the iteration domain should not start with _ (an underscore). Names starting with _ are reserved names.
e
is the expression computed by the computation. It is possible to declare the computation without specifying the expression. The expression can be specified later using computation::set_expression(). An example of setting the expression after declaring the computation is presented in tests/test_04.cpp.
schedule_this_computation
should be set to true if the computation is supposed to be scheduled and code is supposed to be generated from the computation. Set it to false if you just want to use the computation to represent a buffer (that is passed as an argument to the function) and you do not intend to generate code for the computation. An example where this argument is set to false is presented in tests/test_14.cpp.
t
is the type of the computation, i.e., the type of the expression computed by the computation. Examples of types include p_uint8, p_uint16, p_uint32, ...
fct
is a pointer to the Tiramisu function where this computation should be added.
Bound Inference: The user can declare computations without providing any constraint about the iteration domain; in this case they can rely on bound inference to infer the constraints about each iteration domain. The user only needs to provide constraints over the domains of the last computations (last consumers), and Tiramisu will propagate these constraints along the chain of computations that precede those consumers. Note that bound inference is not possible if you have multiple definitions of the same computation. In such a case, you should provide constraints over the iteration domain when you declare the computation.
Examples about bound inference are provided in test_22 to test_25.
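A minimal sketch of how this constructor can be used (the function name, computation name, bounds and expression below are illustrative, not taken from a specific test):
#include <core.h>

tiramisu::function f("example_fct");                          // function that will own the computation
tiramisu::computation S0("{S0[i,j]: 0<=i<100 and 0<=j<100}",  // iteration domain in the ISL format
                         tiramisu::expr((uint8_t) 4),         // expression computed at each point (i,j)
                         true,                                // schedule S0 and generate code for it
                         tiramisu::p_uint8,                   // type of the computed expression
                         &f);                                 // function to which S0 is added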
tiramisu::computation::computation | ( | std::string | name, |
std::vector< var > | iterator_variables, | ||
tiramisu::expr | e, | ||
bool | schedule_this_computation | ||
) |
Constructor for computations.
Same as tiramisu::computation::computation(std::string name, std::vector<var> iterator_variables, tiramisu::expr e) except that it has the following additional argument:
schedule_this_computation
indicates whether this computation should be scheduled.
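For instance, a computation that only represents data passed to the function (and for which no code should be generated) can be declared as follows (a sketch; the names and bounds are illustrative):
tiramisu::var i("i", 0, 100), j("j", 0, 100);
// No expression is attached and schedule_this_computation is false:
// S_in only represents data, no code is generated for it.
tiramisu::computation S_in("S_in", {i, j}, tiramisu::expr(), false);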
tiramisu::computation::computation | ( | std::vector< var > | iterator_variables, |
tiramisu::expr | e | ||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
tiramisu::computation::computation | ( | std::string | name, |
std::vector< var > | iterator_variables, | ||
tiramisu::expr | e | ||
) |
Constructor for computations.
Declare a computation. A computation in Tiramisu is the equivalent of a statement surrounded by a loop nest in C (an example is provided later).
name
is the name of the computation.
iterator_variables
is a vector that represents the loop iterators around the computation.
e
is the expression computed by the computation.
For example, if we have two iterator variables
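tiramisu::var i("i", 0, 20), j("j", 0, 30);   // iterator variables and their bounds (sketch; matches the bounds mentioned below)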
and we have the following computation declaration
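tiramisu::computation S0("S0", {i, j}, tiramisu::expr(4));   // sketch: compute the value 4 at every point (i, j)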
This is equivalent to writing the following C code
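for (int i = 0; i < 20; i++)
    for (int j = 0; j < 30; j++)
        S0(i, j) = 4;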
More precisely, the vector {i, j} specifies the iteration domain of the computation S0. In this case, 0<=i<20 and 0<=j<30.
It is possible to declare the computation without specifying the expression. The expression can be specified later using computation::set_expression(). An example of setting the expression after declaring the computation is presented in tutorial 04. Usually this is needed for writing reductions.
tiramisu::computation::computation | ( | std::vector< var > | iterator_variables, |
tiramisu::expr | e, | ||
bool | schedule_this_computation | ||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
tiramisu::computation::computation (std::string name, std::vector< var > iterator_variables, primitive_t t)  [inline]
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. t is the type of the computation, i.e., the type of the expression computed by the computation. Examples of types include p_uint8, p_uint16, p_uint32, ...
Usually, this constructor is used to declare buffer wrappers.
For example, if we have two iterator variables
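tiramisu::var i("i", 0, 20), j("j", 0, 30);   // sketch: bounds chosen to match buf[20, 30] below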
and we have the following computation declaration
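// Sketch: S0 carries no expression, only a type; it can serve as a typed view over existing data.
tiramisu::computation S0("S0", {i, j}, tiramisu::p_uint8);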
This can be used as a wrapper over a buffer buf[20, 30] where the buffer elements are of type uint8.
Definition at line 2676 of file core.h.
References tiramisu::expr::dtype.
tiramisu::computation::computation (std::vector< var > iterator_variables, primitive_t t)  [inline]
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition at line 2686 of file core.h.
References tiramisu::a_temporary, and tiramisu::expr::dtype.
void tiramisu::computation::add_associated_let_stmt | ( | std::string | access_name, |
tiramisu::expr | e | ||
) |
Add a let statement that is associated to this computation.
The let statement will be executed before the computation (more precisely, between this computation and any computation that precedes it). The variable defined by the let statement is accessible by this computation alone, i.e., it cannot be used in any other computation.
virtual void tiramisu::computation::add_definitions (std::string iteration_domain_str, tiramisu::expr e, bool schedule_this_computation, tiramisu::primitive_t t, tiramisu::function *fct)  [virtual]
Add definitions of computations that have the same name as this computation.
The arguments of this function are identical to the arguments of the computation constructor. In general, this function is used to express reductions and to express computation updates.
In other words, this function should be used if the user has already declared a set of computations C and wants to declare more computations that have the same name.
Example: let's assume we want to declare two computations that have the same name C but different expressions (for instance, one definition over each half of the domain). To do this, we can declare the first computation using the computation constructor and declare the second computation using add_definitions(), as sketched below.
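// A sketch (f is an illustrative tiramisu::function; the bounds and expressions are illustrative too).
tiramisu::computation C("{C[i]: 0<=i<10}", tiramisu::expr((uint8_t) 0), true, tiramisu::p_uint8, &f);
// Second definition: same name and access relation, different domain and expression.
C.add_definitions("{C[i]: 10<=i<20}", tiramisu::expr((uint8_t) 1), true, tiramisu::p_uint8, &f);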
The use of add_definitions() is purely due to restrictions imposed by the C++ language and not by the Tiramisu framework itself. This is mainly because, in C++, it is not possible to declare two objects with the same name; for example one cannot do
computation C(...); computation C(...);
In order to declare the second set of computations, we chose to use the add_definitions function to avoid this problem.
The newly added computations must have the same name and the same access function as the initial set of computations but can have a different expression.
An example of using this function is available in test_26.
Reimplemented in tiramisu::wait, tiramisu::recv, and tiramisu::send.
void tiramisu::computation::add_predicate | ( | tiramisu::expr | predicate | ) |
Add a predicate (condition) on the computation.
The computation will be executed only if this condition is true.
The predicate can be an affine or a non-affine expression. If you need to put a condition around a block of statements (i.e., a sequence of computations), then you can do that by adding a predicate to each one of those computations. The compiler will then automatically transform the multiple conditions (a condition around each computation) into one condition around the whole block.
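A small sketch of the call (C, i and the bound 99 are illustrative names/values):
C.add_predicate(i < 99);   // C is executed only for iterations where i < 99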
void tiramisu::computation::after | ( | computation & | comp, |
tiramisu::var | iterator | ||
) |
Schedule this computation to run after the computation comp.
This computation is placed after comp at the loop level iterator; iterator is a loop level in this computation.
The root level is computation::root. The global variable computation::root is equivalent to var("root").
For example assuming we have the two computations
{S0[i,j]: 0<=i<N and 0<=j<N} and {S1[i,j]: 0<=i<N and 0<=j<N}
In order to make S1 run after S0 in the i loop, one should use
S1.after(S0, i)
which means: S1 is after S0 at the loop level i (which is loop level 0).
The corresponding code is
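// Sketch of the loop nest produced by S1.after(S0, i)
for (int i = 0; i < N; i++)
{
    for (int j = 0; j < N; j++)
        S0;
    for (int j = 0; j < N; j++)
        S1;
}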
S1.after(S0, j)
means: S1 is after S0 at the loop level j (which is 1) and would yield the following code
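// Sketch of the loop nest produced by S1.after(S0, j)
for (int i = 0; i < N; i++)
    for (int j = 0; j < N; j++)
    {
        S0;
        S1;
    }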
S1.after(S0, computation::root) means S1 is after S0 at the main program level and would yield the following code
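// Sketch of the loop nests produced by S1.after(S0, computation::root)
for (int i = 0; i < N; i++)
    for (int j = 0; j < N; j++)
        S0;
for (int i = 0; i < N; i++)
    for (int j = 0; j < N; j++)
        S1;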
Note that as with all other scheduling methods:
void tiramisu::computation::after | ( | computation & | comp, |
int | level | ||
) |
This function is equivalent to void after(computation &comp, tiramisu::var iterator); except that it uses loop level numbers (0, 1, 2, ...) instead of using loop variables (tiramisu::var).
Tiramisu internally represents loop levels using numbers instead of variable names, and this is the actual function used internally.
The outermost loop level is 0. The root level is computation::root_dimension.
For example assuming we have the two computations
{S0[i,j]: 0<=i<N and 0<=j<N} and {S1[i,j]: 0<=i<N and 0<=j<N}
In order to make S1 run after S0 in the i loop, one should use
S1.after(S0,0)
which means: S1 is after S0 at the loop level 0 (which is i).
The corresponding code is
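// Sketch of the loop nest produced by S1.after(S0, 0)
for (int i = 0; i < N; i++)
{
    for (int j = 0; j < N; j++)
        S0;
    for (int j = 0; j < N; j++)
        S1;
}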
void tiramisu::computation::after_low_level | ( | computation & | comp, |
int | level | ||
) |
Schedule this computation to run after the computation comp. The computations are placed after each other in the loop level level. The outermost loop level is 0. The root level is computation::root_dimension.
For example assuming we have the two computations
{S0[i,j]: 0<=i<N and 0<=j<N} and {S1[i,j]: 0<=i<N and 0<=j<N}
In order to make S1 run after S0 in the i loop, one should use
S1.after_low_level(S0,0)
which means: S1 is after S0 at the loop level 0 (which is i).
The corresponding code is
S1.after_low_level(S0,1)
means: S1 is after S0 at the loop level 1 (which is j) and would yield the following code
S1.after_low_level(S0, computation::root_dimension) means S1 is after S0 at the main program level and would yield the following code
To specify that this computation is after comp in multiple levels, the user can provide those levels in the levels vector.
S1.after_low_level(S0, {0,1})
means that S1 is after S0 in the loop level 0 and in the loop level 1.
Note that
S1.after_low_level(S0, L)
would mean that S1 and S0 share the same loop nests for all the loop levels that are before L and that S1 is after S0 in L only. S1 is not after S0 in the loop levels that are before L.
void tiramisu::computation::after_low_level | ( | computation & | comp, |
std::vector< int > | levels | ||
) |
Schedule this computation to run after the computation comp. The computations are placed after each other in the loop level level. The outermost loop level is 0. The root level is computation::root_dimension.
For example assuming we have the two computations
{S0[i,j]: 0<=i<N and 0<=j<N} and {S1[i,j]: 0<=i<N and 0<=j<N}
In order to make S1 run after S0 in the i loop, one should use
S1.after_low_level(S0,0)
which means: S1 is after S0 at the loop level 0 (which is i).
The corresponding code is
S1.after_low_level(S0,1)
means: S1 is after S0 at the loop level 1 (which is j) and would yield the following code
S1.after_low_level(S0, computation::root_dimension) means S1 is after S0 at the main program level and would yield the following code
To specify that this computation is after comp in multiple levels, the user can provide those levels in the levels vector.
S1.after_low_level(S0, {0,1})
means that S1 is after S0 in the loop level 0 and in the loop level 1.
Note that
S1.after_low_level(S0, L)
would mean that S1 and S0 share the same loop nests for all the loop levels that are before L and that S1 is after S0 in L only. S1 is not after S0 in the loop levels that are before L.
void tiramisu::computation::allocate_and_map_buffer_automatically | ( | tiramisu::argument_t | type = tiramisu::a_temporary | ) |
void tiramisu::computation::apply_transformation_on_schedule | ( | std::string | map_str | ) |
Apply a transformation on the schedule.
This transformation is from the time-space domain to the time-space domain. It is applied on the range of the schedule (i.e., on the output of the schedule relation).
For example, to shift the i dimension of the time-processor domain of C0, you can apply the transformation
C0[0, 0, i, 0, j, 0] -> C0[0, 0, i+2, 0, j, 0]
To apply an interchange, you would do
C0[0, 0, i, 0, j, 0] -> C0[0, 0, j, 0, i, 0]
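For instance, the shift transformation above could be applied as follows (a sketch; C0 and the exact map string, including any context parameters, are illustrative):
C0.apply_transformation_on_schedule("{C0[0, 0, i, 0, j, 0] -> C0[0, 0, i + 2, 0, j, 0]}");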
void tiramisu::computation::before | ( | computation & | consumer, |
tiramisu::var | L | ||
) |
Schedule this computation to run before the computation consumer at the loop level L.
Notes: L is a loop level of this computation.
void tiramisu::computation::between | ( | computation & | before_comp,
tiramisu::var | before_l, | ||
computation & | after_comp, | ||
tiramisu::var | after_l | ||
) |
Schedule this computation to run after before_comp at the loop level before_l, and before after_comp at loop level after_l.
The outermost loop level is 0.
Use computation::root_dimension to indicate the root dimension (i.e. the outermost time-space dimension).
If there was already a direct scheduling between before_comp and after_comp (e.g. using before, after, between, ...), that schedule is overwritten; i.e. it no longer exists/has an effect.
Note that as with all other scheduling methods:
void tiramisu::computation::compute_at | ( | computation & | consumer, |
tiramisu::var | L | ||
) |
This function assumes that consumer consumes values produced by this computation (which is the producer). Compute this computation as needed for each unique value of the consumer. This computation is scheduled so that the values consumed by the consumer are computed at the level L and in the same loop nest of the consumer. L is a loop level in the consumer.
If the consumer needs this computation to be computed redundantly, the function creates the necessary redundant computations and schedules them before the consumer.
This function performs the following:
This function does not:
If this function creates a duplicate of the computation, the user does not need to set its access relation. The duplicated computation will automatically have the same access relation as the original computation. This access relation is set automatically.
This function does not return a handler to manipulate the duplicate computation. It does not allow the user to manipulate the duplicate freely. The duplicate is scheduled automatically to be executed before the consumer.
void tiramisu::computation::compute_at | ( | computation & | consumer, |
int | L | ||
) |
Identical to void compute_at(computation &consumer, tiramisu::var L); except that it uses a loop level number (0, 1, 2, ...) instead of a loop level variable.
std::vector< tiramisu::expr > * tiramisu::computation::compute_buffer_size ( )  [protected]
Compute the size of the buffer allocated automatically to hold the results of this computation.
int tiramisu::computation::compute_maximal_AST_depth | ( | ) |
Generate the time-space domain, construct an AST that scans that time-space domain, and then compute the depth of this AST.
This is useful for example to know if all the dimensions of the time-space domain will correspond to a loop level in the final generated AST.
static xfer tiramisu::computation::create_xfer (std::string send_iter_domain, std::string recv_iter_domain, tiramisu::expr send_dest, tiramisu::expr recv_src, xfer_prop send_prop, xfer_prop recv_prop, tiramisu::expr send_expr, tiramisu::function *fct)  [static]
static xfer tiramisu::computation::create_xfer (std::string iter_domain, xfer_prop prop, tiramisu::expr expr, tiramisu::function *fct)  [static]
void tiramisu::computation::drop_rank_iter | ( | var | level | ) |
Specify that the rank loop iterator should be removed from linearization.
void tiramisu::computation::dump | ( | ) | const |
Dump the computation on stdout.
This is mainly useful for debugging.
void tiramisu::computation::dump_iteration_domain | ( | ) | const |
Dump the iteration domain of the computation.
This is useful for debugging.
void tiramisu::computation::dump_schedule | ( | ) | const |
Dump the schedule of the computation.
This is mainly useful for debugging.
The schedule is a relation between the iteration space and the time space. The relation provides a logical date of execution for each point in the iteration space. The schedule needs first to be set before calling this function.
void tiramisu::computation::full_loop_level_collapse (int level, tiramisu::expr collapse_from_iter)  [protected]
Collapse all the iterations of a loop into one single iteration.
void tiramisu::computation::fuse_after (tiramisu::var lev, computation &comp)  [inline]
Fuse this computation with the computation passed as argument in the same loop.
Run this computation after that computation. Fuse them at the loop level lev.
For example, assuming we have the following computations
{S0[i,j]: 0<=i<N and 0<=j<N}, {S1[i,j]: 0<=i<N and 0<=j<N} and {S2[i,j]: 0<=i<N and 0<=j<N}.
Without fusion, these computations would be equivalent to the following loop nests:
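// Sketch of the unfused loop nests
for (int i = 0; i < N; i++)
    for (int j = 0; j < N; j++)
        S0;
for (int i = 0; i < N; i++)
    for (int j = 0; j < N; j++)
        S1;
for (int i = 0; i < N; i++)
    for (int j = 0; j < N; j++)
        S2;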
To fuse them, one should call
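// A sketch of the calls (fusing at loop level j):
S1.fuse_after(j, S0);
S2.fuse_after(j, S1);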
This would result in fusing S2 with S0 and S1 at loop level j. S2 will be scheduled for execution after S0 and S1. The resulting code would look like
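// Sketch of the fused loop nest
for (int i = 0; i < N; i++)
    for (int j = 0; j < N; j++)
    {
        S0;
        S1;
        S2;
    }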
Calling
would result in the following code
Definition at line 3230 of file core.h.
References tiramisu::expr::get_name().
void tiramisu::computation::gen_time_space_domain | ( | ) |
Generate the time-space domain of the computation.
In this representation, the logical time of execution and the processor where the computation will be executed are both specified. The memory location where computations will be stored is not yet specified at this level.
tiramisu::buffer* tiramisu::computation::get_automatically_allocated_buffer | ( | ) |
Return the buffer that was allocated automatically using high level data mapping functions.
If no automatic buffer was allocated, this function returns NULL.
isl_ctx * tiramisu::computation::get_ctx ( ) const  [protected]
Return the context of the computations.
tiramisu::primitive_t tiramisu::computation::get_data_type | ( | ) | const |
Get the data type of the computation.
const tiramisu::expr& tiramisu::computation::get_expr | ( | ) | const |
Return the Tiramisu expression associated with the computation.
isl_set* tiramisu::computation::get_iteration_domain | ( | ) | const |
Return the iteration domain of the computation.
In this representation, the order of execution of computations is not specified, the computations are also not mapped to memory.
tiramisu::computation& tiramisu::computation::get_last_update | ( | ) |
Get the last update of a computation.
int tiramisu::computation::get_loop_level_number_from_dimension_name (std::string dim_name)  [inline]
const std::string& tiramisu::computation::get_name | ( | ) | const |
Return the name of the computation.
computation* tiramisu::computation::get_predecessor | ( | ) |
Returns a pointer to the computation scheduled immediately before this computation, or a null pointer if none exist.
tiramisu::expr tiramisu::computation::get_predicate ( )  [protected]
Return the predicate around this computation if a predicate was added using add_predicate().
isl_map* tiramisu::computation::get_schedule | ( | ) | const |
Get the schedule of the computation.
const std::string tiramisu::computation::get_unique_name ( ) const  [protected]
Return a unique name of computation; made of the following pattern: [computation name]@[computation address in memory].
tiramisu::computation& tiramisu::computation::get_update | ( | int | index | ) |
Return the update indicated by index, such that: if index == 0, this computation itself is returned; if index > 0, the index-th computation added through add_definitions() is returned.
void tiramisu::computation::gpu_tile | ( | tiramisu::var | L0,
tiramisu::var | L1, | ||
int | sizeX, | ||
int | sizeY | ||
) |
Tile the computation and then tag the outermost tile dimension to be mapped to GPU blocks and tag the innermost tile dimensions to be mapped to GPU threads.
Tile the two loop levels L0 and L1 with rectangular tiling. sizeX and sizeY represent the tile size. L0 and L1 should be two consecutive loop levels (i.e., L0 = L1 + 1) and they should satisfy L0 > L1.
L0_outer, L1_outer, L0_inner and L1_inner are the names of the new dimensions created after tiling.
void tiramisu::computation::gpu_tile | ( | tiramisu::var | L0, |
tiramisu::var | L1, | ||
int | sizeX, | ||
int | sizeY, | ||
tiramisu::var | L0_outer, | ||
tiramisu::var | L1_outer, | ||
tiramisu::var | L0_inner, | ||
tiramisu::var | L1_inner | ||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. L0_outer, L1_outer, L0_inner and L1_inner are the names of the new loop levels created after tiling.
void tiramisu::computation::gpu_tile | ( | tiramisu::var | L0, |
tiramisu::var | L1, | ||
tiramisu::var | L2, | ||
int | sizeX, | ||
int | sizeY, | ||
int | sizeZ | ||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. It tiles the three loop levels L0, L1 and L2 with tile sizes sizeX, sizeY and sizeZ before mapping the outermost tile dimensions to GPU blocks and the innermost tile dimensions to GPU threads.
void tiramisu::computation::gpu_tile | ( | tiramisu::var | L0, |
tiramisu::var | L1, | ||
tiramisu::var | L2, | ||
int | sizeX, | ||
int | sizeY, | ||
int | sizeZ, | ||
tiramisu::var | L0_outer, | ||
tiramisu::var | L1_outer, | ||
tiramisu::var | L2_outer, | ||
tiramisu::var | L0_inner, | ||
tiramisu::var | L1_inner, | ||
tiramisu::var | L2_inner | ||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. L0_outer, L1_outer, L2_outer, L0_inner, L1_inner and L2_inner are the names of the new loop levels created after tiling.
bool tiramisu::computation::has_multiple_definitions ( )  [protected]
Return true if the computation has multiple definitions.
i.e., if the computation is defined multiple times. An update is a special case where a computation is defined multiple times. Duplicate computations are another example.
In the following example, C is defined multiple times whereas D is defined only once.
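// A sketch (f, the bounds and the expressions are illustrative):
tiramisu::computation C("{C[i]: 0<=i<10}", tiramisu::expr((uint8_t) 0), true, tiramisu::p_uint8, &f);
C.add_definitions("{C[i]: 10<=i<20}", tiramisu::expr((uint8_t) 1), true, tiramisu::p_uint8, &f);   // C: second definition
tiramisu::computation D("{D[i]: 0<=i<10}", tiramisu::expr((uint8_t) 2), true, tiramisu::p_uint8, &f);   // D: single definition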
void tiramisu::computation::init_computation (std::string iteration_space_str, tiramisu::function *fct, const tiramisu::expr &e, bool schedule_this_computation, tiramisu::primitive_t t)  [protected]
Initialize a computation.
This is a private function that should not be called explicitly by users.
void tiramisu::computation::interchange | ( | tiramisu::var | L0, |
tiramisu::var | L1 | ||
) |
Interchange (swap) the two loop levels L0 and L1.
void tiramisu::computation::interchange | ( | int | L0, |
int | L1 | ||
) |
Identical to void interchange(tiramisu::var L0, tiramisu::var L1);.
const bool tiramisu::computation::is_inline_computation | ( | ) | const |
Returns true if and only if the computation is inline.
virtual bool tiramisu::computation::is_recv ( ) const  [virtual]
Reimplemented in tiramisu::recv.
virtual bool tiramisu::computation::is_send ( ) const  [virtual]
Reimplemented in tiramisu::send.
virtual bool tiramisu::computation::is_send_recv ( ) const  [virtual]
Reimplemented in tiramisu::send_recv.
virtual bool tiramisu::computation::is_wait ( ) const  [virtual]
Reimplemented in tiramisu::wait.
void tiramisu::computation::mark_as_let_statement | ( | ) |
Mark this statement as a let statement.
void tiramisu::computation::mark_as_library_call | ( | ) |
Mark this statement as a library call.
tiramisu::computation::operator expr | ( | ) |
template<typename... Args> tiramisu::expr tiramisu::computation::operator() (Args... args)  [inline]
Access operator: C0(i,j) represents an access to the element (i,j) of the computation C0.
C0(i,j) represents the value computed by the computation C0(i,j)
Definition at line 4005 of file core.h.
References tiramisu::expr::dump(), tiramisu::o_access, and tiramisu::expr::substitute().
void tiramisu::computation::parallelize | ( | tiramisu::var | L | ) |
Tag the loop level L to be parallelized.
This function is equivalent to the function tiramisu::computation::tag_parallel_level(). There is no difference between the two.
void tiramisu::computation::set_access | ( | std::string | access_str | ) |
Set the access relation of the computation.
The access relation is a relation from computations to buffer locations. access_str is a string that represents the relation; it is encoded in the ISL format for relations (http://isl.gforge.inria.fr/user.html#Sets-and-Relations). Note that, in Tiramisu, the access relations of computations that have the same name must be identical.
Examples: tutorial_01, tutorial_02, tutorial_08 (actually most tutorials have set_access()).
void tiramisu::computation::set_access | ( | isl_map * | access | ) |
Set the access relation of the computation.
The access relation is a relation from computations to buffer locations. access is the relation given directly as an isl_map (see http://isl.gforge.inria.fr/user.html#Sets-and-Relations). Note that, in Tiramisu, the access relations of computations that have the same name must be identical.
Examples: tutorial_01, tutorial_02, tutorial_08 (actually most tutorials have set_access()).
void tiramisu::computation::set_expression | ( | const tiramisu::expr & | e | ) |
Set the expression of the computation.
void tiramisu::computation::set_inline | ( | bool | is_inline = true | ) |
Sets whether the computation is inline or not, based on the value of is_inline. If a computation is inline, accesses to the computation return the expression of that computation. E.g. if an inline computation S(i,j) is defined with the expression i + j, then S(i + 1, j * i) returns the expression i + 1 + j * i. If is_inline is not provided, the computation is set to be inline.
void tiramisu::computation::set_low_level_schedule | ( | isl_map * | map | ) |
Set the schedule indicated by map. map is a string that represents a mapping from the iteration domain to the time-space domain (the ISL format to represent maps is documented in http://barvinok.gforge.inria.fr/barvinok.pdf, Sec. 1.2.2).
The schedule is a map from the iteration domain to a time space domain. The same name of space should be used for both the range and the domain of the schedule.
In general, users can set the schedule using high level functions such as before(), after(), tile(), compute_at(), vectorize(), split(), ... The use of this function is only reserved for advanced users who want a low level control of the schedule.
Vectors in the time-space domain have the following form
computation_name[redundancy_ID,static,dynamic,static,dynamic,static,dynamic,static,...]
The first dimension of the vector is used to indicate the redundancy ID (the notion of the redundancy ID is explained later).
The following dimensions are interleaved dimensions: static, dynamic, static, dynamic, ... Dynamic dimensions represent the loop levels, while static dimensions are used to order statements within a given block of statements in a given loop level. For example, the computations c0 and c1 in the following loop nest
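// Sketch of the loop nest (c0 and c1 are statements in the same innermost loop body)
for (int i = 0; i < N; i++)
    for (int j = 0; j < N; j++)
    {
        c0;
        c1;
    }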
have the following representations in the iteration domain
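{c0[i,j]: 0<=i<N and 0<=j<N}
{c1[i,j]: 0<=i<N and 0<=j<N}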
and the following representation in the time-space domain
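{c0[0,0,i,0,j,0]: 0<=i<N and 0<=j<N}
{c1[0,0,i,0,j,1]: 0<=i<N and 0<=j<N}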
The first dimension (dimension 0) in the time-space domain (the leftmost dimension) is the redundancy ID (in this case it is 0, the meaning of this ID will be explained later). The second dimension (starting from the left) is a static dimension, the third dimension is a dynamic dimension that represents the loop level i, ..., the fifth dimension is a dynamic dimension that represents the loop level j and the last dimension (dimension 5) is a static dimension and allows the ordering of c1 after c0 in the loop nest.
To transform the previous iteration domain to the time-space domain, the following schedule should be used:
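{c0[i,j] -> c0[0,0,i,0,j,0]: 0<=i<N and 0<=j<N}
{c1[i,j] -> c1[0,0,i,0,j,1]: 0<=i<N and 0<=j<N}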
The first dimension called "redundancy ID" is only meaningful if the computation is redundant. i.e., some parts of the computation are redundantly computed. Redundant computations are in general used to maximize parallelism and data locality on the expense of doing some computations redundantly. For example, if the two computations c1(i,j) and c2(i,j) both depend on the computation c0(i,j), instead of waiting for c0(i,j) to be computed and then computing c1(i,j) and c2(i,j) in parallel, the thread executing c1(i,j) can compute c0(i,j) by itself and then run c1(i,j). The thread that computes c2(i,j) can do the same and compute c0(i,j) by itself and then compute c2(i,j). In this case the two threads do not need to wait. This is done at the expense of redundant computation since c0(i,j) is computed by both threads.
In general redundant computations are useful when tiling stencil computations. In the context of stencils such a tiling is called "overlapped tiling". Tiles that depend on results computed by other tiles that run in parallel can compute the boundaries redundantly which allows them to avoid waiting and thus can run in parallel.
In Tiramisu, the user can indicate that a chunk of a computation should be computed redundantly. The original computation always has a redundancy ID equal to 0 (which means this is the original computation). The redundant chunk has an ID that is different from 0 and that is used to uniquely identify it.
For example if we want to compute all of c0 three times (that is, compute the original computation and compute two redundant computations), we can use the following schedules:
The schedule of the original computation: {c0[i,j]->c0[0,0,i,0,j,0]: 0<=i<N and 0<=j<N} The schedule of the redundant computation N°1: {c0[i,j]->c0[1,0,i,0,j,0]: 0<=i<N and 0<=j<N} The schedule of the redundant computation N°2: {c0[i,j]->c0[2,0,i,0,j,0]: 0<=i<N and 0<=j<N}
The schedule of c0 in this case would be three maps that map c0[i,j] to the three different redundant computations in the time-processor domain:
The function set_schedule() overrides any other schedule set by the high level scheduling functions. Currently the user has to choose between using the high level scheduling functions or using this low level set_schedule function. The user cannot mix the use of the two in the same program because they are not compatible.
void tiramisu::computation::set_low_level_schedule | ( | std::string | map_str | ) |
Set the schedule indicated by map. map is a string that represents a mapping from the iteration domain to the time-space domain (the ISL format to represent maps is documented in http://barvinok.gforge.inria.fr/barvinok.pdf, Sec. 1.2.2).
The schedule is a map from the iteration domain to a time space domain. The same name of space should be used for both the range and the domain of the schedule.
In general, users can set the schedule using high level functions such as before(), after(), tile(), compute_at(), vectorize(), split(), ... The use of this function is only reserved for advanced users who want a low level control of the schedule.
Vectors in the time-space domain have the following form
computation_name[redundancy_ID,static,dynamic,static,dynamic,static,dynamic,static,...]
The first dimension of the vector is used to indicate the redundancy ID (the notion of the redundancy ID is explained later).
The following dimensions are interleaved dimensions: static, dynamic, static, dynamic, ... Dynamic dimensions represent the loop levels, while static dimensions are used to order statements within a given block of statements in a given loop level. For example, the computations c0 and c1 in the following loop nest
have the following representations in the iteration domain
and the following representation in the time-space domain
The first dimension (dimension 0) in the time-space domain (the leftmost dimension) is the redundancy ID (in this case it is 0, the meaning of this ID will be explained later). The second dimension (starting from the left) is a static dimension, the third dimension is a dynamic dimension that represents the loop level i, ..., the fifth dimension is a dynamic dimension that represents the loop level j and the last dimension (dimension 5) is a static dimension and allows the ordering of c1 after c0 in the loop nest.
To transform the previous iteration domain to the time-space domain, the following schedule should be used:
The first dimension called "redundancy ID" is only meaningful if the computation is redundant. i.e., some parts of the computation are redundantly computed. Redundant computations are in general used to maximize parallelism and data locality on the expense of doing some computations redundantly. For example, if the two computations c1(i,j) and c2(i,j) both depend on the computation c0(i,j), instead of waiting for c0(i,j) to be computed and then computing c1(i,j) and c2(i,j) in parallel, the thread executing c1(i,j) can compute c0(i,j) by itself and then run c1(i,j). The thread that computes c2(i,j) can do the same and compute c0(i,j) by itself and then compute c2(i,j). In this case the two threads do not need to wait. This is done at the expense of redundant computation since c0(i,j) is computed by both threads.
In general redundant computations are useful when tiling stencil computations. In the context of stencils such a tiling is called "overlapped tiling". Tiles that depend on results computed by other tiles that run in parallel can compute the boundaries redundantly which allows them to avoid waiting and thus can run in parallel.
In Tiramisu, the user can indicate that a chunk of a computation should be computed redundantly. The original computation always has a redundancy ID equal to 0 (which means this is the original computation). The redundant chunk has an ID that is different from 0 and that is used to uniquely identify it.
For example if we want to compute all of c0 three times (that is, compute the original computation and compute two redundant computations), we can use the following schedules:
The schedule of the original computation: {c0[i,j]->c0[0,0,i,0,j,0]: 0<=i<N and 0<=j<N} The schedule of the redundant computation N°1: {c0[i,j]->c0[1,0,i,0,j,0]: 0<=i<N and 0<=j<N} The schedule of the redundant computation N°2: {c0[i,j]->c0[2,0,i,0,j,0]: 0<=i<N and 0<=j<N}
The schedule of c0 in this case would be three maps that map c0[i,j] to the three different redundant computations in the time-processor domain:
The function set_schedule() overrides any other schedule set by the high level scheduling functions. Currently the user has to choose between using the high level scheduling functions or using this low level set_schedule function. The user cannot mix the use of the two in the same program because they are not compatible.
void tiramisu::computation::set_name (const std::string &n)  [protected]
Set the name of the computation.
void tiramisu::computation::set_schedule (isl_map *map)  [protected]
Set the schedule indicated by map. map is a string that represents a mapping from the iteration domain to the time-space domain (the ISL format to represent maps is documented in http://barvinok.gforge.inria.fr/barvinok.pdf, Sec. 1.2.2).
The schedule is a map from the iteration domain to a time space domain. The same name of space should be used for both the range and the domain of the schedule.
In general, users can set the schedule using high level functions such as before(), after(), tile(), compute_at(), vectorize(), split(), ... The use of this function is only reserved for advanced users who want a low level control of the schedule.
Vectors in the time-space domain have the following form
computation_name[redundancy_ID,static,dynamic,static,dynamic,static,dynamic,static,...]
The first dimension of the vector is used to indicate the redundancy ID (the notion of the redundancy ID is explained later).
The following dimensions are interleaved dimensions: static, dynamic, static, dynamic, ... Dynamic dimensions represent the loop levels, while static dimensions are used to order statements within a given block of statements in a given loop level. For example, the computations c0 and c1 in the following loop nest
have the following representations in the iteration domain
and the following representation in the time-space domain
The first dimension (dimension 0) in the time-space domain (the leftmost dimension) is the redundancy ID (in this case it is 0, the meaning of this ID will be explained later). The second dimension (starting from the left) is a static dimension, the third dimension is a dynamic dimension that represents the loop level i, ..., the fifth dimension is a dynamic dimension that represents the loop level j and the last dimension (dimension 5) is a static dimension and allows the ordering of c1 after c0 in the loop nest.
To transform the previous iteration domain to the time-space domain, the following schedule should be used:
The first dimension, called the "redundancy ID", is only meaningful if the computation is redundant, i.e., some parts of the computation are computed redundantly. Redundant computations are in general used to maximize parallelism and data locality at the expense of doing some computations redundantly. For example, if the two computations c1(i,j) and c2(i,j) both depend on the computation c0(i,j), then instead of waiting for c0(i,j) to be computed and only then computing c1(i,j) and c2(i,j) in parallel, the thread executing c1(i,j) can compute c0(i,j) by itself and then run c1(i,j). The thread that computes c2(i,j) can do the same: compute c0(i,j) by itself and then compute c2(i,j). In this case the two threads do not need to wait for each other, at the cost of redundant computation since c0(i,j) is computed by both threads.
In general, redundant computations are useful when tiling stencil computations. In the context of stencils, such tiling is called "overlapped tiling": tiles that depend on results computed by other tiles running in parallel can compute the boundary values redundantly, which lets them avoid waiting and thus run in parallel.
In Tiramisu, the user can indicate that a chunk of a computation should be computed redundantly. The original computation always has a redundancy ID equal to 0 (which means this is the original computation). The redundant chunk has an ID that is different from 0 and that is used to uniquely identify it.
For example, if we want to compute all of c0 three times (that is, compute the original computation plus two redundant computations), we can use the following schedules:
The schedule of the original computation: {c0[i,j]->c0[0,0,i,0,j,0]: 0<=i<N and 0<=j<N}
The schedule of the redundant computation N°1: {c0[i,j]->c0[1,0,i,0,j,0]: 0<=i<N and 0<=j<N}
The schedule of the redundant computation N°2: {c0[i,j]->c0[2,0,i,0,j,0]: 0<=i<N and 0<=j<N}
The schedule of c0 in this case is the union of these three maps, each mapping c0[i,j] to a different redundant copy in the time-processor domain.
The function set_schedule() overrides any schedule set by the high-level scheduling functions. Currently, the user has to choose between the high-level scheduling functions and this low-level set_schedule() function; the two cannot be mixed in the same program because they are not compatible.
|
protected |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
void tiramisu::computation::set_wait_access | ( | std::string | access_str | ) |
void tiramisu::computation::set_wait_access | ( | isl_map * | access | ) |
void tiramisu::computation::shift | ( | tiramisu::var | L0, |
int | n | ||
) |
Shift the loop level L0 of the iteration space by n iterations. n can be a positive or a negative number: a positive value shifts the loop iterations forward, while a negative value shifts them backward.
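For example, a minimal sketch (the computation S0 and the loop variable i are illustrative):
S0.shift(i, 2);
shifts the i loop of S0 forward by two iterations.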
void tiramisu::computation::skew | ( | tiramisu::var | i, |
tiramisu::var | j, | ||
int | f, | ||
tiramisu::var | ni, | ||
tiramisu::var | nj | ||
) |
Apply loop skewing on the loop levels i and j with a skewing factor of f. The names of the new loop levels are ni and nj.
This command transforms the loop (i, j) into the loop (i, f*i+j); a worked example is sketched below.
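A minimal sketch (the computation S0, the bounds N and M, and the factor 2 are illustrative):
// original loop nest
for (i = 0; i < N; i++)
  for (j = 0; j < M; j++)
    S0;
// apply skewing with a factor of 2
S0.skew(i, j, 2, ni, nj);
// resulting loop nest: ni plays the role of i and nj = 2*ni + j
for (ni = 0; ni < N; ni++)
  for (nj = 2*ni; nj < 2*ni + M; nj++)
    S0;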
void tiramisu::computation::skew | ( | tiramisu::var | i, |
tiramisu::var | j, | ||
tiramisu::var | k, | ||
int | factor, | ||
tiramisu::var | ni, | ||
tiramisu::var | nj, | ||
tiramisu::var | nk | ||
) |
Apply loop skewing on the loop levels i, j and k with a skewing factor of factor. The names of the new loop levels are ni, nj and nk.
This command transforms the loop (i, j, k) into the loop (i, factor*i+j, factor*i+k).
void tiramisu::computation::skew | ( | tiramisu::var | i, |
tiramisu::var | j, | ||
tiramisu::var | k, | ||
tiramisu::var | l, | ||
int | factor, | ||
tiramisu::var | ni, | ||
tiramisu::var | nj, | ||
tiramisu::var | nk, | ||
tiramisu::var | nl | ||
) |
Apply loop skewing on the loop levels i, j, k and l with a skewing factor of factor. The names of the new loop levels are ni, nj, nk and nl.
This command transforms the loop (i, j, k, l) into the loop (i, factor*i+j, factor*i+k, factor*i+l).
void tiramisu::computation::skew | ( | tiramisu::var | i, |
tiramisu::var | j, | ||
int | factor | ||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
void tiramisu::computation::skew | ( | tiramisu::var | i, |
tiramisu::var | j, | ||
tiramisu::var | k, | ||
int | factor | ||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
void tiramisu::computation::skew | ( | tiramisu::var | i, |
tiramisu::var | j, | ||
tiramisu::var | k, | ||
tiramisu::var | l, | ||
int | factor | ||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
void tiramisu::computation::skew | ( | int | i, |
int | j, | ||
int | factor | ||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
void tiramisu::computation::skew | ( | int | i, |
int | j, | ||
int | k, | ||
int | factor | ||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
void tiramisu::computation::skew | ( | int | i, |
int | j, | ||
int | k, | ||
int | l, | ||
int | factor | ||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
void tiramisu::computation::split | ( | tiramisu::var | L0, |
int | sizeX | ||
) |
Split the loop level L0 of the iteration space into two new loop levels. sizeX is the extent (size) of the inner loop created after splitting.
void tiramisu::computation::split | ( | tiramisu::var | L0, |
int | sizeX, | ||
tiramisu::var | L0_outer, | ||
tiramisu::var | L0_inner | ||
) |
Split the loop level L0 of the iteration space into two new loop levels named L0_outer and L0_inner. sizeX is the extent (size) of the inner loop created after splitting.
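For example, a minimal sketch (assuming a computation S0 and vars i, i0 and i1 already declared):
S0.split(i, 32, i0, i1);
splits the loop level i into an outer loop i0 and an inner loop i1 of extent 32.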
void tiramisu::computation::split | ( | int | L0, |
int | sizeX | ||
) |
Identical to void split(tiramisu::var L0, int sizeX);.
void tiramisu::computation::storage_fold | ( | tiramisu::var | dim, |
int | f | ||
) |
Fold the storage of the computation. The storage along the loop level dim is folded by a factor f.
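For example, a hedged sketch (the computation S0 and the loop variable j are illustrative):
S0.storage_fold(j, 2);
folds the storage dimension that corresponds to j so that only 2 elements are kept along that dimension (accesses along it wrap around modulo 2).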
tiramisu::computation* tiramisu::computation::store_at | ( | tiramisu::computation & | comp, |
tiramisu::var | L0 | ||
) |
Allocate the storage of this computation in the loop level L0 of the computation comp. The function returns the computation (operation) that allocates the buffer; the allocated buffer itself is not returned.
void tiramisu::computation::store_in | ( | buffer * | buff | ) |
Store this computation in buff. Let us assume that we have a computation C:
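A sketch of such a computation, written as an iteration domain with an illustrative bound N:
{C[i]: 0<=i<N}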
and that we want to store each C(i) in bufC[i]. Then we can use store_in() to indicate that as follows:
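A sketch of the call, assuming bufC is a tiramisu::buffer object:
C.store_in(&bufC);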
This means that each computation C(i) will be stored in the buffer location bufC[i].
If iterators is specified, the iterators are used to specify how the computation is mapped to the buffer. If the dimensions of this computation are in0, in1, ..., inn and iterators is equal to im0, im1, ..., imm, then the computation is mapped as follows: the computation C[in0, in1, ..., inn] is stored in the buffer location bufC[im0, im1, ..., imm].
This can be used to store the data in many ways (reordering the storage, storing into modulo buffers, ...).
Assuming we have a computation D(i,j) that has the following iteration domain:
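A sketch of such an iteration domain, with illustrative bounds N and M:
{D[i,j]: 0<=i<N and 0<=j<M}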
and assuming we have a buffer bufD.
The store_in() function can be used to implement many types of data mappings:
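The sketches below are illustrative only and assume an overload of store_in() that also takes the mapped iterators (the exact signature is not shown on this page):
D.store_in(&bufD, {i, j});   // direct mapping: D(i,j) is stored in bufD[i,j]
D.store_in(&bufD, {j, i});   // transposed storage: D(i,j) is stored in bufD[j,i]
D.store_in(&bufD, {i%2, j}); // modulo (folded) storage: D(i,j) is stored in bufD[i%2,j]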
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
void tiramisu::computation::tag_distribute_level | ( | tiramisu::var | L | ) |
Tag the loop level L
to be distributed.
void tiramisu::computation::tag_distribute_level | ( | int | L | ) |
Tag the loop level L
to be distributed.
void tiramisu::computation::tag_gpu_level | ( | tiramisu::var | L0, |
tiramisu::var | L1 | ||
) |
Tag the loop levels L0 and L1 to be mapped to GPU.
void tiramisu::computation::tag_gpu_level | ( | tiramisu::var | L0, |
tiramisu::var | L1, | ||
tiramisu::var | L2, | ||
tiramisu::var | L3 | ||
) |
Tag the loop levels L0, L1, L2 and L3 to be mapped to GPU.
void tiramisu::computation::tag_gpu_level | ( | tiramisu::var | L0, |
tiramisu::var | L1, | ||
tiramisu::var | L2, | ||
tiramisu::var | L3, | ||
tiramisu::var | L4, | ||
tiramisu::var | L5 | ||
) |
Tag the loop levels L0, L1, L2, L3, L4 and L5 to be mapped to GPU.
void tiramisu::computation::tag_parallel_level | ( | tiramisu::var | L | ) |
Tag the loop level L
to be parallelized.
void tiramisu::computation::tag_parallel_level | ( | int | L | ) |
Identical to void tag_parallel_level(tiramisu::var L);.
void tiramisu::computation::tag_unroll_level | ( | tiramisu::var | L | ) |
Tag the loop level L
to be unrolled.
The user can only tag loop levels that have constant extent.
void tiramisu::computation::tag_unroll_level | ( | int | L | ) |
Identical to void tag_unroll_level(tiramisu::var L);.
void tiramisu::computation::tag_vector_level | ( | tiramisu::var | L, |
int | len | ||
) |
Tag the loop level L
to be vectorized.
len
is the vector length.
The user can only tag loop levels that have constant extent. If a loop level does not have a constant extent, the user should use the vectorize() command instead, or call separate() and split() manually.
The user has to make sure that the extent of the dimension is bigger than len. Vectorizing a loop that has fewer than len iterations is not correct.
void tiramisu::computation::tag_vector_level | ( | int | L, |
int | len | ||
) |
Identical to void tag_vector_level(tiramisu::var L, int len);.
computation& tiramisu::computation::then | ( | computation & | next_computation, |
tiramisu::var | L | ||
) |
Schedule this computation to run before the computation next_computation at the loop level L, and return next_computation.
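For example, a minimal sketch (the computations S0, S1, S2 and the loop variables i and j are illustrative); because then() returns the next computation, calls can be chained:
S0.then(S1, j)
  .then(S2, i);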
Notes
L is a loop level of this computation.
void tiramisu::computation::tile | ( | tiramisu::var | L0,
tiramisu::var | L1, | ||
int | sizeX, | ||
int | sizeY | ||
) |
Tile the two loop levels L0 and L1 with rectangular tiling. sizeX and sizeY represent the tile size. L0 and L1 should be two consecutive loop levels.
void tiramisu::computation::tile | ( | tiramisu::var | L0, |
tiramisu::var | L1, | ||
int | sizeX, | ||
int | sizeY, | ||
tiramisu::var | L0_outer, | ||
tiramisu::var | L1_outer, | ||
tiramisu::var | L0_inner, | ||
tiramisu::var | L1_inner | ||
) |
Tile the two loop levels L0 and L1 with rectangular tiling. sizeX and sizeY represent the tile size. L0 and L1 should be two consecutive loop levels. L0_outer, L1_outer, L0_inner and L1_inner are the names of the new dimensions created after tiling.
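For example, a minimal sketch (assuming a computation S0 and vars i, j, i0, j0, i1 and j1 already declared):
S0.tile(i, j, 32, 32, i0, j0, i1, j1);
tiles the i and j loops into 32x32 tiles; i0 and j0 iterate over the tiles while i1 and j1 iterate inside a tile.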
void tiramisu::computation::tile | ( | tiramisu::var | L0, |
tiramisu::var | L1, | ||
tiramisu::var | L2, | ||
int | sizeX, | ||
int | sizeY, | ||
int | sizeZ | ||
) |
Tile the three loop levels L0, L1 and L2 with rectangular tiling. sizeX, sizeY and sizeZ represent the tile sizes. L0, L1 and L2 should be three consecutive loop levels.
void tiramisu::computation::tile | ( | tiramisu::var | L0, |
tiramisu::var | L1, | ||
tiramisu::var | L2, | ||
int | sizeX, | ||
int | sizeY, | ||
int | sizeZ, | ||
tiramisu::var | L0_outer, | ||
tiramisu::var | L1_outer, | ||
tiramisu::var | L2_outer, | ||
tiramisu::var | L0_inner, | ||
tiramisu::var | L1_inner, | ||
tiramisu::var | L2_inner | ||
) |
Tile the three loop levels L0, L1 and L2 with rectangular tiling. sizeX, sizeY and sizeZ represent the tile sizes. L0, L1 and L2 should be three consecutive loop levels. L0_outer, L1_outer, L2_outer, L0_inner, L1_inner and L2_inner are the names of the new dimensions created after tiling.
void tiramisu::computation::tile | ( | int | L0, |
int | L1, | ||
int | sizeX, | ||
int | sizeY | ||
) |
Tile the two loop levels L0 and L1 with rectangular tiling. sizeX and sizeY represent the tile size. L0 and L1 should be two consecutive loop levels, with L1 the loop level immediately inside L0 (i.e., L1 = L0 + 1).
void tiramisu::computation::tile | ( | int | L0, |
int | L1, | ||
int | L2, | ||
int | sizeX, | ||
int | sizeY, | ||
int | sizeZ | ||
) |
Tile the three loop levels L0, L1 and L2 with rectangular tiling. sizeX, sizeY and sizeZ represent the tile sizes. L0, L1 and L2 should be consecutive loop levels (i.e., L1 = L0 + 1 and L2 = L1 + 1).
void tiramisu::computation::unroll | ( | tiramisu::var | L, |
int | fac | ||
) |
Unroll the loop level L with an unrolling factor fac.
The difference between this function and tag_unroll_level() is that this function first separates the iteration domain into full and partial iteration domains for unrolling, and then calls tag_unroll_level(). tag_unroll_level() only tags a dimension to be unrolled; it does not modify the tagged dimension.
This function separates the iteration domain into two iteration domains: a full iteration domain, whose upper bound is a multiple of fac, and a partial iteration domain, whose upper bound is not. The full iteration domain is then split by fac and the inner loop (which has a constant extent equal to fac) is tagged as an unrolled loop.
Let us assume the following loop (a loop represents an iteration domain)
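A sketch of such an iteration domain, with illustrative bounds N and M:
{S0[i,j]: 0<=i<N and 0<=j<M}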
To unroll the j loop with an unrolling factor of 4, one should call
S0.unroll(j, 4);
The loop (iteration domain) is first separated into the following two loops
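A sketch of the separated iteration domains, assuming the domain above:
{S0[i,j]: 0<=i<N and 0<=j<4*floor(M/4)}   (full part: the extent of j is a multiple of 4)
{S0[i,j]: 0<=i<N and 4*floor(M/4)<=j<M}   (partial part: the remaining iterations)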
The full loop is then split by 4
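A sketch of the full part after splitting j by 4 (the names i1 and i2 are illustrative and match the text below):
{S0[i,i1,i2]: 0<=i<N and 0<=i1<floor(M/4) and 0<=i2<4}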
the i2 loop is then tagged to be unrolled.
L_outer and L_inner are the names of the new loops created after splitting; L_outer is the outer loop. If not provided, default names will be assigned.
void tiramisu::computation::unroll | ( | tiramisu::var | L, |
int | fac, | ||
tiramisu::var | L_outer, | ||
tiramisu::var | L_inner | ||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts: L_outer and L_inner are the names of the new loops created after splitting, with L_outer the outer loop.
void tiramisu::computation::unschedule_this_computation | ( | ) |
Do not schedule a previously scheduled computation.
void tiramisu::computation::vectorize | ( | tiramisu::var | L, |
int | v | ||
) |
Vectorize the loop level L using the vector length v.
The difference between this function and tag_vector_level() is that this function first prepares the iteration domain for vectorization and then calls tag_vector_level(). tag_vector_level() only tags a dimension to be vectorized; it does not change the tagged dimension.
This function separates the iteration domain into two iteration domains: a full iteration domain, whose upper bound is a multiple of v, and a partial iteration domain, whose upper bound is not. The full iteration domain is then split by v and the inner loop (which has a constant extent equal to v) is tagged as a vector loop.
Let us assume the following loop (a loop represents an iteration domain)
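A sketch of such an iteration domain, with illustrative bounds N and M:
{S0[i,j]: 0<=i<N and 0<=j<M}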
To vectorize the j loop with a vector length 4, one should call
S0.vectorize(j, 4);
The loop (iteration domain) is first separated into the following two loops
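A sketch of the separated iteration domains, assuming the domain above:
{S0[i,j]: 0<=i<N and 0<=j<4*floor(M/4)}   (full part: the extent of j is a multiple of 4)
{S0[i,j]: 0<=i<N and 4*floor(M/4)<=j<M}   (partial part: the remaining iterations)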
The full loop is then split by 4
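A sketch of the full part after splitting j by 4 (the names i1 and i2 are illustrative and match the text below):
{S0[i,i1,i2]: 0<=i<N and 0<=i1<floor(M/4) and 0<=i2<4}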
the i2 loop is then tagged to be vectorized.
The user has to make sure that the extent of the dimension is bigger than v. Vectorizing a loop that has fewer than v iterations is not correct.
The names of the new loop iterators created after vectorization are L_outer and L_inner. If not provided, default names are assigned.
void tiramisu::computation::vectorize | ( | tiramisu::var | L, |
int | v, | ||
tiramisu::var | L_outer, | ||
tiramisu::var | L_inner | ||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts: L_outer and L_inner are the names of the new loop iterators created after vectorization.
|
protected |
|
protected |
|
static |
root_dimension is a number used to specify the dimension level known as root.
The root dimension level is the outermost level: it is the level outside any loop nest. Loop level 0 is the level of the first (outermost) loop, loop level 1 is the level of the following inner loop, and so on.
Where is this number used?
These numbers are used in the helper functions used for scheduling (such as after(), before(), ...). For example, c0.after(c1) indicates that the computation c0 should be executed after the computation c1. Since the two computations c0 and c1 are usually nested in a loop, we need to specify at which loop level c0 is after c1. This is where we need to specify the loop level numbers. Here is an example. Suppose that the two computations c0 and c1 have the following iteration domains {c0[i,j]: 0<=i<N and 0<=j<N} and {c1[i,j]: 0<=i<N and 0<=j<N}.
When code is generated for the two computations, two loop nests are generated. When scheduling c0 after c1 using the after function, the user can choose one among three possibilities in specifying at which level c0 is after c1.
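A sketch of the three calls (using the after() overload that takes a loop level; the constant used for the root level follows the root_dimension member documented here):
c0.after(c1, computation::root_dimension);   // c0 is after c1 at the root level: the two loop nests run one after the other
c0.after(c1, 0);                             // c0 is after c1 starting from loop level 0
c0.after(c1, 1);                             // c0 is after c1 starting from loop level 1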
c0.after(c1, 0) means that c0 is after c1 starting from loop level 0 (before loop level 0, c0 and c1 keep the same order).
c0.after(c1, 1) means that c0 is after c1 starting from loop level 1 (before loop level 1, c0 and c1 keep the same order).
|
static |
Equivalent of computation::root, but to be used with scheduling functions that take loop levels (integers) as input instead of tiramisu::var.
|
protected |