Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion devito/core/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,6 @@ def _specialize_clusters(cls, clusters, **kwargs):
# Reduce flops
clusters = cire(clusters, 'sops', sregistry, options, platform)
clusters = factorize(clusters, **kwargs)
clusters = optimize_pows(clusters)

# The previous passes may have created fusion opportunities
clusters = fuse(clusters)
Expand Down
1 change: 0 additions & 1 deletion devito/core/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,6 @@ def _specialize_clusters(cls, clusters, **kwargs):
# Reduce flops
clusters = cire(clusters, 'sops', sregistry, options, platform)
clusters = factorize(clusters, **kwargs)
clusters = optimize_pows(clusters)

# The previous passes may have created fusion opportunities
clusters = fuse(clusters)
Expand Down
7 changes: 6 additions & 1 deletion devito/operator/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
from devito.parameters import configuration
from devito.passes import (
Graph, lower_index_derivatives, generate_implicit, generate_macros,
minimize_symbols, unevaluate, error_mapper, is_on_device, lower_dtypes
minimize_symbols, optimize_pows, unevaluate, error_mapper, is_on_device,
lower_dtypes
)
from devito.symbolics import estimate_cost, subs_op_args
from devito.tools import (DAG, OrderedSet, Signer, ReducerMap, as_mapper, as_tuple,
Expand Down Expand Up @@ -409,6 +410,10 @@ def _lower_clusters(cls, expressions, profiler=None, **kwargs):
# Lower all remaining high order symbolic objects
clusters = lower_index_derivatives(clusters, **kwargs)

# Turn pows into multiplications. This must happen as late as possible
# in the compilation process to maximize the optimization potential
clusters = optimize_pows(clusters)

# Make sure no reconstructions can unpick any of the symbolic
# optimizations performed so far
clusters = unevaluate(clusters)
Expand Down
4 changes: 3 additions & 1 deletion devito/passes/clusters/aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,7 @@ def collect(extracted, ispace, minstorage):
k = group.dimensions_translated
else:
k = group.dimensions
k = frozenset(d for d in k if not d.is_NonlinearDerived)
mapper.setdefault(k, []).append(group)

aliases = AliasList()
Expand Down Expand Up @@ -912,7 +913,8 @@ def lower_schedule(schedule, meta, sregistry, opt_ftemps, opt_min_dtype,
indices.append(i.dim - i.lower + s)

dtype = sympy_dtype(pivot, base=meta.dtype)
obj = make(name=name, dimensions=dimensions, halo=halo, dtype=dtype)
obj = make(name=name, dimensions=dimensions, halo=halo, dtype=dtype,
shift=shift)
expression = Eq(obj[indices], uxreplace(pivot, subs))

callback = lambda idx: obj[[i + s for i, s in zip(idx, shift)]]
Expand Down
15 changes: 14 additions & 1 deletion devito/types/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from devito.types import Array, CompositeObject, Indexed, Symbol, LocalObject
from devito.types.basic import IndexedData
from devito.tools import CustomDtype, Pickable, frozendict
from devito.tools import CustomDtype, Pickable, as_tuple, frozendict

__all__ = ['Timer', 'Pointer', 'VolatileInt', 'FIndexed', 'Wildcard', 'Fence',
'Global', 'Hyperplane', 'Indirection', 'Temp', 'TempArray', 'Jump',
Expand Down Expand Up @@ -235,12 +235,25 @@ class TempArray(Array):

is_autopaddable = True

__rkwargs__ = (Array.__rkwargs__ + ('shift',))

def __init_finalize__(self, *args, shift=None, **kwargs):
    """
    Finalize the object, additionally recording the halo ``shift``.

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to the parent class' ``__init_finalize__``.
    shift : optional
        The shift applied to the halo; it is normalized through
        ``as_tuple`` before being stored and is later exposed via the
        ``shift`` property.
    """
    # Let the parent class complete its own setup first
    super().__init_finalize__(*args, **kwargs)

    # An integer for each Dimension representing the shift applied to the
    # halo for homogeneity reasons
    halo_shift = as_tuple(shift)
    self._shift = halo_shift

def __padding_setup__(self, **kwargs):
    """
    Set up the padding, falling back to the "smart" padding policy.

    If the caller supplies a non-None ``padding`` keyword, it is used as is.
    Otherwise the padding is computed by ``__padding_setup_smart__`` from the
    remaining keyword arguments. In both cases the result is handed over to
    the parent class' ``__padding_setup__``.
    """
    # `padding` is removed from kwargs so that it is not passed twice below
    explicit = kwargs.pop('padding', None)
    chosen = self.__padding_setup_smart__(**kwargs) if explicit is None else explicit
    return super().__padding_setup__(padding=chosen, **kwargs)

@property
def shift(self):
    """The halo shift recorded at construction time (read-only)."""
    return self._shift


class Fence:

Expand Down
72 changes: 18 additions & 54 deletions examples/performance/00_overview.ipynb

Large diffs are not rendered by default.

29 changes: 24 additions & 5 deletions tests/test_dse.py
Original file line number Diff line number Diff line change
Expand Up @@ -1745,7 +1745,7 @@ def g2_tilde(field, phi, theta):
assert len([i for i in FindSymbols().visit(bns['x0_blk0']) if i.is_Array]) == 7
assert len(FindNodes(VExpanded).visit(pbs['x0_blk0'])) == 3

@pytest.mark.parametrize('so_ops', [(4, 147), (8, 211)])
@pytest.mark.parametrize('so_ops', [(4, 146), (8, 210)])
@switchconfig(profiling='advanced')
def test_tti_J_akin_complete(self, so_ops):
grid = Grid(shape=(16, 16, 16))
Expand Down Expand Up @@ -2664,6 +2664,25 @@ def test_sparse_const(self):
op()
assert np.all(src.data == 8)

def test_space_and_time_invariant_together(self):
    """
    Build an Operator whose right-hand side multiplies a derivative of a
    `Function` (``a.dy.dz``) by derivatives of a `TimeFunction`
    (``vx.dx.dy + vx.dx.dz``), and check the structure of the generated
    loop nests.
    """
    order = 8
    grid = Grid(shape=(34, 45, 50))

    a = Function(name='a', grid=grid, space_order=order)
    vx = TimeFunction(name='vx', grid=grid, space_order=order)
    tzz = vx.func(name='tzz')

    lhs = tzz.forward
    rhs = a.dy.dz * (vx.dx.dy + vx.dx.dz) + tzz
    eqn = Eq(lhs, rhs)

    op = Operator(eqn, opt=('advanced', {'openmp': False}))

    # NOTE(review): presumably accessing `cfunction` forces the generated
    # code to be compiled, so that compilation errors surface here -- confirm
    op.cfunction

    # Expected loop nests, followed by their flattened representation
    expected_nests = ['t,x0_blk0,y0_blk0,x,y,z', 't,x0_blk0,y0_blk0,x,y,z']
    assert_structure(op, expected_nests, 'tx0_blk0y0_blk0xyzyz')


class TestIsoAcoustic:

Expand Down Expand Up @@ -2706,9 +2725,9 @@ def test_fullopt(self):
bns, _ = assert_blocking(op0, {})
bns, _ = assert_blocking(op1, {'x0_blk0'}) # due to loop blocking

assert summary0[('section0', None)].ops == 50
assert summary0[('section0', None)].ops == 55
assert summary0[('section1', None)].ops == 44
assert np.isclose(summary0[('section0', None)].oi, 2.851, atol=0.001)
assert np.isclose(summary0[('section0', None)].oi, 3.136, atol=0.001)

assert summary1[('section0', None)].ops == 31
assert summary1[('section1', None)].ops == 88
Expand Down Expand Up @@ -2760,7 +2779,7 @@ def tti_noopt(self):
# Make sure no opts were applied
op = wavesolver.op_fwd(False)
assert len(op._func_table) == 0
assert summary[('section0', None)].ops == 743
assert summary[('section0', None)].ops == 753

return v, rec

Expand Down Expand Up @@ -2846,7 +2865,7 @@ class TestTTIv2:

@switchconfig(profiling='advanced')
@pytest.mark.parametrize('space_order,expected', [
(4, 200), (12, 392)
(4, 190), (12, 382)
])
def test_opcounts(self, space_order, expected):
grid = Grid(shape=(3, 3, 3))
Expand Down
4 changes: 2 additions & 2 deletions tests/test_unexpansion.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ def test_v4(self):
'cire-mingain': 400}))

# Check code generation
assert op._profiler._sections['section1'].sops == 1443
assert op._profiler._sections['section1'].sops == 1442
assert_structure(op, ['x,y,z',
't,x0_blk0,y0_blk0,x,y,z',
't,x0_blk0,y0_blk0,x,y,z,i1',
Expand Down Expand Up @@ -431,7 +431,7 @@ def test_v1(self):
'openmp': False}))

# Check code generation
assert op._profiler._sections['section1'].sops == 191
assert op._profiler._sections['section1'].sops == 190
assert_structure(op, ['x,y,z',
't,x0_blk0,y0_blk0,x,y,z',
't,x0_blk0,y0_blk0,x,y,z,i0',
Expand Down
Loading