Jupyter Notebooks

For interactively exploring data in a Data Cube, we recommend using Jupyter Notebooks.

Standalone Tools

The Data Cube software comes with several tools that can be used for data exploration and exporting, without writing any code.

  • datacube (see datacube --help after installation)
  • pixeldrill
  • movie_generator

Code Recipes

Display an RGB Image

Uses datacube.Datacube datacube.model.CRS datacube.Datacube.load() xarray.Dataset.to_array() xarray.DataArray.transpose() xarray.DataArray.where() xarray.DataArray.all() xarray.plot.imshow()

import datacube
from datacube.storage.masking import mask_valid_data as mask_invalid_data

query = {
    'time': ('1990-01-01', '1991-01-01'),
    'lat': (-35.2, -35.4),
    'lon': (149.0, 149.2),

dc = datacube.Datacube(app='plot-rgb-recipe')
data = dc.load(product='ls5_nbar_albers', measurements=['red', 'green', 'blue'], **query)
data = mask_invalid_data(data)

fake_saturation = 4000
rgb = data.to_array(dim='color')
rgb = rgb.transpose(*(rgb.dims[1:]+rgb.dims[:1]))  # make 'color' the last dimension
rgb = rgb.where((rgb <= fake_saturation).all(dim='color'))  # mask out pixels where any band is 'saturated'
rgb /= fake_saturation  # scale to [0, 1] range for imshow

rgb.plot.imshow(x=data.crs.dimensions[1], y=data.crs.dimensions[0],
                col='time', col_wrap=5, add_colorbar=False)

Multi-Product Time Series

Uses datacube.Datacube datacube.model.DatasetType datacube.index._datasets.DatasetTypeResource.get_by_name() datacube.Datacube.load() xarray.Dataset.isel() xarray.concat()

import numpy
import xarray
import datacube

query = {
    'time': ('2013-01-01', '2014-01-01'),
    'lat': (-35.2, -35.4),
    'lon': (149.0, 149.2),

products = ['ls7_nbar_albers', 'ls8_nbar_albers']

dc = datacube.Datacube(app='multi-prod-recipe')

# find similarly named measurements
measurements = set(dc.index.products.get_by_name(products[0]).measurements.keys())
for prod in products[1:]:

datasets = []
for prod in products:
    ds = dc.load(product=prod, measurements=measurements, **query)
    ds['product'] = ('time', numpy.repeat(prod, ds.time.size))

combined = xarray.concat(datasets, dim='time')
combined = combined.isel(time=combined.time.argsort())  # sort along time dim

Polygon Drill

Uses datacube.Datacube xarray.DataArray datacube.model.CRS datacube.Datacube.load() xarray.Dataset.where()

import fiona
import rasterio.features

import datacube
from datacube.utils import geometry

def geometry_mask(geoms, geobox, all_touched=False, invert=False):
    Create a mask from shapes.

    By default, mask is intended for use as a
    numpy mask, where pixels that overlap shapes are False.
    :param list[Geometry] geoms: geometries to be rasterized
    :param datacube.utils.GeoBox geobox:
    :param bool all_touched: If True, all pixels touched by geometries will be burned in. If
                             false, only pixels whose center is within the polygon or that
                             are selected by Bresenham's line algorithm will be burned in.
    :param bool invert: If True, mask will be True for pixels that overlap shapes.
    return rasterio.features.geometry_mask([geom.to_crs(geobox.crs) for geom in geoms],

def main():
    shape_file = 'my_shape_file.shp'
    with fiona.open(shape_file) as shapes:
        crs = geometry.CRS(shapes.crs_wkt)
        first_geometry = next(iter(shapes))['geometry']
        geom = geometry.Geometry(first_geometry, crs=crs)

    query = {
        'time': ('1990-01-01', '1991-01-01'),
        'geopolygon': geom

    dc = datacube.Datacube(app='poly-drill-recipe')
    data = dc.load(product='ls5_nbar_albers', measurements=['red'], **query)

    mask = geometry_mask([geom], data.geobox, invert=True)
    data = data.where(mask)

    data.red.plot.imshow(col='time', col_wrap=5)

Line Transect

Uses datacube.Datacube xarray.DataArray datacube.model.CRS datacube.Datacube.load() xarray.Dataset.sel_points()

import fiona
import numpy
import xarray

import datacube
from datacube.utils import geometry

def transect(data, line, resolution, method='nearest', tolerance=None):
    Extract line transect from data along geom

    :param xarray.Dataset data: data loaded via `Datacube.load`
    :param datacube.utils.Geometry line: line along which to extract the transect
    :param float resolution: interval used to extract points along the line (in data CRS units)
    :param str method: see xarray.Dataset.sel_points
    :param float tolerance: see xarray.Dataset.sel_points
    assert line.type == 'LineString'
    line = line.to_crs(data.crs)
    dist = numpy.arange(0, line.length, resolution)
    points = [line.interpolate(d).coords[0] for d in dist]
    indexers = {
        data.crs.dimensions[0]: [p[1] for p in points],
        data.crs.dimensions[1]: [p[0] for p in points]
    return data.sel_points(xarray.DataArray(dist, name='distance', dims=['distance']),

def main():
    with fiona.open('line.shp') as shapes:
        crs = geometry.CRS(shapes.crs_wkt)
        first_geometry = next(shapes)['geometry']
        line = geometry.Geometry(first_geometry, crs=crs)

    query = {
        'time': ('1990-01-01', '1991-01-01'),
        'geopolygon': line

    dc = datacube.Datacube(app='line-trans-recipe')
    data = dc.load(product='ls5_nbar_albers', measurements=['red'], **query)

    trans = transect(data, line, abs(data.affine.a))
    trans.red.plot(x='distance', y='time')