GTiff mask with shapefile in python with gdal, ogr, etc

ksed picture ksed · May 20, 2014 · Viewed 9.6k times · Source

OK, After a bit of fiddling, I've tweaked a script from the site hyperlink in the second comment line. The purpose of the script is to clip/mask a LARGE raster (i.e. that cannot fit into a 32-bit Python 2.7.5 application) in GTiff format with a shapefile with multiple polygons (each with a "Name" record) and save the clipped rasters into a "clip" sub-directory, where each masked grid is named after each polygon's "Name". Like the original script, it assumes that the GTiff and shapefile are in the same projection and overlap correctly, and it processes ~100 masks/sec. However, I have modified that script to 1) work with a float-valued elevation grid, 2) only load the window of the larger grid into memory that is bounded by the current polygon (i.e. to reduce the memory load), 2) exports GTiff's that have the right (i.e. not shifted) geo-location and value.

HOWEVER, I having trouble with each masked grid having a what I'll call a "right-sided shadow". That is for every ~vertical line in a polygon where the right side of that line is outside of the given polygon, the masked grid will includes one raster cell to the right of that polygon-side.

Thus, my question is, what am I doing wrong that gives the masked grid a right-shadow?

I'll try to figure out how to post an example shapefile and tif so that others can reproduce. The code below also has comment lines for integer-valued satellite imagery (e.g. in as in the original code from geospatialpython.com).

# RasterClipper.py - clip a geospatial image using a shapefile
# http://geospatialpython.com/2011/02/clip-raster-using-shapefile.html
# http://gis.stackexchange.com/questions/57005/python-gdal-write-new-raster-using-projection-from-old

import os, sys, time, Tkinter as Tk, tkFileDialog
import operator
from osgeo import gdal, gdalnumeric, ogr, osr
import Image, ImageDraw

def SelectFile(req = 'Please select a file:', ft='txt'):
    """ Customizable file-selection dialogue window, returns list() = [full path, root path, and filename]. """
    try:    # Try to select a csv dataset
        foptions = dict(filetypes=[(ft+' file','*.'+ft)], defaultextension='.'+ft)
        root = Tk.Tk(); root.withdraw(); fname = tkFileDialog.askopenfilename(title=req, **foptions); root.destroy()
        return [fname]+list(os.path.split(fname))
    except: print "Error: {0}".format(sys.exc_info()[1]); time.sleep(5);  sys.exit()

def rnd(v, N): return int(round(v/float(N))*N)
def rnd2(v): return int(round(v))

# Raster image to clip
rname = SelectFile('Please select your TIF DEM:',ft='tif')
raster = rname[2]
print 'DEM:', raster
os.chdir(rname[1])

# Polygon shapefile used to clip
shp = SelectFile('Please select your shapefile of catchments (requires Name field):',ft='shp')[2]
print 'shp:', shp

qs = raw_input('Do you want to stretch the image? (y/n)')
qs = True if qs == 'y' else False

# Name of base clip raster file(s)
if not os.path.exists('./clip/'):   os.mkdir('./clip/')
output = "/clip/clip"

# This function will convert the rasterized clipper shapefile
# to a mask for use within GDAL.
def imageToArray(i):
    """
    Converts a Python Imaging Library array to a
    gdalnumeric image.
    """
    a=gdalnumeric.fromstring(i.tostring(),'b')
    a.shape=i.im.size[1], i.im.size[0]
    return a

def arrayToImage(a):
    """
    Converts a gdalnumeric array to a
    Python Imaging Library Image.
    """
    i=Image.fromstring('L',(a.shape[1],a.shape[0]), (a.astype('b')).tostring())
    return i

def world2Pixel(geoMatrix, x, y, N= 5, r=True):
    """
    Uses a gdal geomatrix (gdal.GetGeoTransform()) to calculate
    the pixel location of a geospatial coordinate
    """
    ulX = geoMatrix[0]
    ulY = geoMatrix[3]
    xDist = geoMatrix[1]
    yDist = geoMatrix[5]
    rtnX = geoMatrix[2]
    rtnY = geoMatrix[4]
    if r:
        pixel = int(round(x - ulX) / xDist)
        line = int(round(ulY - y) / xDist)
    else:
        pixel = int(rnd(x - ulX, N) / xDist)
        line = int(rnd(ulY - y, N) / xDist)
    return (pixel, line)

def histogram(a, bins=range(0,256)):
    """
    Histogram function for multi-dimensional array.
    a = array
    bins = range of numbers to match
    """
    fa = a.flat
    n = gdalnumeric.searchsorted(gdalnumeric.sort(fa), bins)
    n = gdalnumeric.concatenate([n, [len(fa)]])
    hist = n[1:]-n[:-1]
    return hist

def stretch(a):
    """
    Performs a histogram stretch on a gdalnumeric array image.
    """
    hist = histogram(a)
    im = arrayToImage(a)
    lut = []
    for b in range(0, len(hist), 256):
        # step size
        step = reduce(operator.add, hist[b:b+256]) / 255
        # create equalization lookup table
        n = 0
        for i in range(256):
            lut.append(n / step)
            n = n + hist[i+b]
    im = im.point(lut)
    return imageToArray(im)

# Also load as a gdal image to get geotransform
# (world file) info
srcImage = gdal.Open(raster)
geoTrans_src = srcImage.GetGeoTransform()
#print geoTrans_src
pxs = int(geoTrans_src[1])
srcband = srcImage.GetRasterBand(1)
ndv = -9999.0
#ndv = 0

# Create an OGR layer from a boundary shapefile
shapef = ogr.Open(shp)
lyr = shapef.GetLayer()
minXl, maxXl, minYl, maxYl = lyr.GetExtent()
ulXl, ulYl = world2Pixel(geoTrans_src, minXl, maxYl)
lrXl, lrYl = world2Pixel(geoTrans_src, maxXl, minYl)
#poly = lyr.GetNextFeature()
for poly in lyr:
    pnm = poly.GetField("Name")

    # Convert the layer extent to image pixel coordinates
    geom = poly.GetGeometryRef()
    #print geom.GetEnvelope()
    minX, maxX, minY, maxY = geom.GetEnvelope()

    geoTrans = geoTrans_src
    ulX, ulY = world2Pixel(geoTrans, minX, maxY)
    lrX, lrY = world2Pixel(geoTrans, maxX, minY)

    # Calculate the pixel size of the new image
    pxWidth = int(lrX - ulX)
    pxHeight = int(lrY - ulY)

    # Load the source data as a gdalnumeric array
    #srcArray = gdalnumeric.LoadFile(raster)
    clip = gdalnumeric.BandReadAsArray(srcband, xoff=ulX, yoff=ulY, win_xsize=pxWidth, win_ysize=pxHeight)
    #clip = srcArray[:, ulY:lrY, ulX:lrX]

    # Create a new geomatrix for the image
    geoTrans = list(geoTrans)
    geoTrans[0] = minX
    geoTrans[3] = maxY

    # Map points to pixels for drawing the
    # boundary on a blank 8-bit,
    # black and white, mask image.
    points = []
    pixels = []
    #geom = poly.GetGeometryRef()
    pts = geom.GetGeometryRef(0)
    for p in range(pts.GetPointCount()):
        points.append((pts.GetX(p), pts.GetY(p)))
    for p in points:
        pixels.append(world2Pixel(geoTrans, p[0], p[1]))
    rasterPoly = Image.new("L", (pxWidth, pxHeight), 1)
    rasterize = ImageDraw.Draw(rasterPoly)
    rasterize.polygon(pixels, 0)
    mask = imageToArray(rasterPoly)

    # Clip the image using the mask
    #clip = gdalnumeric.choose(mask, (clip, 0)).astype(gdalnumeric.uint8)
    clip = gdalnumeric.choose(mask, (clip, ndv)).astype(gdalnumeric.numpy.float)

    # This image has 3 bands so we stretch each one to make them
    # visually brighter
    #for i in range(3):
    #    clip[i,:,:] = stretch(clip[i,:,:])
    if qs:  clip[:,:] = stretch(clip[:,:])

    # Save ndvi as tiff
    outputi = rname[1]+output+'_'+pnm+'.tif'
    #gdalnumeric.SaveArray(clip, outputi, format="GTiff", prototype=srcImage)
    driver = gdal.GetDriverByName('GTiff')
    DataSet = driver.Create(outputi, pxWidth, pxHeight, 1, gdal.GDT_Float64)
    #DataSet = driver.Create(outputi, pxWidth, pxHeight, 1, gdal.GDT_Int32)
    DataSet.SetGeoTransform(geoTrans)
    Projection = osr.SpatialReference()
    Projection.ImportFromWkt(srcImage.GetProjectionRef())
    DataSet.SetProjection(Projection.ExportToWkt())
    # Write the array
    DataSet.GetRasterBand(1).WriteArray(clip)
    DataSet.GetRasterBand(1).SetNoDataValue(ndv)

    # Save ndvi as an 8-bit jpeg for an easy, quick preview
    #clip = clip.astype(gdalnumeric.uint8)
    #gdalnumeric.SaveArray(clip, rname[1]+outputi+'.jpg', format="JPEG")
    #print '\t\tSaved:', outputi, '-.tif, -.jpg'
    print 'Saved:', outputi
    del mask, clip, geom
    del driver, DataSet

del shapef, srcImage, srcband

Answer

Rutger Kassies picture Rutger Kassies · May 30, 2014

This functionality is already incorporated into the gdal command line utilities. Given your case, i don't see any reason why you want to do this yourself in Python.

You can loop over the geomerties with OGR and for each one call gdalwarp with the appropriate parameters.

import ogr
import subprocess

inraster = 'NE1_HR_LC_SR_W_DR\NE1_HR_LC_SR_W_DR.tif'
inshape = '110m_cultural\ne_110m_admin_0_countries_lakes.shp'

ds = ogr.Open(inshape)
lyr = ds.GetLayer(0)

lyr.ResetReading()
ft = lyr.GetNextFeature()

while ft:

    country_name = ft.GetFieldAsString('admin')

    outraster = inraster.replace('.tif', '_%s.tif' % country_name.replace(' ', '_'))    
    subprocess.call(['gdalwarp', inraster, outraster, '-cutline', inshape, 
                     '-crop_to_cutline', '-cwhere', "'admin'='%s'" % country_name])

    ft = lyr.GetNextFeature()

ds = None

I have used some example data from Natural Earth in the example above, for Brazil the cutout looks like:

enter image description here

If you only want to crop the image to the area of the polygon and dont mask anything outside you can transform the Shapefile so it contains the envelope of the polygons. Or simply loose the shapefile and call gdal_translate with -projwin to specify the area of interest.