The index to the articles in this series is found here.
So, we’re about ready for our next step: converting those .gif files from the radar station into files holding no extraneous information, just precipitation data. So, that colour bar on the right can go, as can all the background. We want to encode those pixels so that any non-rain pixel has a value of 0, and rain pixels have numeric values that increase as the intensity of rain increases.
So, in earlier postings we created a background image. Now, for each radar image, we compare the background to the new image. Where pixels differ, if the colour in the new image can be found in a reference table of colours to rain intensities, then we record the appropriate ordinal of the table entry in the file.
I’ll record the results in a binary data file of my own. Because I’ll have to read these files from multiple scripts, it makes sense to create classes to manipulate them, and to design a file format that can verify that we’re not reading some random file not of my creation.
Here, then, is the manipulation package, called rpreddtypes.py:
#! /usr/bin/python3
import numpy as np
import gzip
# First, classes to manipulate the intermediate binary file.
#
# Format:
# RAIN PREDICTOR BIN FILE\n
# VERSION 1\n
# WIDTH NNN\n
# HEIGHT NNN\n
# XOFFSET NNN\n
# YOFFSET NNN\n
# <gzip-compressed binary blob of byte values, one per pixel>
class RpBinFileReadError(Exception):
    """Raised when a file cannot be read as a rain predictor bin file.

    The human-readable reason is kept in the ``message`` attribute.
    """

    def __init__(self, message):
        super().__init__(message)
        self.message = message
class RpBinCommon:
    """Header keywords shared by the bin file reader and writer."""

    # Magic first line that identifies the file type.
    HEADER_KEY = "RAIN PREDICTOR BIN FILE"

    # Keywords that start each "<KEY> <number>" header line, listed in
    # the order the lines appear in the file.
    VERSION_KEY = "VERSION"
    WIDTH_KEY = "WIDTH"
    HEIGHT_KEY = "HEIGHT"
    XOFFSET_KEY = "XOFFSET"
    YOFFSET_KEY = "YOFFSET"
class RpBinReader(RpBinCommon):
    """
    Reader for intermediate binary data type

    Call read() to load a file, then use the get*() accessors to
    retrieve the header fields and the decompressed pixel data.
    """

    def __init__(self):
        self.version = 0
        self.buffer = b''
        self.blen = 0
        self.width = 0
        self.height = 0
        self.xoffset = 0
        self.yoffset = 0

    def _invalidFile(self, filename):
        """Build the exception used for any malformed header."""
        return RpBinFileReadError('File {0} is not a valid '
                                  'file'.format(filename))

    def _readField(self, istream, key, filename):
        """Read one '<KEY> <non-negative integer>' header line.

        Returns the integer.  Raises RpBinFileReadError when the line
        does not have exactly two space-separated words, the keyword is
        not `key`, or the number is missing, malformed or negative.
        """
        try:
            name, value = (istream.readline().rstrip()
                           .decode('ascii').split(" "))
            value = int(value)
        except ValueError as ex:
            # Covers a wrong word count, a non-numeric value and
            # non-ASCII bytes (UnicodeDecodeError is a ValueError).
            raise self._invalidFile(filename) from ex
        if name != key or value < 0:
            raise self._invalidFile(filename)
        return value

    def read(self, filename):
        """Load and validate the bin file `filename`.

        On success the header fields and decompressed pixel bytes are
        stored on the instance.  Raises RpBinFileReadError if the file
        cannot be opened, has a malformed or unsupported header, or its
        payload does not decompress to exactly width * height bytes.
        """
        # Imported here because the module only imports gzip, but
        # gzip.decompress() can surface zlib.error on damaged data.
        import zlib

        try:
            istream = open(filename, 'rb')
        except OSError as ex:
            raise RpBinFileReadError(ex.strerror) from ex

        with istream:
            try:
                header = istream.readline().rstrip().decode('ascii')
            except ValueError as ex:
                raise self._invalidFile(filename) from ex
            if header != self.HEADER_KEY:
                raise self._invalidFile(filename)

            try:
                vstr, vnum = (istream.readline().rstrip()
                              .decode('ascii').split(" "))
            except ValueError as ex:
                raise self._invalidFile(filename) from ex
            if vstr != self.VERSION_KEY:
                raise self._invalidFile(filename)
            if vnum != '1':
                raise RpBinFileReadError('File {0} is version {1} '
                                         'which is not supported '
                                         'by this code'.format(filename,
                                                               vnum))

            width = self._readField(istream, self.WIDTH_KEY, filename)
            height = self._readField(istream, self.HEIGHT_KEY, filename)
            xoffset = self._readField(istream, self.XOFFSET_KEY, filename)
            yoffset = self._readField(istream, self.YOFFSET_KEY, filename)

            corrupted = RpBinFileReadError('File {0} is '
                                           'corrupted'.format(filename))
            try:
                payload = gzip.decompress(istream.read())
            except (OSError, EOFError, zlib.error) as ex:
                raise corrupted from ex
            if width * height != len(payload):
                raise corrupted

        # Only publish the new state once the whole file has validated.
        self.version = int(vnum)
        self.width = width
        self.height = height
        self.xoffset = xoffset
        self.yoffset = yoffset
        self.buffer = payload
        self.blen = width * height

    def getVersion(self):
        """Return the format version of the last file read."""
        return self.version

    def getWidth(self):
        """Return the WIDTH header value of the last file read."""
        return self.width

    def getHeight(self):
        """Return the HEIGHT header value of the last file read."""
        return self.height

    def getXOffset(self):
        """Return the XOFFSET header value of the last file read."""
        return self.xoffset

    def getYOffset(self):
        """Return the YOFFSET header value of the last file read."""
        return self.yoffset

    def get1Dbuffer(self):
        """Return the decompressed pixel values as a bytes object."""
        return self.buffer

    def getNumpyArray(self):
        """Return the pixel values as a (height, width) uint8 array.

        The values keep their byte magnitudes rather than being
        collapsed to booleans, and the array is a writable copy.
        NOTE(review): the (height, width) shape assumes the writer
        stores pixels row by row -- confirm against the producer.
        """
        flat = np.frombuffer(self.buffer, dtype=np.uint8)
        return flat.reshape(self.height, self.width).copy()
class RpBinWriter(RpBinCommon):
    """Writer for intermediate binary data type"""

    def __init__(self):
        pass

    def write(self, filename, width, height, xoffset, yoffset, values):
        """Write a version 1 bin file to `filename`.

        The ASCII header lines are written first, followed by `values`
        converted to a bytearray and gzip-compressed.
        """
        numbered_fields = (
            (self.VERSION_KEY, 1),
            (self.WIDTH_KEY, width),
            (self.HEIGHT_KEY, height),
            (self.XOFFSET_KEY, xoffset),
            (self.YOFFSET_KEY, yoffset),
        )
        with open(filename, 'wb') as ofile:
            magic = '{0}\n'.format(self.HEADER_KEY)
            ofile.write(magic.encode('ascii'))
            for keyword, number in numbered_fields:
                line = '{0} {1}\n'.format(keyword, number)
                ofile.write(line.encode('ascii'))
            compressed = gzip.compress(bytearray(values))
            ofile.write(compressed)
This will be used by our script that generates binary training data files from .gif files. That’s this one, make-rain-inputs.py:
#! /usr/bin/python3
# This script will take .gif files downloaded from the radar station
# and convert them to a simpler format for eventual use. It will
# contain only information about precipitation or its absence, in a
# set of integer steps.
import argparse
import sys
import gif
import rpreddtypes
def parse_bool(text):
    """Convert a command-line word into a boolean.

    argparse's type=bool treats every non-empty string, including
    'False', as True, so the accepted words are interpreted explicitly.
    """
    word = text.strip().lower()
    if word in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if word in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got {0!r}'.format(text))


def parse_intensities(text):
    """Convert 'RRGGBB,RRGGBB,...' into a list of integer colour codes.

    Each entry is hexadecimal, with or without a leading '0x'.
    """
    try:
        return [int(word.strip(), 16) for word in text.split(',')]
    except ValueError:
        raise argparse.ArgumentTypeError(
            'expected comma-separated hexadecimal colour codes, '
            'got {0!r}'.format(text))


def build_parser():
    """Create the command-line parser for this script."""
    parser = argparse.ArgumentParser(description='Extract '
                                     'precipitation data.')
    parser.add_argument('ifilenames', type=str,
                        metavar='filename', nargs='+',
                        help='Filenames to process')
    parser.add_argument('--baseline', type=str,
                        dest='baseline',
                        help='The baseline .gif file')
    parser.add_argument('--width', type=int, dest='owidth',
                        default=-1,
                        help='The width of the sub-rectangle '
                        'that is to be output')
    parser.add_argument('--height', type=int, dest='oheight',
                        default=-1,
                        help='The height of the sub-rectangle '
                        'that is to be output')
    parser.add_argument('--top-left-x', type=int, dest='offsetx',
                        default=0,
                        help='The x-value of the upper left '
                        'of the sub-rectangle that is to be output')
    parser.add_argument('--top-left-y', type=int, dest='offsety',
                        default=0,
                        help='The y-value of the upper left '
                        'of the sub-rectangle that is to be output')
    parser.add_argument('--override-intensities', type=parse_intensities,
                        dest='intensities',
                        default=[0x99ccff, 0x0099ff, 0x00ff66,
                                 0x00cc00, 0x009900, 0x006600,
                                 0xffff33, 0xffcc00, 0xff9900,
                                 0xff6600, 0xff0000, 0xff0299,
                                 0x9933cc, 0x660099],
                        help='Override the colour codes for '
                        'intensities (comma-separated hexadecimal '
                        'RGB values, in increasing order of intensity)')
    parser.add_argument('--verbose', type=parse_bool, dest='verbose',
                        default=False,
                        help='Extra output during processing')
    return parser


def read_gif(filename):
    """Parse `filename` with gif.Reader, exiting if it is not usable."""
    reader = gif.Reader()
    with open(filename, 'rb') as stream:
        reader.feed(stream.read())
    if (not reader.is_complete()
            or not reader.has_screen_descriptor()):
        print('Failed to parse {0} as a '
              '.gif file'.format(filename))
        sys.exit(1)
    return reader


def encode_pixels(baseline_pixels, baseline_colours, convert_pixels,
                  convert_colours, image_width, xoffset, yoffset,
                  out_width, out_height, intensities):
    """Encode one image's sub-rectangle as a list of intensity ordinals.

    For each pixel of the rectangle (row by row) the result holds 0
    when the image colour equals the baseline colour or is absent from
    `intensities`, and otherwise the 1-based position of that colour in
    `intensities`.  Pixels lying beyond the end of either pixel buffer
    are recorded as 0, so the result always has out_width * out_height
    entries.
    """
    # First occurrence wins, matching a front-to-back scan of the table.
    ordinals = {}
    for ordinal, colour in enumerate(intensities, start=1):
        ordinals.setdefault(colour, ordinal)

    usable = min(len(baseline_pixels), len(convert_pixels))
    encoded = []
    for row in range(yoffset, yoffset + out_height):
        start = row * image_width + xoffset
        for pixel in range(start, start + out_width):
            if pixel >= usable:
                encoded.append(0)
                continue
            btuple = baseline_colours[baseline_pixels[pixel]]
            ctuple = convert_colours[convert_pixels[pixel]]
            if btuple == ctuple:
                encoded.append(0)
                continue
            # Pack the (r, g, b) tuple into a single 0xRRGGBB integer.
            code = (ctuple[0] * 256 * 256
                    + ctuple[1] * 256
                    + ctuple[2])
            encoded.append(ordinals.get(code, 0))
    return encoded


def main():
    """Convert every input .gif into a '<name>.bin' precipitation file."""
    args = build_parser().parse_args()

    if not args.baseline:
        print('A baseline comparison file must be supplied '
              'with the --baseline argument')
        sys.exit(1)

    baselineReader = read_gif(args.baseline)
    baselineBuffer = baselineReader.blocks[0].get_pixels()
    baselineColours = baselineReader.color_table
    baselineWidth = baselineReader.width
    baselineHeight = baselineReader.height

    # -1 is the "not supplied" sentinel: default to the whole image.
    newwidth = baselineWidth if args.owidth == -1 else args.owidth
    newheight = baselineHeight if args.oheight == -1 else args.oheight
    xoffset = args.offsetx
    yoffset = args.offsety

    # A rectangle that leaves the image would yield fewer pixels than
    # the WIDTH/HEIGHT written to the header, producing a file that
    # the reader rejects as corrupted.
    if (xoffset < 0 or yoffset < 0 or newwidth < 0 or newheight < 0
            or xoffset + newwidth > baselineWidth
            or yoffset + newheight > baselineHeight):
        print('The requested sub-rectangle ({0}x{1} at {2},{3}) does '
              'not fit inside the {4}x{5} baseline '
              'image'.format(newwidth, newheight, xoffset, yoffset,
                             baselineWidth, baselineHeight))
        sys.exit(1)

    for ifile in args.ifilenames:
        convertReader = read_gif(ifile)

        if (len(convertReader.blocks) != 2
                or not isinstance(convertReader.blocks[0], gif.Image)
                or not isinstance(convertReader.blocks[1], gif.Trailer)):
            print('While processing file: {}'.format(ifile))
            print('The code only accepts input files with a single block of '
                  'type Image followed by one of type Trailer.  This '
                  'constraint has not been met, the code will have to be '
                  'changed to handle the more complicated case.')
            sys.exit(1)

        convertBuffer = convertReader.blocks[0].get_pixels()
        convertColours = convertReader.color_table
        convertWidth = convertReader.width
        convertHeight = convertReader.height

        if baselineWidth != convertWidth or baselineHeight != convertHeight:
            print('The baseline file ({0}) and the file to convert {1} '
                  'have incompatible dimensions'.format(args.baseline,
                                                        ifile))
            sys.exit(1)

        output_block = encode_pixels(baselineBuffer, baselineColours,
                                     convertBuffer, convertColours,
                                     baselineWidth, xoffset, yoffset,
                                     newwidth, newheight,
                                     args.intensities)

        newfilename = ifile + '.bin'
        writer = rpreddtypes.RpBinWriter()
        writer.write(newfilename, newwidth, newheight, xoffset, yoffset,
                     output_block)

        if args.verbose:
            print('Wrote output file: {0}'.format(newfilename))


if __name__ == '__main__':
    main()
In the next posting we’ll need a script to generate the true values. That is, for each file, whether it indicates rain in Ottawa, and whether that rain is heavy. We’ll also want to attach a sequence number to each file, allowing us to know when we have continuous runs of data that we need to make a single training entry.
Update #1: Fixed a bug that resulted in rainfall in the lowest intensity bin not being recorded.
UPDATE #2 (2019-08-23): Included a link to an index page of articles in this series.