Data from EMULATE
As part of the work to make the EMULATE mean-sea-level pressure dataset, some observations from the Daily Weather Reports were (commercially) transcribed in the early 2000s. This transcription project covered the period 1856-1881, but captured only part of the observations: pressure and pressure tendency alone, and only from selected stations. These data were not actually used in EMULATE, and this is the first time they have been released.
The transcriptions were collected into spreadsheet files:
"DAILY WEATHER REPORTS (1861-1881)",,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,
1861,January,February,March,April,May,June,July,August,September,October,November,December,,,
,,,,,,,,,,,,,,,
1,29.59,29.73,29.25,29.94,30.16,29.75,30.03,29.82,-9999,29.51,29.3,-9999,"1 August 1861 - The first ""forecast"" for the general public Reputedly Robert Fitzroy coined the word ""forecast"".",,
2,30.23,30.37,29.54,29.92,30.35,-9999,29.73,29.73,29.71,29.64,29.35,30.28,"The first storm warning had been issued in February 1861.",,
3,30.33,-9999,-9999,29.86,29.99,30.1,-9999,29.32,29.49,30.12,-9999,29.99,,,
4,30.21,29.24,29.67,29.85,30.3,30.09,-9999,-9999,29.53,30.22,29.61,30,,,
5,30.15,29.35,29.76,29.9,-9999,30.04,-9999,29.53,29.83,29.91,29.35,29.55,,,
6,-9999,20.04,28.99,30.16,30.13,30.18,29.37,29.65,29.53,-9999,29.38,29.56,,,
7,30.19,28.92,29.71,-9999,30.08,30.13,-9999,29.72,29.63,29.96,29.19,28.99,,,
8,-9999,29.69,29.5,30.48,29.92,30.04,29.73,29.27,-9999,29.65,29.41,-9999,,,
9,29.99,30.12,30.012,30.53,29.61,-9999,29.77,29.6,29.75,29.68,29.51,29.44,,,
10,30.27,-9999,-9999,30.29,29.81,29.79,29.68,29.87,29.93,29.95,-9999,29.65,,,
11,30.05,30.27,28.92,30.36,29.89,29.84,29.59,-9999,29.93,29.33,29.11,29.65,,,
12,29.69,29.75,29.11,30.5,-9999,29.73,29.58,29.45,29.98,29.39,29.46,29.36,,,
13,-9999,29.62,29.81,30.39,30.16,30.09,29.78,29.71,29.78,-9999,29.71,29.19,,,
14,29.06,29.89,29.93,-9999,30.31,30.25,-9999,29.98,29.15,29.79,29.41,29.47,,,
15,30.31,29.54,29.62,30.43,30.25,30.25,29.68,29.48,-9999,30.18,29.51,-9999,,,
16,30.39,29.44,29.56,30.44,30.14,-9999,29.53,29.62,29.9,30.29,29.75,30.11,,,
17,30.39,-9999,-9999,30.42,30.19,30.19,29.58,29.71,30.08,30.33,-9999,29.98,,,
18,30.21,29.69,28.8,30.26,30.3,30.14,29.33,-9999,30.1,30.22,30.22,30.06,,,
19,30.01,29.6,28.79,30.32,-9999,30.1,29.38,29.54,30.08,29.93,29.89,30.5,,,
20,-9999,29.46,29.34,30.23,30.24,29.94,29.53,29.63,29.61,-9999,29.77,30.46,,,
21,30.09,28.97,28.87,-9999,30.17,29.96,-9999,29.99,29.66,29.84,29.41,30.36,,,
22,30.16,29.01,29.35,29.86,30.06,29.94,29.58,30.07,-9999,29.81,29.08,-9999,,,
23,29.99,29.52,29.36,29.85,29.88,-9999,29.33,29.73,28.93,29.87,29.25,30.48,,,
24,29.69,-9999,-9999,29.91,29.84,29.65,29.63,29.64,29.09,29.79,-9999,30.43,,,
25,29.43,30.31,29.7,29.79,29.42,29.69,29.33,-9999,29.37,30.1,29.09,-9999,,,
26,29.79,30.03,29.45,30.06,-9999,29.63,29.5,29.99,29.61,30.38,29.14,30.32,,,
27,-9999,29.72,29.43,30.19,29.81,29.78,29.58,30.02,29.83,-9999,29.47,30.59,,,
28,29.95,29.49,29.45,-9999,30.14,29.78,-9999,29.88,29.53,30.36,29.66,30.54,,,
29,29.97,,29.51,30.12,29.92,29.67,29.83,29.63,-9999,30.2,29.35,-9999,,,
30,29.82,,29.45,30.15,29.88,,29.43,29.68,29.95,29.94,29.32,30.56,,,
31,29.76,,,,29.93,,29.6,30.12,,29.63,,30.41,,,
,,,,,,,,,,,,,,,
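Each year in these files occupies a block: a row with the year and the twelve month names, a blank row, and then one row per day, with months as columns and -9999 marking missing values. A minimal sketch of pulling one value out of this layout (the file name is just one example from the collection):

import pandas

# Read the spreadsheet CSV positionally - there is no real header row
tab=pandas.read_csv("DWR_stations.Aberdeen.csv",header=None)

# Rows with 'January' in the second column start a year's block
year_starts=tab[tab.iloc[:,1].str.contains('January',na=False)].index.values

# Within a block, day d of month m is at row (start+d+1), column m
istart=year_starts[0]
pre=float(tab.iloc[istart+3+1,1])  # 3 January of the first year
if pre>0:                          # -9999 marks missing
    print(pre)                     # pressure, here in inches of mercury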
One complication with the DWR is that exact observation locations are not given, only approximate locations (e.g. ‘Aberdeen’). For MSLP observations, exact locations are not vital, so a nominal location has been assigned to each station.
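The nominal locations come from a metadata file (read as names.csv in the script below) with no header row. Judging from how the script uses it, column 0 is the name matched against the station, column 1 the name written to the output, and columns 2 and 3 the assigned latitude and longitude. An illustrative row (coordinates approximate, for illustration only):

ABERDEEN,ABERDEEN,57.15,-2.10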
To make the data easily usable, the station files have been converted into the same monthly data format used for the weatherrescue.org data, assigning latitudes, longitudes, and observation times in the process.
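Each line of that format gives year, month, day, hour, minute, latitude, longitude, pressure (in hPa), and station name - the format string is in output_station in the script below. A sample line (values illustrative):

1861 01 05 08 00  57.15   -2.10 1017.3         ABERDEEN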
#!/usr/bin/env python

# Convert the digitised data from Rob's Emulate files to the format
#  Ed Hawkins is using.

import os
import os.path
import pandas
import numpy
import datetime
import calendar
import re

# Get script directory
sd=os.path.dirname(os.path.abspath(__file__))

# Load the station names and locations
md=pandas.read_csv("%s/../../Lisa_storms/metadata/names.csv" % sd,
                   header=None)

# Get station name from file name
def get_station_name(file_name):
    sn=os.path.basename(file_name).split('.')
    if sn[0]=='DWR_stations':
        return sn[1].upper()
    return sn[0].upper()
# The old Emulate data is in several different formats.

# These files are in one-ob-per-day format
Files_opd=(
    'Alexandria.Alexandria.csv',
    'DWR_stations.Aberdeen.csv',
    'DWR_stations.Algiers.csv',
    'DWR_stations.Angra.csv',
    'DWR_stations.Biskra.csv',
    'DWR_stations.Brest.csv',
    'DWR_stations.Funchal.csv',
    'DWR_stations.Galway.csv',
    'DWR_stations.Greencastle.csv',   # Changes to TPD in 1868
    'DWR_stations.Holyhead.csv',
    'DWR_stations.La Coruna.csv',
    'DWR_stations.Liverpool.csv',
    'DWR_stations.Mogador.csv',
    'DWR_stations.Nairn.csv',         # Changes from TPD in 1871
    'DWR_stations.Pembroke.csv',
    'DWR_stations.Plymouth.csv',
    'DWR_stations.Portsmouth.csv',
    'DWR_stations.Rochefort.csv',     # Changes to TPD in 1871
    'DWR_stations.Roches Pt.csv',
    'DWR_stations.Scilly Islands.csv',
    'DWR_stations.Toulon.csv',
    'DWR_stations.Valentia.csv',
    'Lesina_Split.Sheet1.csv',
    'Lisbon.Lisbon.csv',
    'Lyons.Lyons.csv',
    'Rome.Rome.csv',
    'Sibiu.Funchal.csv',
    'Stornaway.Stornaway.csv',
)

# These files are in two-ob-per-day format
Files_tpd=(
    'DWR_stations.Nairn.csv',         # Changes to OPD in 1871
    'DWR_stations.Penzance.csv',
    'DWR_stations.Scarborough.csv',
    'DWR_stations.Greencastle.csv',   # Changes from OPD in 1868
    'DWR_stations.Rochefort.csv',     # Changes from OPD in 1871
)
def load_from_file_opd(file_name):
    station=get_station_name(file_name)
    csv=pandas.read_csv(file_name,header=None)
    # Stations with a daily ob. at some hour other than 8am GMT.
    #  Keys are the upper-case names produced by get_station_name.
    Hours={
        'LYONS':      9,
        'ALEXANDRIA': 9,
        'SIBIU':      8,
        'STORNAWAY':  9,
        'BREST':      8,   # Changes to 18 previous day on 1875-04-01
        'ROCHEFORT':  8,
        'MOGADOR':    7,
        'BISKRA':     7,
    }
    # Hour (GMT) of the daily ob.
    def obs_hour(station,year,month):
        if station=='BREST':
            if year>1875 or (year==1875 and month>3):
                return -6   # 6pm on the previous day
        return Hours.get(station,8)
    # Find the index numbers starting the data for a year
    #  (lines with 'January' in the second column)
    def find_year_starts(tab):
        return tab[tab.iloc[:,1].str.contains('January',na=False)].index.values
    # Extract a year's worth of data given the start location
    def get_data_for_year(station,tab,istart):
        dates=[]
        pressures=[]
        year=int(tab.iloc[istart,0])
        for month in range(1,13):
            hour=obs_hour(station,year,month)
            lastday=calendar.monthrange(year,month)[1]
            for day in range(1,lastday+1):
                pre=tab.iloc[istart+day+1,month]
                try:
                    pre=float(pre)
                    if numpy.isnan(pre): continue
                    if pre<0: continue          # -9999 - missing
                    if pre<35: pre=pre*33.8639  # inHg -> hPa
                    if pre<800: pre=pre*1.33    # mmHg -> hPa (approx.)
                except ValueError:
                    continue
                # The timedelta handles negative (previous-day) hours
                dates.append(datetime.datetime(year,month,day,0)+
                             datetime.timedelta(hours=hour))
                pressures.append(pre)
        return {'dates':dates,'pressures':pressures}
    year_i=find_year_starts(csv)
    file_data={}
    for idx in year_i:
        year=int(csv.iloc[idx,0])
        file_data["%04d" % year]=get_data_for_year(station,csv,idx)
    return file_data
def load_from_file_tpd(file_name):
    station=get_station_name(file_name)
    csv=pandas.read_csv(file_name,header=None)
    # Find the index numbers starting the data for a year
    #  (lines with 'January' in the third column)
    def find_year_starts(tab):
        return tab[tab.iloc[:,2].str.contains('January',na=False)].index.values
    # Convert '2pm' into 14
    def hour_from_string(hrst):
        try:
            hour=int(re.findall('[0-9]+', hrst)[0])
        except IndexError:
            return None
        if len(re.findall('pm', hrst.lower()))!=0:
            hour=hour+12
        if hour==24: hour=12   # '12pm' is noon, not midnight
        return hour
    # Extract a year's worth of data given the start location
    def get_data_for_year(station,tab,istart):
        dates=[]
        pressures=[]
        year=int(tab.iloc[istart,0])
        for month in range(1,13):
            lastday=calendar.monthrange(year,month)[1]
            for day in range(1,lastday+1):
                for hri in (0,1):   # morning and afternoon obs
                    if hri==0:
                        pre=tab.iloc[istart+day*2,month+1]
                    else:
                        pre=tab.iloc[istart+day*2+1,month+1]
                    try:
                        pre=float(pre)
                        if numpy.isnan(pre): continue
                        if pre<0: continue          # -9999 - missing
                        if pre<35: pre=pre*33.8639  # inHg -> hPa
                        if pre<800: pre=pre*1.33    # mmHg -> hPa (approx.)
                    except ValueError:
                        continue
                    if hri==0:
                        hour=hour_from_string(tab.iloc[istart+day*2,1])
                    else:
                        hour=hour_from_string(tab.iloc[istart+day*2+1,1])
                    if hour is None: continue
                    dates.append(datetime.datetime(year,month,day,hour))
                    pressures.append(pre)
        return {'dates':dates,'pressures':pressures}
    year_i=find_year_starts(csv)
    file_data={}
    for idx in year_i:
        year=int(csv.iloc[idx,0])
        file_data["%04d" % year]=get_data_for_year(station,csv,idx)
    return file_data
# Specific functions for custom formats
def load_from_file_teneriffe(file_name):
    csv=pandas.read_csv(file_name,header=None)
    file_data={}
    # Rows starting with a 4-digit year contain data
    dc=csv[csv.iloc[:,0].str.contains(r'\d\d\d\d',na=False,regex=True)].index.values
    for idx in dc:
        try:
            year  =int(csv.iloc[idx,0])
            month =int(csv.iloc[idx,1])
            day   =int(csv.iloc[idx,2])
            hour  =int(csv.iloc[idx,3])
            minute=int(csv.iloc[idx,4])
        except ValueError: continue   # Missing date
        try:
            pre=float(csv.iloc[idx,5])
            if numpy.isnan(pre): continue
            if pre<0: continue          # -9999 - missing
            if pre<35: pre=pre*33.8639  # inHg -> hPa
            if pre<800: pre=pre*1.33    # mmHg -> hPa (approx.)
        except ValueError: continue   # Missing pressure
        if "%04d" % year not in file_data:
            file_data["%04d" % year]={'dates':[],'pressures':[]}
        file_data["%04d" % year]['dates'].append(
                 datetime.datetime(year,month,day,hour,minute))
        file_data["%04d" % year]['pressures'].append(pre)
    return file_data
def load_from_file_middle_east(file_name,column):
    csv=pandas.read_csv(file_name,header=None)
    file_data={}
    # Rows starting with a 4-digit year contain data
    dc=csv[csv.iloc[:,0].str.contains(r'\d\d\d\d',na=False,regex=True)].index.values
    for idx in dc:
        try:
            year =int(csv.iloc[idx,0])
            month=int(csv.iloc[idx,1])
            day  =int(csv.iloc[idx,2])
            hour =8   # Nominal ob. time - not given in the file
        except ValueError: continue   # Missing date
        try:
            pre=float(csv.iloc[idx,column])
            if numpy.isnan(pre): continue
            if pre<0: continue          # -9999 - missing
            if pre<35: pre=pre*33.8639  # inHg -> hPa
            if pre<800: pre=pre*1.33    # mmHg -> hPa (approx.)
        except ValueError: continue   # Missing pressure
        if "%04d" % year not in file_data:
            file_data["%04d" % year]={'dates':[],'pressures':[]}
        file_data["%04d" % year]['dates'].append(
                 datetime.datetime(year,month,day,hour))
        file_data["%04d" % year]['pressures'].append(pre)
    return file_data
# Add a station's data to the output files
def output_station(name,sdata):
    LastF=''
    opfile=None
    mdl=md[md.iloc[:,0].str.lower()==name.lower()]
    if mdl.empty:
        raise Exception("No station %s in metadata" % name)
    for syr in list(sdata.keys()):
        for idx in range(len(sdata[syr]['dates'])):
            Of=("%s/../../data_from_Emulate/%04d/%02d/prmsl.txt" %
                   (sd,sdata[syr]['dates'][idx].year,
                       sdata[syr]['dates'][idx].month))
            if Of!=LastF:
                dn=os.path.dirname(Of)
                if not os.path.isdir(dn):
                    os.makedirs(dn)
                if opfile is not None:
                    opfile.close()
                    # Tidy away any empty file and directory left behind
                    if os.path.getsize(LastF)==0:
                        os.remove(LastF)
                        dn=os.path.dirname(LastF)
                        if not os.listdir(dn):
                            os.rmdir(dn)
                opfile=open(Of, "a")
                LastF=Of
            opfile.write(("%04d %02d %02d %02d %02d %6.2f "+
                          "%7.2f %6.1f %16s\n") %
                         (sdata[syr]['dates'][idx].year,
                          sdata[syr]['dates'][idx].month,
                          sdata[syr]['dates'][idx].day,
                          sdata[syr]['dates'][idx].hour,
                          sdata[syr]['dates'][idx].minute,
                          mdl.iloc[0,2],mdl.iloc[0,3],    # lat, lon
                          sdata[syr]['pressures'][idx],   # ob. value
                          mdl.iloc[0,1]))                 # name
    if opfile is not None:
        opfile.close()
# Load all the data
emulate_data={}
Files=os.listdir("%s/../original_data_csv" % sd)
for file_name in Files:
    station_name=get_station_name(file_name)
    # Once-per-day data
    if file_name in Files_opd:
        print(station_name)
        if station_name not in emulate_data:
            emulate_data[station_name]={}
        emulate_data[station_name].update(load_from_file_opd(
                    "%s/../original_data_csv/%s" % (sd,file_name)))
    # Twice-per-day data
    if file_name in Files_tpd:
        print(station_name)
        if station_name not in emulate_data:
            emulate_data[station_name]={}
        emulate_data[station_name].update(load_from_file_tpd(
                    "%s/../original_data_csv/%s" % (sd,file_name)))

# Special cases
print('TENERIFE')
emulate_data['TENERIFE']=load_from_file_teneriffe(
        "%s/../original_data_csv/Tenerife.Teneriffe.csv" % sd)
print('BAGHDAD')
emulate_data['BAGHDAD']=load_from_file_middle_east(
        "%s/../original_data_csv/ME_daily_pressure.ME_daily_pressure.csv" % sd,3)
print('BASRA')
emulate_data['BASRA']=load_from_file_middle_east(
        "%s/../original_data_csv/ME_daily_pressure.ME_daily_pressure.csv" % sd,4)
print('DIYARBAKIR')
emulate_data['DIYARBAKIR']=load_from_file_middle_east(
        "%s/../original_data_csv/ME_daily_pressure.ME_daily_pressure.csv" % sd,5)
print('BEIRUT')
emulate_data['BEIRUT']=load_from_file_middle_east(
        "%s/../original_data_csv/ME_daily_pressure.ME_daily_pressure.csv" % sd,6)

# Output the data in the new format
for station in list(emulate_data.keys()):
    output_station(station,emulate_data[station])
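The unit-conversion heuristic repeated in each loader can be checked by hand: readings below 35 are taken to be inches of mercury, readings below 800 millimetres of mercury, and anything larger hPa already. The same logic as a standalone sketch:

def to_hPa(pre):
    # Heuristic from the loaders above
    if pre<35:
        pre=pre*33.8639   # inHg -> hPa
    if pre<800:
        pre=pre*1.33      # mmHg -> hPa (approximate factor used above)
    return pre

print(to_hPa(29.59))     # ~1002.0 hPa - the first value in the 1861 sample
print(to_hPa(760.0))     # ~1010.8 hPa
print(to_hPa(1013.25))   # unchanged - already in hPa

Note that 1.33 slightly underestimates the exact mmHg-to-hPa factor (1.333224), so converted mmHg values come out low by roughly 2.5 hPa near 1000 hPa.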