month_from_tape (grib2)ΒΆ

#!/usr/bin/env python3

# Extract 20CRv3 data for a month - from tape to $SCRATCH

import os
import sys
import subprocess
import tempfile
import datetime
from shutil import copyfile

# Version and month
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--startyear", help="Year run started", type=int, required=True)
parser.add_argument("--year", help="Year to extract", type=int, required=True)
parser.add_argument("--month", help="Month to extract", type=int, required=True)
parser.add_argument("--version", help="Month to extract", type=int, default=451)
args = parser.parse_args()

# Location of Output files (on hsi)
originals_directory = (
    "/home/projects/incite11/ensda_" + "v%03d_archive_grb2_monthly" % args.version
)
# Where to put the grib (and obs) files retrieved from hsi
working_directory = "%s/20CRv3.working/ensda_%04d/%04d/%02d" % (
    os.getenv("SCRATCH"),
    args.startyear,
    args.year,
    args.month,
)
if not os.path.isdir(working_directory):
    os.makedirs(working_directory)

# Get the list of start years
proc = subprocess.Popen(
    "hsi ls -l %s" % originals_directory,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    shell=True,
)
(out, err) = proc.communicate()
start_years = err.decode("utf8").split()  # Why does hsi put output in stderr?
start_years = [x for x in start_years if "ensda_%03d" % args.version in x]
start_years = [int(x[-4:]) for x in start_years]
if args.startyear not in start_years:
    raise Exception("%04d is not as available start year" % args.startyear)

# Get the list of files available for the selected month
proc = subprocess.Popen(
    "hsi ls -l %s/ensda_%03d_%04d/%04d/%04d%02d_*.tar"
    % (
        originals_directory,
        args.version,
        args.startyear,
        args.year,
        args.year,
        args.month,
    ),
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    shell=True,
)
(out, err) = proc.communicate()

file_list = err.decode("utf8").split()
obs_files = [x for x in file_list if "_psobs" in x]
if len(obs_files) < 1:
    raise Exception("Data not available")
anl_files = [x for x in file_list if "_pgrbanl_" in x]
if len(anl_files) < 80:
    raise Exception("Analysis data not complete")
fg_files = [x for x in file_list if "_pgrbfg_" in x]
if len(fg_files) < 80:
    raise Exception("Forecast data not complete")


# Have the list of files to retrieve - sort them into tape order
tfile = tempfile.NamedTemporaryFile(delete=False, mode="w")
for fn in obs_files + anl_files + fg_files:
    tfile.write(
        "%s/ensda_%03d_%04d/%04d/%s\n"
        % (originals_directory, args.version, args.startyear, args.year, fn)
    )
tfile.close()
sfile = tempfile.NamedTemporaryFile(delete=False, mode="w")
proc = subprocess.Popen(
    (
        "generate_sorted_list_for_hpss.py -i %s |"
        + "grep -v 'Generating sorted list from input file' > %s"
    )
    % (tfile.name, sfile.name),
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    shell=True,
)
(out, err) = proc.communicate()

# Doesn't work - for now, assume success.
# if out is not None or err is not None:
# raise StandardError("Problem with hsi tape ordering")

# Run the retrieval - may take a long time
sfile.close()
sfile = open(sfile.name, "r")
tfile = open(tfile.name, "w")
tfile.write("lcd %s\n" % working_directory)
for line in sfile:
    tfile.write("cget %s" % line)
sfile.close()
tfile.close()
os.remove(sfile.name)
proc = subprocess.Popen("hsi < %s" % tfile.name, shell=True)
(out, err) = proc.communicate()
os.remove(tfile.name)

# Check that the files retrieved, and untar
for file in obs_files + anl_files + fg_files:
    fpn = "%s/%s" % (working_directory, file)
    if not os.path.isfile(fpn):
        raise Exception("Missing file %s" % fpn)

    proc = subprocess.Popen("cd %s; tar xf %s" % (working_directory, file), shell=True)
    (out, err) = proc.communicate()
    if out is not None or err is not None:
        raise Exception("Failed to untar %s/%s" % (working_directory, file))