month_from_tape (grib1)ΒΆ

#!/usr/bin/env python3

# Extract 20CRv3 data for a month - from tape to $SCRATCH

import os
import sys
import subprocess
import tempfile
import datetime
from shutil import copyfile

# Version and month
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--startyear", help="Year run started",
                    type=int, required=True)
parser.add_argument("--year", help="Year to extract",
                    type=int, required=True)
parser.add_argument("--month", help="Month to extract",
                    type=int, required=True)
parser.add_argument("--version", help="Month to extract",
                    type=int,default=451)
args = parser.parse_args()

# Location of Output files (on hsi)
originals_directory="/home/projects/incite11/ensda_"+\
                    "v%03d_archive_orig" % args.version
# Where to put the grib (and obs) files retrieved from hsi
working_directory="%s/20CRv3.working.orig/ensda_%04d/%04d/%02d" % (
                   os.getenv('SCRATCH'),args.startyear,
                   args.year,args.month)
if not os.path.isdir(working_directory):
    os.makedirs(working_directory)

# Get the list of start years
proc = subprocess.Popen("hsi ls -l %s" % originals_directory,
                    stdout=subprocess.PIPE, 
                    stderr=subprocess.PIPE, shell=True)
(out, err) = proc.communicate()
start_years=err.decode('utf8').split() # Why does hsi put output in stderr?
start_years=[x for x in start_years if 'ensda_%03d' % args.version in x]
start_years=[int(x[-4:]) for x in start_years]
if args.startyear not in start_years:
    raise Exception(
      "%04d is not as available start year" % args.startyear)

# Get the list of files available for the selected month
proc = subprocess.Popen("hsi ls -l %s/ensda_%03d_%04d/%04d%02d*.tar" % (
                    originals_directory,args.version,
                    args.startyear,args.year,
                    args.month),
                    stdout=subprocess.PIPE, 
                    stderr=subprocess.PIPE, shell=True)
(out, err) = proc.communicate()
file_list=err.decode('utf8').split()
tar_files=[x for x in file_list if '%04d%02d' % (args.year,args.month) in x]
if len(tar_files)<100:
    print(file_list)
    raise Exception("Data not available")

# Have the list of files to retrieve - sort them into tape order
tfile=tempfile.NamedTemporaryFile(delete=False,mode='w')
for fn in tar_files:
    tfile.write("%s/ensda_%03d_%04d/%s\n" % (
                    originals_directory,args.version,
                    args.startyear,fn))
tfile.close()
sfile=tempfile.NamedTemporaryFile(delete=False,mode='w')
proc = subprocess.Popen("hpss_file_sorter.script %s > %s" % (
                         tfile.name,sfile.name),
                         stdout=subprocess.PIPE, 
                         stderr=subprocess.PIPE, shell=True)
(out, err) = proc.communicate()
# Doesn't work - for now, assume success.
#if out is not None or err is not None:
    #raise StandardError("Problem with hsi tape ordering")

# Run the retrieval - may take a long time
sfile.close()
sfile=open(sfile.name,'r')
tfile=open(tfile.name,'w')
tfile.write("lcd %s\n" % working_directory)
for line in sfile:
    tfile.write("cget %s" % line)
sfile.close()
tfile.close()
os.remove(sfile.name)
proc = subprocess.Popen("hsi < %s" % tfile.name, shell=True)
(out, err) = proc.communicate()
os.remove(tfile.name)

# Check that the files retrieved, and untar
for file in tar_files:
    fpn= "%s/%s" % (working_directory,file)
    if not os.path.isfile(fpn):
        raise Exception("Missing file %s" % fpn)
         
    proc = subprocess.Popen("cd %s; tar xf %s" % (
                            working_directory,file), shell=True)
    (out, err) = proc.communicate()
    if out is not None or err is not None:
        raise Exception("Failed to untar %s/%s" % (
                             working_directory,file))