@@ -600,3 +600,123 @@ def _build_result(self, obj):
600600 """Build a product that opens the data using `xarray.open_dataset`."""
601601 return AWSProduct (obj ,
602602 lambda s : xr .open_dataset (s .url + '#mode=bytes' , engine = 'netcdf4' ))
603+
604+
@exporter.export
class MLWPArchive(S3DataStore):
    """Access data from the NOAA/CIRA Machine-Learning Weather Prediction archive in AWS.

    This consists of individual model runs stored in netCDF format, across a variety
    of models (Aurora, FourCastNet, GraphCast, Pangu) and initial conditions
    (GFS or IFS).

    """

    # Maps the friendly (lower-cased) model names to the four-letter codes used in keys.
    _model_map = {'aurora': 'AURO', 'fourcastnet': 'FOUR',
                  'graphcast': 'GRAP', 'pangu': 'PANG'}

    def __init__(self):
        super().__init__('noaa-oar-mlwp-data')

    def _model_id(self, model, version, init):
        """Build a model id from the model name, version, and initial conditions.

        The result has the form ``<MODEL>_v<version>_<init>``, e.g. ``GRAP_v100_GFS``.
        When ``version`` is ``None``, the bucket is queried and the prefix that sorts last
        is used. ``init`` defaults to ``'GFS'`` when not given.
        """
        init = init or 'GFS'
        # Names not in the map pass through unchanged, so four-letter codes work as-is.
        model = self._model_map.get(model.lower(), model)
        if version is None:
            # Prefixes are split on '_' so each looks like '<MODEL>_v<version>_'. The sort
            # is lexicographic; an empty listing raises IndexError here.
            model_id = sorted(self.common_prefixes(model + '_', '_'))[-1]
        else:
            version = str(version)
            # Short versions are expanded by appending '00', e.g. 1 -> '100'.
            if len(version) < 3:
                version = version + '00'
            model_id = f'{model}_v{version}_'
        return f'{model_id}{init}'

    def _build_key(self, model_id, dt, depth=None):
        """Build a key for the bucket up to the desired point.

        The full key is ``<model_id>/<YYYY>/<mmdd>/<file name>``. ``depth`` limits how many
        of those leading components are joined; ``None`` gives the complete key.
        """
        # Times in the keys are treated as UTC (see `dt_from_key`), so convert aware
        # datetimes before formatting to avoid picking the wrong day directory. Naive
        # datetimes are formatted as given.
        if dt.utcoffset() is not None:
            dt = dt.astimezone(timezone.utc)

        # Forecast-hour range and step encoded in the file name.
        first_hour = 0
        last_hour = 240
        step_hours = 6
        parts = [model_id, f'{dt:%Y}', f'{dt:%m%d}',
                 f'{model_id}_{dt:%Y%m%d%H}_'
                 f'f{first_hour:03d}_f{last_hour:03d}_{step_hours:02d}.nc']
        return self.delimiter.join(parts[:depth])

    def dt_from_key(self, key):  # noqa: D102
        # Docstring inherited
        # GRAP_v100_GFS_2025021212_f000_f240_06.nc
        dt = key.split('/')[-1].split('_')[3]
        return datetime.strptime(dt, '%Y%m%d%H').replace(tzinfo=timezone.utc)

    def get_product(self, model, dt=None, version=None, init=None):
        """Get a product from the archive.

        Parameters
        ----------
        model : str
            The selected model to get data for. Can be any of the four-letter codes supported
            by the archive (currently FOUR, PANG, GRAP, AURO), or the known names (
            case-insensitive): ``'Aurora'``, ``'FourCastNet'``, ``'graphcast'``, or
            ``'pangu'``.
        dt : `datetime.datetime`, optional
            The desired date/time for the model run; the one closest matching in time will
            be returned. This should have the proper timezone included; if not specified, UTC
            will be assumed. If ``None``, defaults to the current UTC date/time.
        version : str or int, optional
            The particular version of the model to select. If not given, the query will try
            to select the most recent version of the model.
        init : str, optional
            Selects the model run initialized with a particular set of initial conditions.
            Should be one of ``'GFS'`` or ``'IFS'``, defaults to ``'GFS'``.

        See Also
        --------
        get_range

        """
        dt = datetime.now(timezone.utc) if dt is None else ensure_timezone(dt)
        model_id = self._model_id(model, version, init)
        search_key = self._build_key(model_id, dt)
        # Drop the trailing '<YYYYmmddHH>_fNNN_fNNN_NN.nc' so the search covers every run
        # under that day's directory rather than a single hour.
        prefix = search_key.rsplit('_', maxsplit=4)[0]
        return self._closest_result(self.objects(prefix), dt)

    def get_range(self, model, start, end, version=None, init=None):
        """Yield products within a particular date/time range.

        Runs are included when their time, as parsed from the key, satisfies
        ``start <= time < end``.

        Parameters
        ----------
        model : str
            The selected model to get data for. Can be any of the four-letter codes supported
            by the archive (currently FOUR, PANG, GRAP, AURO), or the known names (
            case-insensitive): ``'Aurora'``, ``'FourCastNet'``, ``'graphcast'``, or
            ``'pangu'``.
        start : `datetime.datetime`
            The start of the date/time range. This should have the proper timezone included;
            if not specified, UTC will be assumed.
        end : `datetime.datetime`
            The end of the date/time range. This should have the proper timezone included;
            if not specified, UTC will be assumed.
        version : str or int, optional
            The particular version of the model to select. If not given, the query will try
            to select the most recent version of the model.
        init : str, optional
            Selects the model run initialized with a particular set of initial conditions.
            Should be one of ``'GFS'`` or ``'IFS'``, defaults to ``'GFS'``.

        See Also
        --------
        get_product

        """
        start = ensure_timezone(start)
        end = ensure_timezone(end)
        model_id = self._model_id(model, version, init)

        # Step through whole UTC days beginning at 00 UTC of the first day. Stepping from
        # ``start`` itself can skip the final day directory when ``start`` falls later in
        # the day than ``end`` (e.g. 18Z one day to 06Z the next). The time filter below
        # keeps the results limited to the requested range.
        first_day = start.astimezone(timezone.utc).replace(hour=0, minute=0, second=0,
                                                           microsecond=0)
        for dt in date_iterator(first_day, end, days=1):
            # depth=3 gives the '<model_id>/<YYYY>/<mmdd>' directory for that day.
            prefix = self._build_key(model_id, dt, depth=3)
            for obj in self.objects(prefix):
                if start <= self.dt_from_key(obj.key) < end:
                    yield self._build_result(obj)

    def _build_result(self, obj):
        """Build a product that opens the data using `xarray.open_dataset`."""
        return AWSProduct(obj,
                          lambda s: xr.open_dataset(s.url + '#mode=bytes', engine='netcdf4'))
0 commit comments