Logo
  • Home
  • About ESA Φ-lab CIN
  • CIN People
  • Opportunities
  • Projects
  • Φ-talks
  • News
→ THE EUROPEAN SPACE AGENCY
Logo

About ESA EO

About CIN

About Pi School

ESA Φ-lab Website

ESA Φ-lab Linkedin community

Copyright 2025 @ European Space Agency. All rights reserved.

LinkedIn · X · GitHub · Instagram · Facebook · YouTube
AI4EO to monitor air quality(NO2)-omi-linking

AI4EO to monitor air quality(NO2)-omi-linking

📆 Project Period
May, 2022
📍 GitHub
gitlab.esa.int

AI4EO to monitor air quality(NO2)-omi-linking

import numpy as np
import netCDF4
import h5py
import os
import pandas as pd
import glob
from scipy.io import netcdf
from datetime import datetime, timedelta

A. Extract NO2 from OMI-L3

A.1 Explore content of files with h5py

A.2 Creating a pandas dataframe with date, longitude, latitude, no2 column data and grid ID

# Reorder the columns of `df` by position (4, 0, 2, 1, 3), sort the rows by
# grid ID and then by date, and write the result to CSV without the index.
order = [4, 0, 2, 1, 3]
cols = df.columns.values

df2 = df.loc[:, cols[order]]
df2 = df2.sort_values(by=['ID', 'date'], ignore_index=True)
df2.to_csv(
    '/home/jovyan/eodata/AI4EO to monitor air quality NO2/timeSeries_NO2_OMI.csv',
    index=False,
)
df2
image
# Report the days of the expected daily range that have no OMI record.
# `parse_dates` matters: without it the `date` column comes back from the CSV
# as plain strings, none of which equal a Timestamp of the range, so every
# single day would be reported as missing.
all_data = pd.read_csv(
    '/home/jovyan/eodata/AI4EO to monitor air quality NO2/timeSeries_NO2_OMI.csv',
    parse_dates=['date'],
)
missings = pd.date_range(start='2004-10-01', end='2020-12-31').difference(pd.unique(all_data['date']))
missings
# Same check against the in-memory frame.
pd.date_range(start='2004-10-01', end='2020-12-31').difference(pd.unique(df2['date']))
# Write one CSV per calendar year into the OMI-yearly directory.
# The path is built with os.path.join: a backslash is not a separator on this
# POSIX path and would produce files literally named "OMI-yearly\OMI-<year>.csv".
# NOTE(review): range(2004, 2020) stops at 2019 although the date range checked
# elsewhere in this file runs to 2020-12-31 — confirm 2020 is meant to be excluded.
yearly_dir = '/home/jovyan/eodata/AI4EO to monitor air quality NO2/OMI-yearly'
os.makedirs(yearly_dir, exist_ok=True)
for y in range(2004, 2020):
    df2[df2["date"].dt.year == y].to_csv(
        os.path.join(yearly_dir, "OMI-" + str(y) + ".csv"),
        index=False,
    )

B. Link UK grid to OMI grid

B.1 Load UK grid

# Read the UK grid table, drop its '.geo' and 'system:index' columns and
# prefix every remaining column name with 'uk_'.
uk_grids = pd.read_csv('/home/jovyan/eodata/AI4EO to monitor air quality NO2/UK_grids_noGeo_centroid.csv')
uk_grids = uk_grids.drop(columns=['.geo', 'system:index']).add_prefix('uk_')
uk_grids
image

B.2 Compute OMI grid values

image
# Write the OMI grid table to CSV, omitting the index column.
OMI_grids.to_csv(
    path_or_buf='/home/jovyan/eodata/AI4EO to monitor air quality NO2/OMI_grids_noGeo_centroid.csv',
    index=False,
)

B.3 Link UK grid to OMI grid

# Print the centroid latitude and longitude of the rows returned by
# `uk_grids.head()`, then display the head of the OMI grid table.
for _, uk_row in uk_grids.head().iterrows():
    print(uk_row['uk_centroidLat'])
    print(uk_row['uk_centroidLon'])

OMI_grids.head()
53.520869128059545
-0.0012246022636999
53.520616071300296
0.0138497457511531
53.52036111946441
0.0289239196618866
53.52010431396655
0.0439978709429132
53.51984553097928
0.0590715504716754
image
# Save `df` to link_OMI_UK.csv (index omitted) and display it.
df.to_csv(
    path_or_buf='/home/jovyan/eodata/AI4EO to monitor air quality NO2/link_OMI_UK.csv',
    index=False,
)
df

C. Retrieve NO2 time series data for a particular UK grid.

# Pick the first UK grid cell present in the link table.
uk_gridIds = df.uk_gridId.unique()
id = uk_gridIds[0]  # NOTE: shadows the builtin `id`; name kept for compatibility
print('UK grid id:', id)

# Look up the grid linked to that UK cell. The variable name and the printed
# label say "CAMS", but the value is read from the `OMI_ID` column.
cams_gridId = df[df.uk_gridId == id]['OMI_ID'].values[0]
print('Correspondent CAMS grid id:', cams_gridId)

# Select the time series of the linked grid. Filter on the id looked up above,
# not on a stale global `ID` left over from an earlier loop.
tseries = df2[df2.ID == cams_gridId].reset_index()
tseries
UK grid id: TA3204
Correspondent CAMS grid id: 1699
image
def print_attrs(name, obj):
    """Visitor for ``visititems``: iterate over the attributes of *obj*.

    Args:
        name: Name of the visited item, as passed by ``visititems`` (unused).
        obj: Visited object; only its ``attrs`` mapping is read.

    Returns:
        None, so the traversal continues over every item.
    """
    for key, val in obj.attrs.items():
        # Uncomment to dump every attribute of every visited item:
        # print("    %s: %s" % (key, val))
        pass


# Open the first matching OMI file and walk its content once.
for file in glob.glob('/home/jovyan/eodata/AI4EO to monitor air quality NO2/No2-data/No2-data/OMI-NO2-L3/*.nc4'):
    print(file)
    try:
        # The context manager closes the HDF5 handle even if the walk fails.
        with h5py.File(file, mode='r') as root:
            root.visititems(print_attrs)
    except OSError as err:
        print(err)
    break  # One is enough
data = []


def yields_data(pattern='/home/jovyan/eodata/AI4EO to monitor air quality NO2/No2-data/No2-data/OMI-NO2-L3/*.nc4'):
    """Yield one NO2 record per grid cell for every file matching *pattern*.

    For each file the ``XDim``, ``YDim`` and
    ``ColumnAmountNO2TropCloudScreened`` datasets are read, and the date is
    parsed from the file name (characters 7-10 as year, 12-13 as month,
    14-15 as day).

    Args:
        pattern: Glob pattern of the files to read. Defaults to the OMI-NO2-L3
            ``*.nc4`` directory used by this notebook.

    Yields:
        Tuples ``(date, longitude, latitude, no2, ID)``. ``ID`` is a 1-based
        counter that restarts for every file and advances longitude by
        longitude, latitude being the inner loop.

    Any exception raised while handling a file is printed together with the
    file name and processing continues with the next file.
    """
    for file in glob.glob(pattern):
        try:
            # Copy the arrays into memory inside the `with` block so the HDF5
            # handle is closed before any record is yielded.
            with h5py.File(file, mode='r') as root:
                longitudes = root['XDim'][:]
                latitudes = root['YDim'][:]
                no2 = root['ColumnAmountNO2TropCloudScreened'][:]
            name = os.path.basename(file)
            date = datetime.strptime(name[7:11] + "/" + name[12:14] + "/" + name[14:16], '%Y/%m/%d')
            ref_count = 0
            for lon_idx, lon in enumerate(longitudes):
                for lat_idx, lat in enumerate(latitudes):
                    ref_count += 1
                    # The NO2 array is indexed [latitude, longitude].
                    yield date, lon, lat, no2[lat_idx, lon_idx], ref_count
        except Exception as err:
            # Best effort: report the offending file and move on.
            print(file)
            print(err)

# Collect every record produced by the generator into `data`, then build the
# long-format table from it.
for date, lon, lat, no2, ID in yields_data():
    data.append([date, lon, lat, no2, ID])

df = pd.DataFrame(data, columns=['date', 'longitude', 'latitude', 'no2', 'ID'])
# `head` has to be called: printing the bound method dumps the whole frame
# prefixed with "<bound method NDFrame.head of ...".
print(df.head())
<bound method NDFrame.head of                date  longitude  latitude           no2    ID
0        2015-05-06     -8.875    49.125 -1.267651e+30     1
1        2015-05-06     -8.875    49.375 -1.267651e+30     2
2        2015-05-06     -8.875    49.625 -1.267651e+30     3
3        2015-05-06     -8.875    49.875 -1.267651e+30     4
4        2015-05-06     -8.875    50.125 -1.267651e+30     5
...             ...        ...       ...           ...   ...
11692027 2009-01-16      1.875    59.875 -1.267651e+30  2108
11692028 2009-01-16      1.875    60.125 -1.267651e+30  2109
11692029 2009-01-16      1.875    60.375 -1.267651e+30  2110
11692030 2009-01-16      1.875    60.625 -1.267651e+30  2111
11692031 2009-01-16      1.875    60.875 -1.267651e+30  2112

[11692032 rows x 5 columns]>
DatetimeIndex(['2004-10-01', '2004-10-02', '2004-10-03', '2004-10-04',
               '2004-10-05', '2004-10-06', '2004-10-07', '2004-10-08',
               '2004-10-09', '2004-10-10',
               ...
               '2020-12-22', '2020-12-23', '2020-12-24', '2020-12-25',
               '2020-12-26', '2020-12-27', '2020-12-28', '2020-12-29',
               '2020-12-30', '2020-12-31'],
              dtype='datetime64[ns]', length=5936, freq=None)
DatetimeIndex(['2004-11-19', '2004-11-20', '2004-11-21', '2004-11-22',
               '2004-11-23', '2004-11-24', '2004-11-25', '2004-11-26',
               '2004-11-27', '2004-11-28',
               ...
               '2020-12-22', '2020-12-23', '2020-12-24', '2020-12-25',
               '2020-12-26', '2020-12-27', '2020-12-28', '2020-12-29',
               '2020-12-30', '2020-12-31'],
              dtype='datetime64[ns]', length=400, freq=None)
# Build the OMI grid table from the rows dated '2004-10-01': drop the 'no2'
# and 'date' columns, add bounds 0.125 either side of each latitude and
# longitude, and prefix every column name with 'OMI_'.
OMI_all = pd.read_csv('/home/jovyan/eodata/AI4EO to monitor air quality NO2/timeSeries_NO2_OMI.csv')
OMI_grids = (
    OMI_all[OMI_all['date'] == '2004-10-01']
    .drop(columns=['no2', 'date'])
    .assign(
        latMin=lambda grid: grid['latitude'] - 0.125,
        latMax=lambda grid: grid['latitude'] + 0.125,
        lonMin=lambda grid: grid['longitude'] - 0.125,
        lonMax=lambda grid: grid['longitude'] + 0.125,
    )
    .add_prefix('OMI_')
)
OMI_grids
data = []


def closest_OMI_centroid(lat, lon, grids=None):
    """Return the grid row nearest to the point (*lat*, *lon*).

    The nearest ``OMI_latitude`` value is selected first; among the rows that
    share it, the row with the nearest ``OMI_longitude`` is returned. Ties go
    to the first matching row.

    Args:
        lat: Latitude of the point to match.
        lon: Longitude of the point to match.
        grids: DataFrame with ``OMI_latitude`` and ``OMI_longitude`` columns.
            Defaults to the module-level ``OMI_grids`` table. Must not be
            empty.

    Returns:
        A one-row DataFrame holding the selected grid row.
    """
    if grids is None:
        grids = OMI_grids

    lat_gap = (grids['OMI_latitude'] - lat).abs()
    nearest_lat = grids['OMI_latitude'].iloc[int(lat_gap.argmin())]
    same_lat = grids[grids['OMI_latitude'] == nearest_lat]

    # Use the signed difference: comparing absolute longitudes loses the sign,
    # so cells east and west of the meridian would look equally close.
    lon_gap = (same_lat['OMI_longitude'] - lon).abs()
    best = int(lon_gap.argmin())
    return same_lat.iloc[best:best + 1]

def yields_data():
    """Yield one linked record per row of ``uk_grids``.

    For each UK grid row, ``closest_OMI_centroid`` is called with the row's
    centroid and the first row of its result is used.

    Yields:
        A 14-item tuple: ``uk_gridId``, ``uk_centroidLat``, ``uk_centroidLon``,
        ``uk_latMax``, ``uk_latMin``, ``uk_lonMax``, ``uk_lonMin`` from the UK
        row, followed by ``OMI_ID`` (as int) and ``OMI_latitude``,
        ``OMI_longitude``, ``OMI_latMax``, ``OMI_latMin``, ``OMI_lonMax``,
        ``OMI_lonMin`` (as float) from the matched row.
    """
    for _, uk_row in uk_grids.iterrows():
        # Take the single matched row as a Series so that each field is a
        # scalar before it is converted with int()/float().
        closest = closest_OMI_centroid(uk_row['uk_centroidLat'], uk_row['uk_centroidLon']).iloc[0]
        yield (
            uk_row['uk_gridId'],
            uk_row['uk_centroidLat'],
            uk_row['uk_centroidLon'],
            uk_row['uk_latMax'],
            uk_row['uk_latMin'],
            uk_row['uk_lonMax'],
            uk_row['uk_lonMin'],
            int(closest['OMI_ID']),
            float(closest['OMI_latitude']),
            float(closest['OMI_longitude']),
            float(closest['OMI_latMax']),
            float(closest['OMI_latMin']),
            float(closest['OMI_lonMax']),
            float(closest['OMI_lonMin']),
        )

# Collect every linked record from the generator into `data` and build the
# link table from it. Parentheses and brackets carry the line breaks, so no
# backslash continuations are needed.
for (uk_gridId, uk_centroidLat, uk_centroidLon, uk_latMax, uk_latMin, uk_lonMax, uk_lonMin,
     OMI_ID, OMI_latitude, OMI_longitude, OMI_latMax, OMI_latMin, OMI_lonMax, OMI_lonMin) in yields_data():
    data.append([uk_gridId, uk_centroidLat, uk_centroidLon, uk_latMax, uk_latMin, uk_lonMax, uk_lonMin,
                 OMI_ID, OMI_latitude, OMI_longitude, OMI_latMax, OMI_latMin, OMI_lonMax, OMI_lonMin])

df = pd.DataFrame(data, columns=['uk_gridId', 'uk_centroidLat', 'uk_centroidLon', 'uk_latMax', 'uk_latMin',
                                 'uk_lonMax', 'uk_lonMin', 'OMI_ID', 'OMI_latitude', 'OMI_longitude',
                                 'OMI_latMax', 'OMI_latMin', 'OMI_lonMax', 'OMI_lonMin'])