Dimensionality Reduction

Dimensionality reduction is a technique used in machine learning and statistics to reduce the number of features or variables in a dataset while preserving its essential characteristics. This is particularly useful when dealing with high-dimensional data, as it can help improve model performance, reduce overfitting, and make data visualization easier.

First we will import the required libraries and read in some EVI imagery (the `evi_*.tif` files).

import geowombat as gw
import os
import matplotlib.pyplot as plt
import pandas as pd
from glob import glob
import ray
import pandas as pd


# Work from the sample-data directory so the relative glob pattern below resolves.
os.chdir("../../xr_fresh/data/")


# Plain string: the pattern has no placeholders, so no f-prefix is needed.
file_glob = "evi_*tif"
# Sort so the file order is deterministic.
files = sorted(glob(file_glob))

# Show the matched files in a table (last expression, so the notebook renders it).
pd.DataFrame({"file": files})
/home/mmann1123/miniconda3/envs/xr_fresh_update/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
file
0 evi_20160101.tif
1 evi_20160401.tif
2 evi_20160517.tif
3 evi_20170203.tif

Let’s take a look at the input data.

# Open all rasters together, declaring -9999 as the nodata value.
with gw.open(files, nodata=-9999) as ds:
    # Apply the nodata mask before plotting.
    ds = ds.gw.mask_nodata()
    # One panel per entry along "time", wrapping after four panels per row.
    ds.plot(
        col="time",
        col_wrap=4,
        cmap="viridis",
        robust=True,
    )
    # Render the array summary in the notebook.
    display(ds)

Now let’s create 3 components from the data

import xr_fresh.dimension_reduction  # noqa: F401 -- imported only because it registers the accessor

cpus = 8
# Start Ray for the duration of this block; the handle returned by ray.init()
# is not used, so it is not bound to a name.
with ray.init(num_cpus=cpus):

    # Open the images stacked along the "band" dimension, with -9999 as nodata.
    with gw.open(
        files,
        stack_dim="band",
        nodata=-9999,
    ) as src:
        src = src.gw.mask_nodata()
        # Reduce the stacked images to 3 components.
        transformed_dataarray = src.gw_ext.k_pca(
            gamma=15, n_components=3, n_workers=cpus, chunk_size=512
        )

        # Plot each component in its own figure so each gets a separate colour scale.
        # The enumerate index is zero-based, so the labels use i + 1.
        for i, comp in enumerate(transformed_dataarray.component.values):
            transformed_dataarray.sel(component=comp).plot.imshow(
                figsize=(8, 6), robust=True, cbar_kwargs={'label': f'Component {i+1}'}
            )
            plt.title(f'Principal Component {i+1}')
            plt.show()

Save your outputs

# Write the components to "test.tif", replacing any existing file of that name.
transformed_dataarray.gw.save("test.tif", overwrite=True)