Skip to content

Plotting functions

Module containing plotting functions for summarising and visualising data and dataframe objects.

dod2k_utilities.ut_plot

@author: Lucie Luecke

Plotting functions for displaying data(frames).

Last updated 19/12/2025 for publication of dod2k v2.0

df_colours_markers(db_name='dod2k_v2.0')

Generate archive colours and proxy markers for plotting functions.

Parameters:

Name Type Description Default
db_name str

Name of the database CSV file to load. Default is 'dod2k_v2.0'.

'dod2k_v2.0'

Returns:

Name Type Description
archive_colour dict

Dictionary mapping archive types to color codes.

archives_sorted ndarray

Sorted list of archive types based on record count.

proxy_marker dict

Dictionary mapping each archive type and proxy to a specific marker.

Source code in dod2k_utilities/ut_plot.py
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
def df_colours_markers(db_name='dod2k_v2.0'):
    """
    Generate archive colours and proxy markers for plotting functions.

    Parameters
    ----------
    db_name : str, optional
        Name of the database CSV file to load. Default is 'dod2k_v2.0'.

    Returns
    -------
    archive_colour : dict
        Dictionary mapping archive types to color codes.
    archives_sorted : numpy.ndarray
        Archive types sorted in descending order of record count.
    proxy_marker : dict
        Nested dictionary mapping each archive type and proxy to a marker.

    Notes
    -----
    Archives with more than 10 records ('major' archives) each get a
    unique colour; all remaining archives share the last palette colour.
    Within a major archive each proxy gets its own marker; every proxy of
    a minor archive shares one marker per archive.
    """
    # qualitative palette; the last colour doubles as the 'other' bin
    cols = ['#4477AA', '#EE6677', '#228833', '#CCBB44', '#66CCEE', '#AA3377', '#BBBBBB', '#44AA99']

    df = utf.load_compact_dataframe_from_csv(db_name)

    # count records per archive type
    archive_count = {at: df.loc[df['archiveType'] == at, 'paleoData_proxy'].count()
                     for at in set(df['archiveType'])}

    archive_colour = {'other': cols[-1]}
    proxy_marker   = {}
    other_archives = []
    major_archives = []

    mt = 'ov^s<>pP*XDdh'*10  # repeated string of marker types (indexed cyclically)

    ijk = 0  # running marker index shared across minor ('other') archives
    sort = np.argsort(list(archive_count.values()))
    archives_sorted = np.array(list(archive_count.keys()))[sort][::-1]
    for ii, at in enumerate(archives_sorted):
        print(ii, at, archive_count[at])
        if archive_count[at] > 10:
            archive_colour[at] = cols[ii]
            major_archives += [at]
        else:
            archive_colour[at] = cols[-1]
            other_archives += [at]
        arch_mask = df['archiveType'] == at
        arch_proxy_types = np.unique(df['paleoData_proxy'][arch_mask])
        proxy_marker[at] = {}
        for jj, pt in enumerate(arch_proxy_types):
            # major archive: one marker per proxy; minor archive: one marker overall
            marker = mt[jj] if at in major_archives else mt[ijk]
            proxy_marker[at][pt] = marker
        if at not in major_archives:
            ijk += 1

    return archive_colour, archives_sorted, proxy_marker

geo_EOF_plot(df, pca_rec, EOFs, keys, fs=(13, 8), dpi=350, barlabel='EOF', which_EOF=0)

Plot geographic distribution of records colored by EOF loadings.

Parameters:

Name Type Description Default
df DataFrame

DataFrame with paleo-proxy records. Must include columns 'geo_meanLat', 'geo_meanLon', 'datasetId'.

required
pca_rec dict

Dictionary mapping keys to lists of dataset IDs included in PCA.

required
EOFs dict

Dictionary mapping keys to EOF arrays.

required
keys list

List of keys (record types) to plot.

required
fs tuple

Figure size in inches. Default is (13, 8).

(13, 8)
dpi int

Figure resolution in dots per inch. Default is 350.

350
barlabel str

Label for the colorbar. Default is 'EOF'.

'EOF'
which_EOF int

Index of the EOF to plot. Default is 0 (first EOF).

0

Returns:

Name Type Description
fig Figure

Matplotlib figure object containing the EOF-colored map.

Source code in dod2k_utilities/ut_plot.py
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
def geo_EOF_plot(df, pca_rec, EOFs, keys, fs=(13,8), dpi=350, barlabel='EOF', which_EOF=0):
    """
    Plot geographic distribution of records colored by EOF loadings.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame with paleo-proxy records. Must include columns
        'geo_meanLat', 'geo_meanLon', 'datasetId'.
    pca_rec : dict
        Dictionary mapping keys to lists of dataset IDs included in PCA.
    EOFs : dict
        Dictionary mapping keys to EOF arrays.
    keys : list
        List of keys (record types) to plot.
    fs : tuple, optional
        Figure size in inches. Default is (13, 8).
    dpi : int, optional
        Figure resolution in dots per inch. Default is 350.
    barlabel : str, optional
        Label for the colorbar. Default is 'EOF'.
    which_EOF : int, optional
        Index of the EOF to plot. Default is 0 (first EOF).

    Returns
    -------
    fig : matplotlib.figure.Figure
        Matplotlib figure object containing the EOF-colored map.

    Notes
    -----
    For keys 'tree_d18O' and 'coral_d18O' the EOF loadings are multiplied
    by -1 (mirroring a corresponding sign flip of the PCs) and the legend
    labels are annotated with '(*(-1))'.
    """
    #%% plot the spatial distribution of all records
    proxy_lats = df['geo_meanLat'].values
    proxy_lons = df['geo_meanLon'].values

    # plots the map
    fig = plt.figure(figsize=fs, dpi=dpi) #fs=(13,8), dpi=350
    grid = GS(1, 3)

    ax = plt.subplot(grid[:, :], projection=ccrs.Robinson()) # create axis with Robinson projection of globe

    # ax.stock_img(clip_on=False)

    ax.add_feature(cfeature.LAND, alpha=0.6) # adds land features
    ax.add_feature(cfeature.OCEAN, alpha=0.6, facecolor='#C5DEEA') # adds ocean features
    ax.coastlines() # adds coastline features

    ax.set_global()

    mt = 'v^soD<''osD>pP*Xdh' # generates string of marker types

    # some of the following lines are hard-coded to plot EOF1, 
    # but asking for EOFs[key][0] here and also in f.get_colours will give the plot of EOF2
    # also need to modify the colorscale label cax.set_ylabel('EOF 2')

    # if we are multipling the PCs x -1, multiply the EOF loadings by -1 as well
    # a[key] holds the sign factor; label[key] the legend text for that key
    a= {}
    label={}
    for key in keys:
        if key in ['tree_d18O', 'coral_d18O']:# multiply EOF sign by -1
            a[key] = -1
            label[key] = key+' ($\\ast(-1)$)'
        else:
            a[key] = 1
            label[key]=key
    print(a)

    # flat list of all (sign-corrected) loadings, used only to build the shared colorbar mappable
    all_EOFs = [a[key]*EOFs[key][which_EOF][ii]  for key in keys for ii in range(len(EOFs[key][which_EOF]))]

    colors, sm, norm = get_colours2(all_EOFs, 
                                colormap='RdBu_r',minval=-0.6,maxval=0.6)

    ijk=0



    for key in keys:

        # one marker shape per key; the fill colour encodes the EOF loading
        marker  = mt[ijk]

        colors = get_colours(a[key]*EOFs[key][which_EOF], colormap='RdBu_r',minval=-0.6,maxval=0.6)
        id_mask = np.isin(df['datasetId'], pca_rec[key]) 
        for jj in range(len(pca_rec[key])):

            # legend entry only once per key (on the first record)
            scat_label   = label[key]+' (n=%d)'%len(pca_rec[key]) if jj==0 else None

            # first scatter: filled marker carrying the loading colour
            plt.scatter(proxy_lons[id_mask][jj], proxy_lats[id_mask][jj], 
                        transform=ccrs.PlateCarree(), zorder=999,
                        marker=marker, 
                        color=colors[jj], 
                        label=None,
                        lw=.3, ec='k', s=200)
            # second scatter: unfilled outline that carries the legend entry
            plt.scatter(proxy_lons[id_mask][jj], proxy_lats[id_mask][jj], 
                        transform=ccrs.PlateCarree(), zorder=999,
                        marker=marker, 
                        color='none', 
                        label=scat_label, 
                        lw=1, ec='k', s=200)
        ijk+=1

    # colorbar in an inset axis to the right of the map
    cax=ax.inset_axes([1.02, 0.1, 0.035, 0.8])
    sm.set_array([])

    matplotlib.colorbar.ColorbarBase(cax, cmap='RdBu_r', norm=norm)
    cax.set_ylabel(barlabel, fontsize=13.5)

    plt.legend(bbox_to_anchor=(0.03,-0.01), loc='upper left', ncol=3, fontsize=13.5, framealpha=0)
    grid.tight_layout(fig)

    return fig

geo_plot(df, fs=(9, 4.5), dpi=350, return_col=False, **kwargs)

Plot the spatial distribution of paleo-proxy records on a global map.

Parameters:

Name Type Description Default
df DataFrame

DataFrame containing the records. Must include columns: 'geo_meanLat', 'geo_meanLon', 'archiveType', 'paleoData_proxy', 'datasetId'.

required
fs tuple

Figure size (width, height) in inches. Default is (9, 4.5).

(9, 4.5)
dpi int

Figure resolution in dots per inch. Default is 350.

350
**kwargs dict

Optional keyword arguments. Supported keys: - 'mark_records': dict, to highlight specific datasets on the map. - 'mark_archives': list of archive keys to mark.

{}

Returns:

Name Type Description
fig Figure

Matplotlib figure object containing the map.

Source code in dod2k_utilities/ut_plot.py
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
def geo_plot(df, fs=(9,4.5), dpi=350, return_col=False,  **kwargs):
    """
    Plot the spatial distribution of paleo-proxy records on a global map.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing the records. Must include columns:
        'geo_meanLat', 'geo_meanLon', 'archiveType', 'paleoData_proxy', 'datasetId'.
    fs : tuple, optional
        Figure size (width, height) in inches. Default is (9, 4.5).
    dpi : int, optional
        Figure resolution in dots per inch. Default is 350.
    return_col : bool, optional
        If True, also return the archive colour mapping. Default is False.
    **kwargs : dict
        Optional keyword arguments. Supported keys:
        - 'mark_records': dict mapping '<archive>_<proxy>' keys to lists
          of dataset IDs to highlight (e.g. records included in a PCA).
        - 'mark_archives': list of '<archive>_<proxy>' keys to mark.
        Both keys must be supplied together for highlighting to occur.

    Returns
    -------
    fig : matplotlib.figure.Figure
        Matplotlib figure object containing the map.
    archive_colour : dict
        Mapping from archive type to colour. Only returned when
        ``return_col`` is True.
    """
    archive_colour, archives_sorted, proxy_marker = df_colours_markers()

    #%% plot the spatial distribution of all records
    proxy_lats = df['geo_meanLat'].values
    proxy_lons = df['geo_meanLon'].values

    # plots the map
    fig = plt.figure(figsize=fs, dpi=dpi)
    grid = GS(1, 3)

    # create axis with Robinson projection of globe
    ax = plt.subplot(grid[:, :], projection=ccrs.Robinson())

    ax.add_feature(cfeature.LAND, alpha=0.5)   # adds land features
    ax.add_feature(cfeature.OCEAN, alpha=0.3, facecolor='#C5DEEA')  # adds ocean features
    ax.coastlines()                            # adds coastline features

    ax.set_global()

    # one scatter call per (archive, proxy) combination, coloured by archive
    for at in archives_sorted:
        at_mask = df['archiveType']==at
        for pt in set(df[at_mask]['paleoData_proxy']):
            pt_mask = df['paleoData_proxy']==pt
            label   = at+': '+pt+' (n=%d)'%len(df[at_mask&pt_mask])
            plt.scatter(proxy_lons[pt_mask&at_mask], proxy_lats[pt_mask&at_mask], 
                        transform=ccrs.PlateCarree(), zorder=999,
                        marker=proxy_marker[at][pt], 
                        color=archive_colour[at], 
                        label=label,
                        lw=.3, ec='k', s=200)

            # optionally redraw selected records with a thicker edge to highlight them
            # (requires BOTH 'mark_records' and 'mark_archives' in kwargs)
            if 'mark_records' in kwargs and 'mark_archives' in kwargs:
                key = '%s_%s'%(at, pt) if at!='lake sediment' else 'lake sediment_d18O+d2H'
                if key in kwargs['mark_archives']:
                    id_mask = np.isin(df['datasetId'], kwargs['mark_records'][key])
                    plt.scatter(proxy_lons[pt_mask&at_mask&id_mask], proxy_lats[pt_mask&at_mask&id_mask], 
                                transform=ccrs.PlateCarree(), zorder=999,
                                marker=proxy_marker[at][pt],
                                lw=2, ec='k', color=archive_colour[at], s=200)

    hh, ll = ax.get_legend_handles_labels()
    plt.legend(hh, ll, bbox_to_anchor=(0.03,-0.01), loc='upper left', ncol=3, fontsize=12, framealpha=0)
    grid.tight_layout(fig)

    if return_col:
        return fig, archive_colour

    return fig

get_archive_colours(archives_sorted, archive_count, cols=['#4477AA', '#EE6677', '#228833', '#CCBB44', '#66CCEE', '#AA3377', '#BBBBBB', '#44AA99', '#332288'])

Assign colors to archive types based on record abundance.

Parameters:

Name Type Description Default
archives_sorted list of str

Archive types sorted in descending or preferred order, typically by record count.

required
archive_count dict

Dictionary mapping archive type to total number of records.

required
cols list of str

List of color hex codes used to assign colors to major archives. The last color in the list is reserved for minor archives and the aggregated 'other' category.

['#4477AA', '#EE6677', '#228833', '#CCBB44', '#66CCEE', '#AA3377', '#BBBBBB', '#44AA99', '#332288']

Returns:

Name Type Description
archive_colour dict

Mapping from archive type to assigned color. Includes an 'other' entry for minor archives.

major_archives list of str

Archive types with more than 10 records.

other_archives list of str

Archive types with 10 or fewer records.

Notes

Archive types with more than 10 records are treated as major archives and assigned unique colors. All remaining archive types are grouped under 'other' and share a common color.

Source code in dod2k_utilities/ut_plot.py
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
def get_archive_colours(archives_sorted, archive_count, cols= [ '#4477AA', '#EE6677', '#228833', '#CCBB44', '#66CCEE', '#AA3377', '#BBBBBB', '#44AA99', '#332288']):
    """
    Assign colors to archive types based on record abundance.

    Parameters
    ----------
    archives_sorted : list of str
        Archive types sorted in descending or preferred order, typically
        by record count.
    archive_count : dict
        Dictionary mapping archive type to total number of records.
    cols : list of str, optional
        List of color hex codes used to assign colors to major archives.
        The last color in the list is reserved for minor archives and
        the aggregated ``'other'`` category.

    Returns
    -------
    archive_colour : dict
        Mapping from archive type to assigned color. Includes an
        ``'other'`` entry for minor archives.
    major_archives : list of str
        Archive types with more than 10 records.
    other_archives : list of str
        Archive types with 10 or fewer records.

    Notes
    -----
    Archive types with more than 10 records are treated as major archives
    and assigned unique colors; every remaining archive type shares the
    last palette color under the ``'other'`` umbrella.
    """
    fallback = cols[-1]  # shared colour for every minor archive
    archive_colour = {'other': fallback}
    major_archives, other_archives = [], []

    for idx, archive in enumerate(archives_sorted):
        n_records = archive_count[archive]
        print(idx, archive, n_records)
        if n_records > 10:
            # major archive: pick its own colour by position in the sort order
            major_archives.append(archive)
            archive_colour[archive] = cols[idx]
        else:
            other_archives.append(archive)
            archive_colour[archive] = fallback
    return archive_colour, major_archives, other_archives

get_colours(data, colormap='brewer_RdBu_11', minval=False, maxval=False, return_mappable=False)

Generate colors from a colormap based on data values.

Parameters:

Name Type Description Default
data array - like

Array or list of numerical values to map to colors.

required
colormap str

Matplotlib colormap name. Default is 'brewer_RdBu_11'.

'brewer_RdBu_11'
minval float or False

Minimum value for color normalization. If False, uses min(data). Default is False.

False
maxval float or False

Maximum value for color normalization. If False, uses max(data). Default is False.

False
return_mappable bool

If True, also return ScalarMappable and Normalize objects for colorbar. Default is False.

False

Returns:

Type Description
list of tuple

List of RGBA color tuples, one for each data value, in same order as data.

Examples:

>>> temps = [15, 20, 25, 30, 35]
>>> colors = get_colours(temps, colormap='coolwarm')
>>> # Use colors for scatter plot
>>> plt.scatter(x, y, c=colors)
Source code in dod2k_utilities/ut_plot.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
def get_colours(data, colormap='brewer_RdBu_11', minval=False,
                maxval=False, return_mappable=False):
    """
    Generate colors from a colormap based on data values.

    Parameters
    ----------
    data : array-like
        Array or list of numerical values to map to colors.
    colormap : str, optional
        Matplotlib colormap name. Default is 'brewer_RdBu_11'.
    minval : float or False, optional
        Minimum value for color normalization. If False, uses min(data).
        Default is False.
    maxval : float or False, optional
        Maximum value for color normalization. If False, uses max(data).
        Default is False.
    return_mappable : bool, optional
        If True, also return ScalarMappable and Normalize objects for colorbar.
        Default is False.

    Returns
    -------
    cols : list of tuple
        List of RGBA color tuples, one for each data value, in same order as data.
    sm : matplotlib.cm.ScalarMappable
        Mappable for building a colorbar. Only returned if ``return_mappable``.
    norm : matplotlib.colors.Normalize
        Normalization used for the colors. Only returned if ``return_mappable``.

    Examples
    --------
    >>> temps = [15, 20, 25, 30, 35]
    >>> colors = get_colours(temps, colormap='coolwarm')
    >>> # Use colors for scatter plot
    >>> plt.scatter(x, y, c=colors)
    """
    from matplotlib.colors import Normalize
    import matplotlib.cm as cm
    # compare against the False sentinel explicitly so that an explicit
    # minval/maxval of 0 (falsy) is honoured instead of being overwritten
    if minval is False:
        minval = np.min(data)
    if maxval is False:
        maxval = np.max(data)
    N = len(data)
    cmap = cm.get_cmap(colormap)
    sm   = cm.ScalarMappable(cmap=colormap)
    sm.set_array(range(N))
    norm = Normalize(vmin=minval, vmax=maxval)
    rgba = cmap(norm(data))  # map normalized data through the colormap
    cols = list(rgba)
    if return_mappable:
        return cols, sm, norm
    return cols

get_colours2(data, colormap='brewer_RdBu_11', minval=False, maxval=False)

generates colours from a colormap based on the data values (array or list) returns cols: list of colours, in same order as data

Source code in dod2k_utilities/ut_plot.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
def get_colours2(data, colormap='brewer_RdBu_11', minval=False,
                maxval=False):
    """
    Generate colours from a colormap based on data values.

    Parameters
    ----------
    data : array-like
        Array or list of numerical values to map to colors.
    colormap : str, optional
        Matplotlib colormap name. Default is 'brewer_RdBu_11'.
    minval : float or False, optional
        Minimum value for color normalization. If False, uses min(data).
    maxval : float or False, optional
        Maximum value for color normalization. If False, uses max(data).

    Returns
    -------
    cols : list of tuple
        RGBA color tuples, one for each data value, in same order as data.
    sm : matplotlib.cm.ScalarMappable
        Mappable for building a colorbar.
    norm : matplotlib.colors.Normalize
        Normalization used for the colors.
    """
    from matplotlib.colors import Normalize
    import matplotlib.cm as cm
    # compare against the False sentinel explicitly so that an explicit
    # minval/maxval of 0 (falsy) is honoured instead of being overwritten
    if minval is False:
        minval = np.min(data)
    if maxval is False:
        maxval = np.max(data)
    N = len(data)
    cmap = cm.get_cmap(colormap)
    sm   = cm.ScalarMappable(cmap=colormap)
    sm.set_array(range(N))
    norm = Normalize(vmin=minval, vmax=maxval)
    rgba = cmap(norm(data))  # map normalized data through the colormap
    cols = list(rgba)
    return cols, sm, norm

plot_PCs(years_hom, eigenvectors, paleoData_zscores_hom, title='', name='', col='tab:blue')

Plot principal components and reconstructed time series.

Parameters:

Name Type Description Default
years_hom ndarray

Homogenised time axis.

required
eigenvectors ndarray

Eigenvectors from PCA.

required
paleoData_zscores_hom MaskedArray

Homogenised z-score data array of shape (n_records, n_years).

required
title str

Title for plots.

''
name str

Name suffix for saving figures.

''

Returns:

Name Type Description
PCs ndarray

Principal component time series.

eigenvectors ndarray

Eigenvectors (EOF loadings) corresponding to PCs.

Source code in dod2k_utilities/ut_plot.py
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
def plot_PCs(years_hom, eigenvectors, paleoData_zscores_hom, title='', name='', col='tab:blue'):
    """
    Plot principal components and reconstructed time series.

    Parameters
    ----------
    years_hom : numpy.ndarray
        Homogenised time axis.
    eigenvectors : numpy.ndarray
        Eigenvectors from PCA.
    paleoData_zscores_hom : numpy.ma.MaskedArray
        Homogenised z-score data array of shape (n_records, n_years).
    title : str, optional
        Title for plots.
    name : str, optional
        Name suffix for saving figures (passed as ``dir`` to utf.save_fig).
    col : str, optional
        Matplotlib colour used for the line plots. Default is 'tab:blue'.

    Returns
    -------
    PCs : numpy.ndarray
        Principal component time series.
    eigenvectors : numpy.ndarray
        Eigenvectors (EOF loadings) corresponding to PCs.

    Notes
    -----
    Creates four figures: (1) mean z-scores with PC1/PC2 (saved via
    utf.save_fig), (2) z-scores vs their reconstruction, (3) original vs
    reconstructed series, (4) EOF1/EOF2 loadings per record (saved).
    """
    # project the data onto the eigenvectors to obtain the PC time series
    PCs = np.dot(eigenvectors.T, paleoData_zscores_hom.data)

    # Dzr: data reconstructed from the retained PCs, re-masked like the input
    Dz   = paleoData_zscores_hom.data
    Dzr  = np.ma.masked_array(np.dot(eigenvectors, PCs), mask=paleoData_zscores_hom.mask)


    # --- figure 1: mean z-score series plus the first two PCs ---
    fig = plt.figure()
    plt.suptitle(title)
    ax = plt.subplot(311)

    plt.plot(years_hom, np.ma.mean(paleoData_zscores_hom, axis=0), #color='k', 
             zorder=999, color=col)

    ax.axes.xaxis.set_ticklabels([])
    # vertical guides marking the start/end of the homogenised time axis
    plt.axvline(years_hom[0], color='k', lw=.5, alpha=.5)
    plt.axvline(years_hom[-1], color='k', lw=.5, alpha=.5)
    plt.xlim(years_hom[0]-20, years_hom[-1]+20)
    plt.ylabel('paleoData_zscores')
    for ii in range(2):
        # subplots 2 and 3: PC1 and PC2
        ax = plt.subplot(311+ii+1)
        plt.plot(years_hom, PCs[ii], color=col)
        if ii==1: plt.xlabel('time (year CE)')
        plt.ylabel('PC %d'%(ii+1))
        plt.axhline(0, color='k', alpha=0.5, lw=0.5)
        plt.axvline(years_hom[0], color='k', lw=.5, alpha=.5)
        plt.axvline(years_hom[-1], color='k', lw=.5, alpha=.5)
        plt.xlim(years_hom[0]-20, years_hom[-1]+20)
        if ii==0: ax.axes.xaxis.set_ticklabels([])

    utf.save_fig(fig, 'PCs_%s'%title, dir=name)

    # --- figure 2: scatter of original vs reconstructed z-scores (not saved) ---
    plt.figure()
    for ii in range(paleoData_zscores_hom.shape[0]):
        plt.plot(paleoData_zscores_hom[ii,:], Dzr[ii,:],  alpha=0.4, lw=1, color=col)
    plt.xlabel('paleoData_zscores')
    plt.ylabel('paleoData_zscores_reconstructed')


    # --- figure 3: all original series (top) vs all reconstructed series (bottom) ---
    fig = plt.figure()
    plt.suptitle(title)
    ax = plt.subplot(211)
    for ii in range(paleoData_zscores_hom.shape[0]):
        plt.plot(years_hom, paleoData_zscores_hom[ii,:], color=col, alpha=0.4, lw=1)
    plt.plot(years_hom, np.ma.mean(paleoData_zscores_hom, axis=0), color='k', zorder=999)

    ax.axes.xaxis.set_ticklabels([])
    plt.axvline(years_hom[0], color='k', lw=.5, alpha=.5)
    plt.axvline(years_hom[-1], color='k', lw=.5, alpha=.5)
    plt.xlim(years_hom[0]-20, years_hom[-1]+20)
    plt.ylabel('paleoData_zscores')

    ax = plt.subplot(212)
    for ii in range(Dzr.shape[0]):
        plt.plot(years_hom, Dzr[ii,:], color=col, alpha=0.4, lw=1)
    plt.plot(years_hom, np.ma.mean(Dzr, axis=0), color='k', zorder=999)

    ax.axes.xaxis.set_ticklabels([])
    plt.axvline(years_hom[0], color='k', lw=.5, alpha=.5)
    plt.axvline(years_hom[-1], color='k', lw=.5, alpha=.5)
    plt.xlim(years_hom[0]-20, years_hom[-1]+20)
    plt.ylabel('paleoData_zscores \n (reconstructed)')


    # --- figure 4: EOF1 and EOF2 loadings for each record ---
    n_recs = paleoData_zscores_hom.data.shape[0]
    fig = plt.figure()
    plt.suptitle(title)
    for ii in range(2):
        plt.subplot(211+ii)
        plt.plot(range(n_recs), eigenvectors[ii], color=col)
        if ii==1: plt.xlabel('rec')
        plt.ylabel('EOF %d load'%(ii+1))
        plt.axhline(0, color='k', alpha=0.5, lw=0.5)

    utf.save_fig(fig, 'EOFloading_%s'%title, dir=name)

    return PCs, eigenvectors

plot_count_proxy_by_archive_all(df, archive_proxy_count, archive_proxy_ticks, archive_colour)

Plot proxy counts by archive for all proxy types.

Parameters:

Name Type Description Default
df DataFrame

DataFrame containing proxy and archive metadata (not directly used for plotting but retained for consistency).

required
archive_proxy_count dict

Dictionary mapping proxy identifiers (e.g., "archive: proxy") to record counts.

required
archive_proxy_ticks list of str

Ordered list of proxy identifiers used for tick labels.

required
archive_colour dict

Mapping from archive type to color.

required

Returns:

Type Description
Figure

Figure containing the bar chart.

Notes

All proxy types are included regardless of count. Bars are sorted in descending order of record count. Archive colors are derived from the archive prefix of each proxy identifier.

Source code in dod2k_utilities/ut_plot.py
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
def plot_count_proxy_by_archive_all(df, archive_proxy_count, archive_proxy_ticks, archive_colour) :
    """
    Plot proxy counts by archive for all proxy types.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing proxy and archive metadata (not directly
        used for plotting but retained for consistency).
    archive_proxy_count : dict
        Dictionary mapping proxy identifiers (e.g., ``"archive: proxy"``)
        to record counts.
    archive_proxy_ticks : list of str
        Ordered list of proxy identifiers used for tick labels.
    archive_colour : dict
        Mapping from archive type to color.

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing the bar chart.

    Notes
    -----
    All proxy types are included regardless of count. Bars are sorted in
    descending order of record count. Archive colors are derived from
    the archive prefix of each proxy identifier.
    """
    fig = plt.figure(figsize=(10, 7), dpi=500)
    ax  = plt.gca()

    # per-tick counts, labels, colours, and the archive prefix of each tick
    counts   = [archive_proxy_count[tick] for tick in archive_proxy_ticks]
    ticks    = [tick for tick in archive_proxy_ticks]
    colours  = [archive_colour[tick.split(':')[0]] for tick in archive_proxy_ticks]
    archives = [tick.split(':')[0] for tick in archive_proxy_ticks]

    order = np.argsort(counts)[::-1]  # descending by count

    # draw throwaway bars so the legend gets one handle per archive,
    # capture the handles, then wipe the axis before the real plot
    unique_archives = set(archives)
    plt.bar(range(len(unique_archives)), range(len(unique_archives)),
            color=[archive_colour[aa] for aa in unique_archives],
            label=unique_archives)
    handles, labels = ax.get_legend_handles_labels()
    plt.legend()
    ax.cla()

    # the actual bar chart, sorted by descending count
    plt.bar(np.arange(len(ticks)),
            np.array(counts)[order],
            color=np.array(colours)[order])

    plt.xlabel('proxy type')
    plt.ylabel('count')
    ax.set_xticks(np.arange(len(ticks)),
                  [ticks[ii] for ii in order],
                  rotation=45, ha='right', fontsize=9)
    plt.legend(handles[::-1], labels[::-1], ncol=2)


    fig.tight_layout()
    return fig

plot_count_proxy_by_archive_short(df, archive_proxy_count, archive_proxy_ticks, archive_colour)

Plot proxy counts by archive for proxy types exceeding a count threshold.

Parameters:

Name Type Description Default
df DataFrame

DataFrame containing proxy and archive metadata (not directly used for plotting but retained for consistency).

required
archive_proxy_count dict

Dictionary mapping proxy identifiers (e.g., "archive: proxy") to record counts.

required
archive_proxy_ticks list of str

Ordered list of proxy identifiers used for tick labels.

required
archive_colour dict

Mapping from archive type to color.

required

Returns:

Type Description
Figure

Figure containing the bar chart.

Notes

Only proxy types with more than 10 records are included. Bars are sorted in descending order of count. Archive type is inferred from the prefix of each proxy identifier and used for color coding and legend construction.

Source code in dod2k_utilities/ut_plot.py
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
def plot_count_proxy_by_archive_short(df, archive_proxy_count, archive_proxy_ticks, archive_colour) :
    """
    Plot proxy counts by archive for proxy types exceeding a count threshold.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing proxy and archive metadata (not directly
        used for plotting but retained for consistency).
    archive_proxy_count : dict
        Dictionary mapping proxy identifiers (e.g., ``"archive: proxy"``)
        to record counts.
    archive_proxy_ticks : list of str
        Ordered list of proxy identifiers used for tick labels.
    archive_colour : dict
        Mapping from archive type to color.

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing the bar chart.

    Notes
    -----
    Only proxy types with more than 10 records are included. Bars are
    sorted in descending order of count. Archive type is inferred from
    the prefix of each proxy identifier and used for color coding and
    legend construction.
    """
    fig = plt.figure(figsize=(8, 5), dpi=500)
    ax  = plt.gca()

    # apply the >10-record filter once and derive everything from it
    kept_ticks    = [tt for tt in archive_proxy_ticks if archive_proxy_count[tt] > 10]
    kept_counts   = [archive_proxy_count[tt] for tt in kept_ticks]
    kept_archives = [tt.split(':')[0] for tt in kept_ticks]
    kept_colours  = [archive_colour[aa] for aa in kept_archives]

    # indices that order the bars from largest to smallest count
    order = np.argsort(kept_counts)[::-1]

    # draw throwaway bars (one per archive) solely to harvest legend
    # handles, then wipe the axis before plotting for real
    unique_archives = set(kept_archives)
    plt.bar(range(len(unique_archives)), range(len(unique_archives)),
            color=[archive_colour[aa] for aa in unique_archives],
            label=unique_archives)
    handles, labels = ax.get_legend_handles_labels()
    plt.legend()
    ax.cla()

    # the actual chart: one bar per retained proxy type, sorted by count
    plt.bar(np.arange(len(kept_ticks)),
            np.array(kept_counts)[order],
            color=np.array(kept_colours)[order])

    plt.xlabel('proxy type')
    plt.ylabel('count')
    ax.set_xticks(np.arange(len(kept_ticks)),
                  [kept_ticks[ii] for ii in order],
                  rotation=45, ha='right', fontsize=10)
    plt.legend(handles[::-1], labels[::-1])

    fig.tight_layout()
    return fig

plot_coverage(df, archives_sorted, major_archives, other_archives, archive_colour, all=False, ysc='linear', return_data=False)

Plot temporal coverage of proxy records, optionally separated by archive type.

Parameters:

Name Type Description Default
df DataFrame

DataFrame containing a 'year' column with iterable year values for each record and an 'archiveType' column.

required
archives_sorted list of str

Ordered list of archive types present in the dataset.

required
major_archives list of str

Archive types treated as major and plotted individually.

required
other_archives list of str

Archive types grouped under the 'other' category.

required
archive_colour dict

Mapping from archive type (and 'other') to color.

required
all bool

If True, plot total coverage across all archives.

False
ysc (linear, log)

Y-axis scale.

'linear'
return_data bool

If True, return coverage arrays in addition to the figure.

False

Returns:

Name Type Description
Figure

Coverage plot figure.

years (ndarray, optional)

Array of years spanning the full temporal range.

coverage (ndarray, optional)

Total number of records available for each year.

coverage_by_archive (dict, optional)

Dictionary mapping archive type to yearly coverage arrays.

Notes

Coverage is defined as the number of records overlapping each year. Archive types not classified as major are aggregated into an 'other' category.

Source code in dod2k_utilities/ut_plot.py
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
def plot_coverage(df, archives_sorted, major_archives, other_archives, archive_colour, all=False, ysc='linear', return_data=False):
    """
    Plot temporal coverage of proxy records, optionally separated by archive type.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing a ``'year'`` column with iterable year values
        for each record and an ``'archiveType'`` column.
    archives_sorted : list of str
        Ordered list of archive types present in the dataset.
    major_archives : list of str
        Archive types treated as major and plotted individually.
    other_archives : list of str
        Archive types grouped under the ``'other'`` category.
    archive_colour : dict
        Mapping from archive type (and ``'other'``) to color.
    all : bool, optional
        If True, plot total coverage across all archives.
        (Name kept for backward compatibility even though it shadows
        the ``all`` builtin.)
    ysc : {'linear', 'log'}, optional
        Y-axis scale.
    return_data : bool, optional
        If True, return coverage arrays in addition to the figure.

    Returns
    -------
    matplotlib.figure.Figure
        Coverage plot figure.
    years : numpy.ndarray, optional
        Array of years spanning the full temporal range.
    coverage : numpy.ndarray, optional
        Total number of records available for each year.
    coverage_by_archive : dict, optional
        Dictionary mapping archive type to yearly coverage arrays.

    Notes
    -----
    Coverage is defined as the number of records overlapping each year.
    Archive types not classified as major are aggregated into an
    ``'other'`` category.
    """
    #%% compute the coverage of all records and coverage per archive

    MinY     = np.array([min([float(sy) for sy in yy])  for yy in df['year']]) # find minimum year for each record
    MaxY     = np.array([max([float(sy) for sy in yy])  for yy in df['year']]) # find maximum year for each record
    years    = np.arange(min(MinY), max(MaxY)+1)

    # generate array of coverage (how many records are available each year, in total)
    coverage = np.zeros(years.shape[0])
    for ii in range(len(df['year'])):
        coverage[(years>=MinY[ii])&(years<=MaxY[ii])] += 1

    # generate array of coverage for each archive type; archives not in
    # major_archives are pooled into the 'other' bucket
    coverage_by_archive = {arch: np.zeros(years.shape[0]) for arch in major_archives+['other'] }
    for arch in archives_sorted:
        arch_mask = df['archiveType']==arch
        # decide the target bucket once per archive (was re-checked per record)
        target = arch if arch in major_archives else 'other'
        for ii in range(len(df[arch_mask]['year'])):
            coverage_by_archive[target][(years>=MinY[arch_mask][ii])&(years<=MaxY[arch_mask][ii])] += 1

    fig = plt.figure(figsize=(8, 4), dpi=200)
    ax = plt.gca()
    if all:
        plt.step(years, coverage, color='k', label='all records', lw=3)
    plt.xlabel('year')
    plt.ylabel('# of records per archive')

    plt.xlim(-100, 2020)
    ax.grid(False)
    # omit the 'other' curve when nothing was pooled into it
    if np.sum(coverage_by_archive['other'])==0:
        archives = major_archives
    else:
        archives = major_archives+['other']
    for arch in archives:
        plt.step(years, coverage_by_archive[arch], color=archive_colour[arch],
                 label=arch, lw=1.8)

    h1, l1 = ax.get_legend_handles_labels()
    if ysc=='log':
        # log view: park the legend above the axes so it does not cover curves
        plt.legend(h1, l1, ncol=4, framealpha=0, bbox_to_anchor=(0,1), loc='lower left' )
    else:
        plt.legend(h1, l1, ncol=3, framealpha=0)
    fig.tight_layout()
    plt.yscale(ysc)
    if return_data:
        return fig, years, coverage, coverage_by_archive
    return fig

plot_coverage2(df, years, title='')

Plot the coverage of records over a range of years.

This function counts how many records in the DataFrame overlap with each year in the given range and produces a step plot showing total coverage.

Parameters:

Name Type Description Default
df DataFrame

DataFrame containing 'year' data for each record. Each row should have 'miny' and 'maxy' indicating the start and end year of the record.

required
years array - like

Array of years over which to compute coverage.

required
title str

Title of the plot. Default is an empty string.

''

Returns:

Type Description
Figure

The matplotlib Figure object containing the plot.

Source code in dod2k_utilities/ut_plot.py
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
def plot_coverage2(df, years, title=''):
    """
    Plot the coverage of records over a range of years.

    Counts how many records in the DataFrame overlap with each year in the
    given range and produces a step plot showing total coverage.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing 'year' data for each record. Each row should have
        'miny' and 'maxy' indicating the start and end year of the record.
    years : array-like
        Array of years over which to compute coverage.
    title : str, optional
        Title of the plot. Default is an empty string.

    Returns
    -------
    matplotlib.figure.Figure
        The matplotlib Figure object containing the plot.
    """
    # coverage = number of records whose [miny, maxy] span contains each year
    coverage_filt = np.zeros(years.shape[0])
    for ii in range(len(df['year'])):
        coverage_filt[(years>=df.iloc[ii].miny)&(years<=df.iloc[ii].maxy)] += 1

    fig = plt.figure(figsize=(6, 3), dpi=100)
    plt.title(title)
    ax = plt.gca()
    plt.step(years, coverage_filt, color='k', label='all records', lw=3)
    plt.xlabel('year')
    plt.ylabel('# of records per archive')

    h1, l1 = ax.get_legend_handles_labels()
    plt.legend(h1, l1, ncol=3, framealpha=0)
    plt.show()
    return fig

plot_coverage_analysis(df, years, key, col, title='')

Plot the coverage of records over a range of years.

This function counts how many records in the DataFrame overlap with each year in the given range and produces a step plot of the coverage curve, drawn with a caller-supplied legend label (key) and line colour (col).

Parameters:

Name Type Description Default
df DataFrame

DataFrame containing 'year' data for each record. Each row should have 'miny' and 'maxy' indicating the start and end year of the record.

required
years array - like

Array of years over which to compute coverage.

required
key str

Legend label for the plotted coverage curve.

required
col str

Colour of the plotted coverage curve.

required
title str

Title of the plot. Default is an empty string.

''

Returns:

Type Description
Figure

The matplotlib Figure object containing the plot.

Source code in dod2k_utilities/ut_plot.py
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
def plot_coverage_analysis(df, years, key, col, title=''):
    """
    Plot the coverage of records over a range of years with a custom label.

    Counts how many records in the DataFrame overlap with each year in the
    given range and produces a step plot of the coverage curve, drawn with
    a caller-supplied legend label and colour.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing 'year' data for each record. Each row should have
        'miny' and 'maxy' indicating the start and end year of the record.
    years : array-like
        Array of years over which to compute coverage.
    key : str
        Legend label for the coverage curve.
    col : str or tuple
        Matplotlib colour for the coverage curve.
    title : str, optional
        Title of the plot. Default is an empty string.

    Returns
    -------
    matplotlib.figure.Figure
        The matplotlib Figure object containing the plot.
    """
    # coverage = number of records whose [miny, maxy] span contains each year
    coverage_filt = np.zeros(years.shape[0])
    for ii in range(len(df['year'])):
        coverage_filt[(years>=df.iloc[ii].miny)&(years<=df.iloc[ii].maxy)] += 1

    fig = plt.figure(figsize=(6, 3), dpi=100)
    plt.title(title)
    ax = plt.gca()
    plt.step(years, coverage_filt, color=col, label=key, lw=3)
    plt.xlabel('year')
    plt.ylabel('# of records per archive')

    h1, l1 = ax.get_legend_handles_labels()
    plt.legend(h1, l1, ncol=3, framealpha=0)
    plt.show()
    return fig

plot_geo_archive_proxy(df, archive_colour, highlight_archives=[], marker='default', size='default', figsize='default')

Plot global distribution of proxy records grouped by archive and proxy type.

Parameters:

Name Type Description Default
df DataFrame

DataFrame containing geographic coordinates and proxy metadata. Must include 'geo_meanLat', 'geo_meanLon', 'archiveType', and 'paleoData_proxy'.

required
archive_colour dict

Mapping from archive type to color.

required
highlight_archives list of str

Archive types to emphasize using archive-specific marker cycling.

[]
marker str or sequence

Marker specification. If 'default', a predefined sequence of marker styles is used.

'default'
size int or float

Marker size. If 'default', a preset size is used.

'default'
figsize tuple or str

Figure size. If 'default', a predefined size is used.

'default'

Returns:

Type Description
Figure

Figure containing the global map.

Notes

Marker shape distinguishes proxy types, while color denotes archive type. Highlighted archives reuse marker cycling per archive, whereas non-highlighted archives use a global marker index.

Source code in dod2k_utilities/ut_plot.py
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
def plot_geo_archive_proxy(df, archive_colour, highlight_archives=(), marker='default', size='default', figsize='default'):
    """
    Plot global distribution of proxy records grouped by archive and proxy type.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing geographic coordinates and proxy metadata.
        Must include ``'geo_meanLat'``, ``'geo_meanLon'``,
        ``'archiveType'``, and ``'paleoData_proxy'``.
    archive_colour : dict
        Mapping from archive type to color.
    highlight_archives : sequence of str, optional
        Archive types to emphasize using archive-specific marker cycling.
        (Default changed from a mutable ``[]`` to an immutable ``()``;
        membership tests behave identically.)
    marker : str or sequence, optional
        Marker specification. If ``'default'``, a predefined sequence
        of marker styles is used.
    size : int or float, optional
        Marker size. If ``'default'``, a preset size is used.
    figsize : tuple or str, optional
        Figure size. If ``'default'``, a predefined size is used.

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing the global map.

    Notes
    -----
    Marker shape distinguishes proxy types, while color denotes archive
    type. Highlighted archives reuse marker cycling per archive, whereas
    non-highlighted archives use a global marker index.
    """
    proxy_lats = df['geo_meanLat'].values
    proxy_lons = df['geo_meanLon'].values

    # plots the map
    figsize = (15, 12) if figsize=='default' else figsize
    fig = plt.figure(figsize=figsize, dpi=350)
    grid = GS(1, 3)

    ax = plt.subplot(grid[:, :], projection=ccrs.Robinson()) # create axis with Robinson projection of globe

    ax.add_feature(cfeature.LAND, alpha=0.5) # adds land features
    ax.add_feature(cfeature.OCEAN, alpha=0.6, facecolor='#C5DEEA') # adds ocean features
    ax.coastlines() # adds coastline features

    ax.set_global()

    # loop through the data to generate a scatter plot of each data record:
    # 1st loop: go through archive types individually (colour coded)
    # 2nd loop: through paleo proxy types of that archive (marker coded)

    if marker=='default':
        mt = 'ov^s<>pP*XDdh'*10 # generates string of marker types
    else:
        mt = marker

    s = 200 if size=='default' else size

    archive_types = np.unique(df['archiveType'])

    ijk = 0  # global marker index for non-highlighted archives
    for at in archive_types:
        arch_mask = df['archiveType']==at
        arch_proxy_types = np.unique(df['paleoData_proxy'][arch_mask])
        for ii, pt in enumerate(arch_proxy_types):
            # combined selection for this archive/proxy pair (computed once;
            # previously the archive mask was rebuilt twice per iteration)
            sel = (df['paleoData_proxy']==pt) & arch_mask
            label = at+': '+pt+' ($n=%d$)'% df['paleoData_proxy'][sel].count()
            # highlighted archives restart the marker cycle per archive;
            # others advance a shared global index (do not rebind `marker`)
            mk = mt[ii] if at in highlight_archives else mt[ijk]
            plt.scatter(proxy_lons[sel], proxy_lats[sel],
                        transform=ccrs.PlateCarree(), zorder=999,
                        marker=mk, color=archive_colour[at],
                        label=label,
                        lw=.3, ec='k', s=s)
            if at not in highlight_archives:
                ijk += 1

    plt.legend(bbox_to_anchor=(-0.01,-0.01), loc='upper left', ncol=3, fontsize=13.5, framealpha=0)
    grid.tight_layout(fig)
    return fig

plot_geo_archive_proxy_short(df, archives_sorted, archive_proxy_count_short, archive_colour)

Plot geographical distribution of proxy records for major archive–proxy combinations.

Parameters:

Name Type Description Default
df DataFrame

DataFrame containing geographic coordinates and proxy metadata. Must include 'geo_meanLat', 'geo_meanLon', 'archiveType', and 'paleoData_proxy'.

required
archives_sorted list of str

Ordered list of archive types to control plotting and legend order.

required
archive_proxy_count_short dict

Nested dictionary mapping archive types to proxy counts, including grouped 'other' proxy categories.

required
archive_colour dict

Mapping from archive type to color.

required

Returns:

Type Description
Figure

Figure containing the global map.

Notes

Each archive–proxy combination is plotted with a distinct marker, while colors indicate archive type. Proxies classified as 'other' are plotted using masks that exclude explicitly listed proxy types.

Source code in dod2k_utilities/ut_plot.py
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
def plot_geo_archive_proxy_short(df, archives_sorted, archive_proxy_count_short, archive_colour):
    """
    Plot geographical distribution of proxy records for major archive–proxy combinations.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing geographic coordinates and proxy metadata.
        Must include ``'geo_meanLat'``, ``'geo_meanLon'``,
        ``'archiveType'``, and ``'paleoData_proxy'``.
    archives_sorted : list of str
        Ordered list of archive types to control plotting and legend order.
    archive_proxy_count_short : dict
        Nested dictionary mapping archive types to proxy counts, including
        grouped ``'other'`` proxy categories.
    archive_colour : dict
        Mapping from archive type to color.

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing the global map.

    Notes
    -----
    Each archive–proxy combination is plotted with a distinct marker,
    while colors indicate archive type. Proxies classified as ``'other'``
    are plotted using masks that exclude explicitly listed proxy types.
    """
    proxy_lats = df['geo_meanLat'].values
    proxy_lons = df['geo_meanLon'].values

    # plots the map
    fig = plt.figure(figsize=(13, 8), dpi=350)
    grid = GS(1, 3)

    ax = plt.subplot(grid[:, :], projection=ccrs.Robinson()) # create axis with Robinson projection of globe

    ax.add_feature(cfeature.LAND, alpha=0.6) # adds land features
    ax.add_feature(cfeature.OCEAN, alpha=0.6, facecolor='#C5DEEA') # adds ocean features
    ax.coastlines() # adds coastline features

    ax.set_global()

    mt = 'ov^s<>pP*XDdh'*10 # generates string of marker types

    ijk = 0  # marker index reserved for archives with no explicit proxy types
    for at in archives_sorted:
        # (leftover debug print of the sorted keys removed)
        for ii, key in enumerate(sorted(archive_proxy_count_short[at])):
            mk = mt[ii]
            if 'other' not in key:
                # explicit 'archive: proxy' combination
                at_name, pt = key.split(': ')
                at_mask = df['archiveType']==at_name
                pt_mask = df['paleoData_proxy']==pt
                label = key+' (n=%d)'%archive_proxy_count_short[at_name][key]
            else:
                # grouped 'other <archive>' bin: every proxy of that archive
                # that is not listed explicitly
                at_name = key.split('other ')[-1]
                exclude_types = [kk.split(': ')[-1] for kk in archive_proxy_count_short[at_name].keys() if at_name in kk if 'other' not in kk]
                at_mask = df['archiveType']==at_name
                pt_mask = ~np.isin(df['paleoData_proxy'], exclude_types)
                label = key+' (n=%d)'%df['paleoData_proxy'][pt_mask&at_mask].count()
                if exclude_types==[]:
                    # archive with no explicit proxies gets its own marker slot
                    mk = mt[ijk]
                    ijk += 1
                    label = label.replace('other ','')
            plt.scatter(proxy_lons[pt_mask&at_mask], proxy_lats[pt_mask&at_mask],
                        transform=ccrs.PlateCarree(), zorder=999,
                        marker=mk,
                        color=archive_colour[at_name],
                        label=label,
                        lw=.3, ec='k', s=150)

    plt.legend(bbox_to_anchor=(0.03,-0.01), loc='upper left', ncol=3, fontsize=12, framealpha=0)
    grid.tight_layout(fig)
    return fig

plot_length(df, title='', mincount=0, col='tab:blue')

Plot a histogram of lengths from a DataFrame.

This function bins the 'length' values in the DataFrame into predefined ranges, optionally filters bins with counts below mincount, and displays a bar plot.

Parameters:

Name Type Description Default
df DataFrame

DataFrame containing a column named 'length' with numeric values.

required
title str

Title of the plot.

''
mincount int

Minimum count threshold for bins. Bins with fewer counts than mincount are excluded from the plot. Default is 0 (all bins shown).

0

Returns:

Type Description
None

The function displays a matplotlib bar plot and does not return any value.

Source code in dod2k_utilities/ut_plot.py
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
def plot_length(df, title='', mincount=0, col='tab:blue'):
    """
    Plot a histogram of lengths from a DataFrame.

    Bins the 'length' values into predefined ranges (50-year bins up to 200,
    100-year bins up to 800, and a single '>800' bin), optionally filters
    bins with counts below `mincount`, and displays a bar plot.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing a column named 'length' with numeric values.
    title : str, optional
        Title of the plot.
    mincount : int, optional
        Minimum count threshold for bins. Bins with fewer counts than `mincount`
        are excluded from the plot. Default is 0 (all bins shown).
    col : str, optional
        Bar colour. Default is 'tab:blue'.

    Returns
    -------
    None
        The function displays a matplotlib bar plot and does not return any value.
    """
    # pre-populate bins so empty ones still appear (subject to mincount)
    count_res = {'%s-%s'%(ii, ii+50): 0 for ii in range(0, 200, 50) }
    count_res.update({'%s-%s'%(ii, ii+100): 0 for ii in range(200, 800, 100) })
    count_res['>800'] = 0
    for dd in df['length'].values:
        if dd < 0:
            # negative lengths matched no bin before either; keep skipping them
            continue
        if dd >= 800:
            # BUGFIX: a length of exactly 800 previously matched no bin
            # (`dd>800` false, last range is 700-799) and was dropped
            count_res['>800'] += 1
        elif dd < 200:
            # BUGFIX: arithmetic binning instead of `dd in range(...)`, which
            # is False for non-integer lengths and silently dropped them
            lo = 50*int(dd//50)
            count_res['%s-%s'%(lo, lo+50)] += 1
        else:
            lo = 100*int(dd//100)
            count_res['%s-%s'%(lo, lo+100)] += 1

    plt.figure(dpi=100, figsize=(5,3))
    plt.title(title)
    ax=plt.gca()
    ii=0
    rr=[]
    for res, count in count_res.items():
        if count<mincount: continue
        plt.bar(ii, count, color=col)
        ii+=1
        rr+=[res]
    ax.set_xticks(range(ii))
    ax.set_xticklabels(rr, rotation=45, ha='right', fontsize=7)
    plt.xlabel('length')
    plt.ylabel('count')
    plt.show()
    return

plot_resolution(df, title='', mincount=0, col='tab:blue')

Plot a histogram of resolutions from a DataFrame.

This function counts the occurrences of each "resolution" in the DataFrame, optionally merges bins with counts below mincount, and displays a bar plot.

Parameters:

Name Type Description Default
df DataFrame

DataFrame containing a column named 'resolution', where each entry is a list of integers representing resolution values.

required
title str

Title of the plot.

''
mincount int

Minimum count threshold for individual resolution bins. Bins with fewer counts than mincount are merged into a coarser bin. Default is 0 (no merging).

0

Returns:

Type Description
None

The function displays a matplotlib bar plot and does not return any value.

Source code in dod2k_utilities/ut_plot.py
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
def plot_resolution(df, title='', mincount=0, col='tab:blue'):
    """
    Plot a histogram of resolutions from a DataFrame.

    Counts the occurrences of each resolution (or resolution range) in the
    DataFrame, optionally merges bins with counts below `mincount` into
    coarser '<N' bins, and displays a bar plot.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing a column named 'resolution', where each entry is 
        a list of integers representing resolution values.
    title : str, optional
        Title of the plot.
    mincount : int, optional
        Minimum count threshold for individual resolution bins. Bins with fewer
        counts than `mincount` are merged into a coarser bin. Default is 0 
        (no merging).
    col : str, optional
        Bar colour. Default is 'tab:blue'.

    Returns
    -------
    None
        The function displays a matplotlib bar plot and does not return any value.
    """
    # count records per resolution label: 'min - max' for ranges, 'min' otherwise
    count_res = {}
    for dd in df['resolution'].values:
        if len(dd)>1:
            res = '%d - %d'%(min(dd), max(dd))
        else:
            res='%d'%min(dd)
        if res not in count_res: 
            count_res[res]=0
        count_res[res]+=1

    if mincount!=0:
        rmv = []
        for kk in list(count_res):
            if count_res[kk]<mincount:
                maxres = float(kk.split(' - ')[-1])
                # leading spaces in the new key make merged bins sort first
                # in the lexicographic np.sort below
                if maxres<6:
                    maxres=5+np.round(maxres/10.)*10
                    newkey='   <%d'%maxres
                elif maxres<95:
                    maxres=5+np.round(maxres/10.)*10
                    newkey='  <%d'%maxres
                else:
                    maxres=50+np.round(maxres/100.)*100
                    newkey=' <%d'%maxres
                if newkey not in count_res:
                    count_res[newkey]=0
                # BUGFIX: accumulate the bin's full count; the previous `+= 1`
                # lost all but one record per merged bin (debug print removed)
                count_res[newkey]+=count_res[kk]
                rmv+=[kk]
        for kk in rmv: del count_res[kk]

    plt.figure(dpi=100, figsize=(5,3))
    plt.title(title)
    ax=plt.gca()
    ii=0
    rr=[]
    for kk in np.sort(list(count_res)):
        plt.bar(ii, count_res[kk], color=col)
        ii+=1
        rr+=[kk]
    ax.set_xticks(range(ii))
    ax.set_xticklabels(rr, rotation=45, ha='right', fontsize=7)
    plt.xlabel('resolution')
    plt.ylabel('count')
    plt.show()
    return

shade_percentiles(x, y, color, ax, alpha=1, lu=False, zorder=None, lw=1, ups=[60, 70, 80, 90, 95], label=None)

Shade percentile ranges of an ensemble on a matplotlib axis.

Creates overlapping shaded regions showing different percentile ranges of an ensemble, useful for visualizing uncertainty in climate data.

Parameters:

Name Type Description Default
x array - like

Time or x-axis values (1D array of length n).

required
y array - like

Ensemble data as m×n array where m is ensemble dimension and n is time dimension.

required
color str or tuple

Color for shading (matplotlib color specification).

required
ax Axes

Axes object to plot on.

required
alpha float

Overall transparency multiplier (0-1). Default is 1.

1
lu bool

If True, plot dotted lines at 5th and 95th percentiles. Default is False.

False
zorder float

Drawing order for the shaded regions. Default is None.

None
lw float

Line width for percentile boundary lines if lu=True. Default is 1.

1
ups list of float

Upper percentiles to shade. Default is [60, 70, 80, 90, 95].

[60, 70, 80, 90, 95]
label str

Label for legend (applied to outermost shading). Default is None.

None

Returns:

Type Description
None
Notes

Shades symmetric percentile ranges with decreasing opacity: - Innermost: 40th-60th percentile (darkest) - Outermost: 5th-95th percentile (lightest)

Examples:

>>> fig, ax = plt.subplots()
>>> x = np.arange(100)
>>> y = np.random.randn(50, 100)  # 50 ensemble members, 100 time steps
>>> shade_percentiles(x, y, 'blue', ax, lu=True, label='Ensemble')
Source code in dod2k_utilities/ut_plot.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
def shade_percentiles(x, y, color, ax, alpha=1, lu=False, zorder=None, lw=1,
                      ups=(60, 70, 80, 90, 95), label=None):
    """
    Shade percentile ranges of an ensemble on a matplotlib axis.

    Creates overlapping shaded regions showing different percentile ranges
    of an ensemble, useful for visualizing uncertainty in climate data.

    Parameters
    ----------
    x : array-like
        Time or x-axis values (1D array of length n).
    y : array-like
        Ensemble data as m×n array where m is ensemble dimension and n is
        time dimension.
    color : str or tuple
        Color for shading (matplotlib color specification).
    ax : matplotlib.axes.Axes
        Axes object to plot on.
    alpha : float, optional
        Overall transparency multiplier (0-1). Default is 1.
    lu : bool, optional
        If True, plot dotted lines at 5th and 95th percentiles. Default is False.
    zorder : float, optional
        Drawing order for the shaded regions. Default is None.
    lw : float, optional
        Line width for percentile boundary lines if lu=True. Default is 1.
    ups : sequence of float, optional
        Upper percentiles to shade. Default is (60, 70, 80, 90, 95).
        At most five are used (one opacity level per shaded band).
    label : str, optional
        Label for legend (applied to outermost shading). Default is None.

    Returns
    -------
    None

    Notes
    -----
    Shades symmetric percentile ranges with decreasing opacity:
    - Innermost: 40th-60th percentile (darkest)
    - Outermost: 5th-95th percentile (lightest)

    Examples
    --------
    >>> fig, ax = plt.subplots()
    >>> x = np.arange(100)
    >>> y = np.random.randn(50, 100)  # 50 ensemble members, 100 time steps
    >>> shade_percentiles(x, y, 'blue', ax, lu=True, label='Ensemble')
    """
    # Symmetric lower percentiles matching each upper percentile.
    lows = [100 - u for u in ups]
    # Opacity decreases from innermost to outermost band; zip() truncates
    # to the shorter of ups/alps, so at most five bands are drawn.
    alps = np.array([0.5, 0.4, 0.35, 0.3, 0.25]) * alpha
    # Fix: the dotted 5th/95th boundary lines previously used zorder*2,
    # which raised TypeError when zorder was left at its default of None.
    line_zorder = None if zorder is None else zorder * 2
    # x is loop-invariant; copy it once (dc guards against downstream mutation).
    X = dc(np.array(x))
    for ii, (lo, up, a) in enumerate(zip(lows, ups, alps)):
        # Percentiles along the ensemble axis (axis=0); NaN-aware.
        mina = np.nanpercentile(dc(y), lo, axis=0)
        maxa = np.nanpercentile(dc(y), up, axis=0)
        # Only the first (innermost) band carries the legend label.
        ax.fill_between(X, y1=mina, y2=maxa, color=color, alpha=a, lw=0,
                        zorder=zorder, label=label if ii == 0 else None)
        if (lo, up) == (5, 95) and lu:
            ax.plot(X, mina, color=color, lw=lw, ls=':', zorder=line_zorder)
            ax.plot(X, maxa, color=color, lw=lw, ls=':', zorder=line_zorder)
    return