Source code for DiadFit.CO2_in_bubble_error

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Lets use a generic function for getting a panda series value or not.

[docs] def get_value(variable, i): """ This function returns the value if its not a series, otherwise returns the right row in the series """ if isinstance(variable, pd.Series): return variable.iloc[i] else: return variable
const=(4/3)*np.pi
[docs] def propagate_CO2_in_bubble(*, N_dup=1000, sample_ID, vol_perc_bub=None, error_vol_perc_bub=None, error_type_vol_perc_bub='Abs', MI_x=None, MI_y=None,MI_z=None,VB_x=None, VB_y=None,VB_z=None, error_MI_x=None, error_MI_y=None,error_MI_z=None,error_VB_x=None, error_VB_y=None, error_VB_z=None, error_type_dimension='Abs', error_dist_dimension='normal', melt_dens_kgm3, CO2_bub_dens_gcm3, error_dist_vol_perc_bub='normal', error_CO2_bub_dens_gcm3=0, error_type_CO2_bub_dens_gcm3='Abs', error_dist_CO2_bub_dens_gcm3='normal', error_melt_dens_kgm3=0, error_type_melt_dens_kgm3='Abs', error_dist_melt_dens_kgm3='normal', plot_figure=True, fig_i=0, neg_values=True): """ This function propagates uncertainty in reconstruction of melt inclusion CO2 contents by feeding each row into propagate_CO2_in_bubble_ind. The returned standard deviation uses the 84th-16th percentile rather than the true standard deviation, as this is better for skewed distributions. Parameters ---------------- SampleID: str, pd.series Sample_ID (e.g. sample names) which is returned on dataframe N_dup: int Number of duplicates when generating errors for Monte Carlo simulations Now, either you know the vol% of the bubble and the associated error, in which case enter these two arguements: vol_perc_bub: int, float, pd.series Volume proportion of sulfide in melt inclusion error_vol_perc_bub, CO2_bub_dens_gcm3, error_melt_dens_kgm3: int, float, pd.Series Error for each variable, can be absolute or % error_type_vol_perc_bub, error_type_bub_dens_gcm3, error_type_melt_dens_kgm3: 'Abs' or 'Perc' whether given error is perc or absolute Or, you have measured the x-y-z dimensions of the MI and VB (e.g. by perpendicular polishing). MI_x, MI_y, MI_Z, VB_x, VB_y, VB_Z: int, float, pd.Series x, y, z dimensions of vapour bubble and melt inclusion error_MI_x, error_MI_y, error_MI_Z, error_VB_x, error_VB_y, error_VB_Z: int, float, pd.Series Error on x, y, z dimension meausurement error_type_dimension: 'Abs' or 'Perc' Whether errors on all x-y-z are perc or abs error_dist_dimension: 'normal' or 'uniform' Whether errors on all x-y-z are normally or uniformly distributed. melt_dens_kgm3:int, float, pd.series Density of the silicate melt in kg/m3, e.g. from DensityX CO2_bub_dens_gcm3: int, float, pd.Series Density of the vapour bubble in g/cm3 error_dist_vol_perc_bub, error_dist_bub_dens_gcm3, error_dist_melt_dens_kgm3, error_dist_dimension: 'normal' or 'uniform' Distribution of simulated error plot_figure: bool Default true - plots a figure of the row indicated by fig_i (default 1st row, fig_i=0) neg_values: bool Default True - whether negative values are removed from MC simulations or not. False, replace all negative values with zeros. Returns ------------------ pd.DataFrame: df_step, All_outputs All outputs has calculations for every simulaiton df_step has average and standard deviation for each sample """ # Constant for sphere calcs # Lets check what they entered for volume - if they didnt enter a volume % Bubble, lets calculate it if vol_perc_bub is None: if VB_z is None: VB_z=(VB_x+VB_y)/2 if MI_z is None: MI_z=(MI_x+MI_y)/2 Vol_VB_sphere=const*VB_x*VB_y*VB_z*(0.5)**3 Vol_MI_sphere=const*MI_x*MI_y*MI_z*(0.5)**3 vol_perc_bub=100* Vol_VB_sphere/Vol_MI_sphere # Now lets check how they entered error in volume if not ((error_vol_perc_bub is not None and all(v is None for v in [error_MI_x, error_MI_y, error_MI_z, error_VB_x, error_VB_y, error_VB_z])) or (error_vol_perc_bub is None and all(v is not None for v in [error_MI_x, error_MI_y, error_MI_z, error_VB_x, error_VB_y, error_VB_z]))): raise ValueError('Specify either error_vol_perc_bub or non-None values for all of error_MI_x, error_MI_y, error_MI_z, error_VB_x, error_VB_y, and error_VB_z.') # Set up empty things to fill up. if type(vol_perc_bub) is pd.Series: len_loop=len(vol_perc_bub) else: len_loop=1 mean_CO2_eq_melt = np.zeros(len_loop, dtype=float) mean_CO2_eq_melt_ind = np.zeros(len_loop, dtype=float) med_CO2_eq_melt = np.zeros(len_loop, dtype=float) std_CO2_eq_melt = np.zeros(len_loop, dtype=float) preferred_val_CO2_melt= np.zeros(len_loop, dtype=float) std_IQR_CO2_eq_melt= np.zeros(len_loop, dtype=float) Q84= np.zeros(len_loop, dtype=float) Q16= np.zeros(len_loop, dtype=float) Sample=np.zeros(len_loop, dtype=np.dtype('U100') ) All_outputs=pd.DataFrame([]) #This loops through each sample for i in range(0, len_loop): if i % 20 == 0: print('working on sample number '+str(i)) # If user has entered a pandas series for error, takes right one for each loop # vol_perc_bub % and error # Checking volume right format, if panda series or integer vol_perc_bub_i=get_value(vol_perc_bub, i) error_vol_perc_bub_i=get_value(error_vol_perc_bub, i) CO2_bub_dens_gcm3_i=get_value(CO2_bub_dens_gcm3, i) error_CO2_bub_dens_gcm3_i=get_value(error_CO2_bub_dens_gcm3, i) melt_dens_kgm3_i=get_value(melt_dens_kgm3, i) error_melt_dens_kgm3_i=get_value(error_melt_dens_kgm3, i) # This is the function doing the work to actually make the simulations for each variable. if error_vol_perc_bub is not None: df_synthetic=propagate_CO2_in_bubble_ind( N_dup=N_dup, vol_perc_bub=vol_perc_bub_i, melt_dens_kgm3=melt_dens_kgm3_i, CO2_bub_dens_gcm3=CO2_bub_dens_gcm3_i, error_CO2_bub_dens_gcm3=error_CO2_bub_dens_gcm3_i, error_type_CO2_bub_dens_gcm3=error_type_CO2_bub_dens_gcm3, error_dist_CO2_bub_dens_gcm3=error_dist_CO2_bub_dens_gcm3, error_vol_perc_bub=error_vol_perc_bub_i, error_type_vol_perc_bub=error_type_vol_perc_bub, error_dist_vol_perc_bub=error_dist_vol_perc_bub, error_melt_dens_kgm3=error_melt_dens_kgm3_i, error_type_melt_dens_kgm3=error_type_melt_dens_kgm3, error_dist_melt_dens_kgm3=error_dist_melt_dens_kgm3, len_loop=1, neg_values=neg_values) else: # This is the more complex one where we have to account for x-y-z errors on all of them. MI_x_i = get_value(MI_x, i) MI_y_i = get_value(MI_y, i) VB_x_i = get_value(VB_x, i) VB_y_i = get_value(VB_y, i) VB_z_i = get_value(VB_z, i) MI_z_i = get_value(MI_z, i) error_MI_x_i = get_value(error_MI_x, i) error_MI_y_i = get_value(error_MI_y, i) error_MI_z_i = get_value(error_MI_z, i) error_VB_x_i = get_value(error_VB_x, i) error_VB_y_i = get_value(error_VB_y, i) error_VB_z_i = get_value(error_VB_z, i) df_synthetic=propagate_CO2_in_bubble_ind( N_dup=N_dup, vol_perc_bub=vol_perc_bub_i, melt_dens_kgm3=melt_dens_kgm3_i, CO2_bub_dens_gcm3=CO2_bub_dens_gcm3_i, error_CO2_bub_dens_gcm3=error_CO2_bub_dens_gcm3_i, error_type_CO2_bub_dens_gcm3=error_type_CO2_bub_dens_gcm3, error_dist_CO2_bub_dens_gcm3=error_dist_CO2_bub_dens_gcm3, error_vol_perc_bub=None, MI_x=MI_x_i, MI_y=MI_y_i,MI_z=MI_z_i,VB_x=VB_x_i, VB_y=VB_y_i,VB_z=VB_z_i, error_MI_x=error_MI_x_i, error_MI_y=error_MI_y_i,error_MI_z=error_MI_z_i, error_VB_x=error_VB_x_i, error_VB_y=error_VB_y_i, error_VB_z=error_VB_z_i, error_type_dimension=error_type_dimension, error_dist_dimension=error_dist_dimension, error_melt_dens_kgm3=error_melt_dens_kgm3_i, error_type_melt_dens_kgm3=error_type_melt_dens_kgm3, error_dist_melt_dens_kgm3=error_dist_melt_dens_kgm3, len_loop=1, neg_values=neg_values) # Convert to densities for MC df=df_synthetic # Check of if sample_ID is None: Sample[i]=i elif isinstance(sample_ID, str): Sample[i]=sample_ID else: Sample[i]=sample_ID.iloc[i] df.insert(0, 'Filename', Sample[i]) All_outputs=pd.concat([All_outputs, df], axis=0) mean_CO2_eq_melt[i]=np.nanmean(df['CO2_eq_melt_ppm_MC']) med_CO2_eq_melt[i]=np.nanmedian(df['CO2_eq_melt_ppm_MC']) std_CO2_eq_melt[i]=np.nanstd(df['CO2_eq_melt_ppm_MC']) var=df['CO2_eq_melt_ppm_MC'] Q16[i]=np.percentile(var, 16) Q84[i]=np.percentile(var, 84) std_IQR_CO2_eq_melt[i]=0.5*np.abs((np.percentile(var, 84) -np.percentile(var, 16))) # Values all the same, so can just take the 1st. preferred_val_CO2_melt[i]=df['CO2_eq_melt_ppm_noMC'].iloc[0] df_step=pd.DataFrame(data={'Filename': Sample, 'CO2_eq_in_melt_noMC': preferred_val_CO2_melt, 'mean_MC_CO2_equiv_melt_ppm': mean_CO2_eq_melt, 'med_MC_CO2_equiv_melt_ppm': med_CO2_eq_melt, 'std_MC_IQR_CO2_equiv_melt_ppm': std_IQR_CO2_eq_melt, '16th_quantile_melt_ppm': Q16, '84th_quantile_melt_ppm': Q84, }) if plot_figure is True: all_sims=fig_i all_sims=All_outputs.loc[All_outputs['Filename']==All_outputs['Filename'].iloc[fig_i]] fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10,8)) ax1.hist(all_sims['vol_perc_bub_with_noise'], bins=50); ax1.set_xlabel('MC bubble volume (vol%)') ax1.set_ylabel('# of simulations') ax2.hist(all_sims['melt_dens_kgm3_with_noise'], bins=50); ax2.set_xlabel('MC Melt Density (kg/cm3)') ax3.hist(all_sims['CO2_bub_dens_gcm3_with_noise'], bins=50); ax3.set_xlabel('MC Bubble Density (g/cm3)') ax3.set_ylabel('# of simulations') ax4.hist(all_sims['CO2_eq_melt_ppm_MC'], bins=50, color='red'); ax4.set_xlabel('CO2 equivalent in the melt held in the bubble (ppm)') ax4.plot([all_sims['CO2_eq_melt_ppm_noMC'].iloc[0], all_sims['CO2_eq_melt_ppm_noMC'].iloc[0]], [0, N_dup/20], '-k', label='Preferred value'); ax4.plot([all_sims['CO2_eq_melt_ppm_noMC'].iloc[0]+np.std(all_sims['CO2_eq_melt_ppm_MC']), all_sims['CO2_eq_melt_ppm_noMC'].iloc[0]+np.std(all_sims['CO2_eq_melt_ppm_MC'])], [0, N_dup/20], ':k', label='Preferred+value+1s MC'); ax4.plot([all_sims['CO2_eq_melt_ppm_noMC'].iloc[0]-np.std(all_sims['CO2_eq_melt_ppm_MC']), all_sims['CO2_eq_melt_ppm_noMC'].iloc[0]-np.std(all_sims['CO2_eq_melt_ppm_MC'])], [0, N_dup/20], ':k', label='Preferred-value+1s MC'); ax4.legend() return df_step, All_outputs, fig if 'fig' in locals() else None
# Lets set the random seed np.random.seed(42)
[docs] def add_noise_to_variable(original_value, error, error_type, error_dist, N_dup, neg_values, neg_threshold): """ This function adds noise to each variable for the monte-carloing Parameters ----------------- original_value: int, float Preferred value (e.g. center of distribution) error: int, float Error value error_type: str 'Abs' if absolute error, 'Perc' if percent error_dist: str 'normal' if normally distributed, 'uniform' if uniformly distributed. N_dup: int number of duplicates neg_values: bool whether negative values are replaced with zeros """ # Depending on the error type, allocates an error if error_type == 'Abs': calculated_error = error elif error_type == 'Perc': calculated_error = original_value * error / 100 # Generates noise following a distribution if error_dist == 'normal': noise_to_add = np.random.normal(0, calculated_error, N_dup) elif error_dist == 'uniform': noise_to_add = np.random.uniform(-calculated_error, calculated_error, N_dup) # adds this noise to original value value_with_noise = noise_to_add + original_value if not neg_values: value_with_noise[value_with_noise < neg_threshold] = neg_threshold return value_with_noise
[docs] def propagate_CO2_in_bubble_ind(sample_i=0, N_dup=1000, vol_perc_bub=None, CO2_bub_dens_gcm3=None, melt_dens_kgm3=None, MI_x=None, MI_y=None,MI_z=None,VB_x=None, VB_y=None,VB_z=None, error_MI_x=None, error_MI_y=None,error_MI_z=None,error_VB_x=None, error_VB_y=None, error_VB_z=None, error_type_dimension='Abs', error_dist_dimension='normal', error_vol_perc_bub=None, error_type_vol_perc_bub='Abs', error_dist_vol_perc_bub='normal', error_CO2_bub_dens_gcm3=0, error_type_CO2_bub_dens_gcm3='Abs', error_dist_CO2_bub_dens_gcm3='normal', error_melt_dens_kgm3=0, error_type_melt_dens_kgm3='Abs', error_dist_melt_dens_kgm3='normal', len_loop=1, neg_values=True): """ This function propagates uncertainty in reconstruction of melt inclusion bubble equivalent CO2 contents. and returns a dataframe. it does one sample at a time. Parameters ---------------- sample_i: int if your inputs are panda series, says which row to take Parameters ---------------- For volumes, either enter vol% bubble and associated errors... vol_perc_bub: int, float, pd.series Volume proportion of sulfide in melt inclusion error_vol_perc_bub, CO2_bub_dens_gcm3, error_melt_dens_kgm3: int, float, pd.Series Error for each variable, can be absolute or % error_type_vol_perc_bub, error_type_bub_dens_gcm3, error_type_melt_dens_kgm3: 'Abs' or 'Perc' whether given error is perc or absolute error_dist_vol_perc_bub, error_dist_bub_dens_gcm3, error_dist_melt_dens_kgm3: 'normal' or 'uniform' Distribution of simulated error OR Enter melt inclusion and vapour bubble dimensions (diameter, not radii), and their errors MI_x, MI_y, MI_z, VB_x, VB_y, VB_z: int, float, series: Diameter of melt inclusions. error_MI_x, error_MI_y, error_MI_z, error_VB_x, error_VB_y, error_VB_z: Error on diameter of melt inclusions error_type_dimension='Abs' or 'Perc': Specify whether errors on these dimensions are absolute or percentage error_dist_dimension='normal' or 'uniform': Specify error distribution SampleID: str, pd.series Sample_ID (e.g. sample names) which is returned on dataframe N_dup: int Number of duplicates when generating errors for Monte Carlo simulations melt_dens_kgm3:int, float, pd.series Density of the silicate melt in kg/m3, e.g. from DensityX CO2_bub_dens_gcm3: int, float, pd.Series Density of the vapour bubble in g/cm3 plot_figure: bool Default true - plots a figure of the row indicated by fig_i (default 1st row, fig_i=0) neg_values: bool Default True - whether negative values are removed from MC simulations or not. False, replace all negative values with zeros. Returns ------------------ pd.DataFrame: Input variable duplicated N_dup times with noise added. """ # If only a single sample, set up an output dataframe with an index. if len_loop==1: df_c=pd.DataFrame(data={ 'vol_perc_bub': vol_perc_bub, 'CO2_bub_dens_gcm3': CO2_bub_dens_gcm3, 'melt_dens_kgm3': melt_dens_kgm3 }, index=[0]) else: df_c=pd.DataFrame(data={ 'vol_perc_bub': vol_perc_bub, 'CO2_bub_dens_gcm3': CO2_bub_dens_gcm3, 'melt_dens_kgm3': melt_dens_kgm3}) # Volume error distribution - if they give a volume percentage rather than dimensions if error_vol_perc_bub is not None: print('using error on the bubble volume percent, not the entered dimensions, as error_vol_perc_bub was not None') # Easy peasy Vol_with_noise=add_noise_to_variable(vol_perc_bub, error_vol_perc_bub, error_type_vol_perc_bub, error_dist_vol_perc_bub, N_dup, neg_values, neg_threshold=0.0000000001) else: x_MI_with_noise=add_noise_to_variable(MI_x, error_MI_x, error_type_dimension, error_dist_dimension, N_dup, neg_values, neg_threshold=0.0000000001) y_MI_with_noise=add_noise_to_variable(MI_y, error_MI_y, error_type_dimension, error_dist_dimension, N_dup, neg_values, neg_threshold=0.0000000001) x_VB_with_noise=add_noise_to_variable(VB_x, error_VB_x, error_type_dimension, error_dist_dimension, N_dup, neg_values, neg_threshold=0.0000000001) y_VB_with_noise=add_noise_to_variable(VB_y, error_VB_y, error_type_dimension, error_dist_dimension, N_dup, neg_values, neg_threshold=0.0000000001) z_MI_with_noise=add_noise_to_variable(MI_z, error_MI_z, error_type_dimension, error_dist_dimension, N_dup, neg_values, neg_threshold=0.0000000001) z_VB_with_noise=add_noise_to_variable(VB_z, error_VB_z, error_type_dimension, error_dist_dimension, N_dup, neg_values, neg_threshold=0.0000000001) Vol_VB_sphere_with_noise=const*x_VB_with_noise*y_VB_with_noise*z_VB_with_noise*(0.5)**3 Vol_MI_sphere_with_noise=const*x_MI_with_noise*y_MI_with_noise*z_MI_with_noise*(0.5)**3 Vol_with_noise=100* Vol_VB_sphere_with_noise/Vol_MI_sphere_with_noise # Bubble density CO2_bub_dens_gcm3_with_noise=add_noise_to_variable(CO2_bub_dens_gcm3, error_CO2_bub_dens_gcm3, error_type_CO2_bub_dens_gcm3, error_dist_CO2_bub_dens_gcm3, N_dup, neg_values, neg_threshold=0.0000000001) # Melt density melt_dens_kgm3_with_noise=add_noise_to_variable(melt_dens_kgm3, error_melt_dens_kgm3, error_type_melt_dens_kgm3, error_dist_melt_dens_kgm3, N_dup, neg_values, neg_threshold=0.0000000001) # Now lets calculate the equilibrium CO2 content of the melt CO2_eq_melt_ind=10**4 * (df_c['vol_perc_bub']*df_c['CO2_bub_dens_gcm3'])/(df_c['melt_dens_kgm3']/1000) if error_vol_perc_bub is not None: df_out=pd.DataFrame(data={ 'vol_perc_bub_with_noise': Vol_with_noise, 'CO2_bub_dens_gcm3_with_noise': CO2_bub_dens_gcm3_with_noise, 'melt_dens_kgm3_with_noise': melt_dens_kgm3_with_noise, 'vol_perc_bub': df_c['vol_perc_bub'].iloc[sample_i], 'CO2_bub_dens_gcm3': CO2_bub_dens_gcm3, 'error_type_vol_perc_bub': error_type_vol_perc_bub, 'error_dist_Vol': error_dist_vol_perc_bub, 'error_CO2_bub_dens_gcm3': error_CO2_bub_dens_gcm3, 'error_type_CO2_bub_dens_gcm3': error_type_CO2_bub_dens_gcm3, 'error_dist_CO2_bub_dens_gcm3': error_dist_CO2_bub_dens_gcm3, }) else: df_out=pd.DataFrame(data={ 'vol_perc_bub_with_noise': Vol_with_noise, 'CO2_bub_dens_gcm3_with_noise': CO2_bub_dens_gcm3_with_noise, 'melt_dens_kgm3_with_noise': melt_dens_kgm3_with_noise, 'vol_perc_bub': df_c['vol_perc_bub'].iloc[sample_i], 'CO2_bub_dens_gcm3': CO2_bub_dens_gcm3, 'error_CO2_bub_dens_gcm3': error_CO2_bub_dens_gcm3, 'error_type_CO2_bub_dens_gcm3': error_type_CO2_bub_dens_gcm3, 'error_dist_CO2_bub_dens_gcm3': error_dist_CO2_bub_dens_gcm3, 'Vol_VB_with_noise': Vol_VB_sphere_with_noise, 'Vol_MI_with_noise': Vol_MI_sphere_with_noise, 'VB_x_with_noise': x_VB_with_noise, 'VB_y_with_noise':y_VB_with_noise, 'VB_z_with_noise': z_VB_with_noise, 'MI_x_with_noise': x_MI_with_noise, 'MI_y_with_noise': y_MI_with_noise, 'MI_z_with_noise': z_MI_with_noise, }) CO2_eq_melt=10**4*((df_out['vol_perc_bub_with_noise']*df_out['CO2_bub_dens_gcm3_with_noise']))/(df_out['melt_dens_kgm3_with_noise']/1000) df_out.insert(0, 'CO2_eq_melt_ppm_MC',CO2_eq_melt) #df_out.insert(1, 'CO2_eq_melt_ppm_noMC',float(CO2_eq_melt_ind.values)) df_out.insert(1, 'CO2_eq_melt_ppm_noMC', CO2_eq_melt_ind.iloc[sample_i]) return df_out