import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Lets use a generic function for getting a panda series value or not.
[docs]
def get_value(variable, i):
""" This function returns the value if its not a series, otherwise returns the right row in the series
"""
if isinstance(variable, pd.Series):
return variable.iloc[i]
else:
return variable
const=(4/3)*np.pi
[docs]
def propagate_CO2_in_bubble(*, N_dup=1000, sample_ID, vol_perc_bub=None, error_vol_perc_bub=None, error_type_vol_perc_bub='Abs',
MI_x=None, MI_y=None,MI_z=None,VB_x=None, VB_y=None,VB_z=None,
error_MI_x=None, error_MI_y=None,error_MI_z=None,error_VB_x=None, error_VB_y=None, error_VB_z=None,
error_type_dimension='Abs', error_dist_dimension='normal',
melt_dens_kgm3, CO2_bub_dens_gcm3,
error_dist_vol_perc_bub='normal',
error_CO2_bub_dens_gcm3=0, error_type_CO2_bub_dens_gcm3='Abs', error_dist_CO2_bub_dens_gcm3='normal',
error_melt_dens_kgm3=0, error_type_melt_dens_kgm3='Abs', error_dist_melt_dens_kgm3='normal',
plot_figure=True, fig_i=0, neg_values=True):
""" This function propagates uncertainty in reconstruction of melt inclusion CO2 contents
by feeding each row into propagate_CO2_in_bubble_ind. The returned standard deviation uses the 84th-16th percentile
rather than the true standard deviation, as this is better for skewed distributions.
Parameters
----------------
SampleID: str, pd.series
Sample_ID (e.g. sample names) which is returned on dataframe
N_dup: int
Number of duplicates when generating errors for Monte Carlo simulations
Now, either you know the vol% of the bubble and the associated error, in which case enter these two arguements:
vol_perc_bub: int, float, pd.series
Volume proportion of sulfide in melt inclusion
error_vol_perc_bub, CO2_bub_dens_gcm3, error_melt_dens_kgm3: int, float, pd.Series
Error for each variable, can be absolute or %
error_type_vol_perc_bub, error_type_bub_dens_gcm3, error_type_melt_dens_kgm3: 'Abs' or 'Perc'
whether given error is perc or absolute
Or, you have measured the x-y-z dimensions of the MI and VB (e.g. by perpendicular polishing).
MI_x, MI_y, MI_Z, VB_x, VB_y, VB_Z: int, float, pd.Series
x, y, z dimensions of vapour bubble and melt inclusion
error_MI_x, error_MI_y, error_MI_Z, error_VB_x, error_VB_y, error_VB_Z: int, float, pd.Series
Error on x, y, z dimension meausurement
error_type_dimension: 'Abs' or 'Perc'
Whether errors on all x-y-z are perc or abs
error_dist_dimension: 'normal' or 'uniform'
Whether errors on all x-y-z are normally or uniformly distributed.
melt_dens_kgm3:int, float, pd.series
Density of the silicate melt in kg/m3, e.g. from DensityX
CO2_bub_dens_gcm3: int, float, pd.Series
Density of the vapour bubble in g/cm3
error_dist_vol_perc_bub, error_dist_bub_dens_gcm3, error_dist_melt_dens_kgm3, error_dist_dimension: 'normal' or 'uniform'
Distribution of simulated error
plot_figure: bool
Default true - plots a figure of the row indicated by fig_i (default 1st row, fig_i=0)
neg_values: bool
Default True - whether negative values are removed from MC simulations or not. False, replace all negative values with zeros.
Returns
------------------
pd.DataFrame: df_step, All_outputs
All outputs has calculations for every simulaiton
df_step has average and standard deviation for each sample
"""
# Constant for sphere calcs
# Lets check what they entered for volume - if they didnt enter a volume % Bubble, lets calculate it
if vol_perc_bub is None:
if VB_z is None:
VB_z=(VB_x+VB_y)/2
if MI_z is None:
MI_z=(MI_x+MI_y)/2
Vol_VB_sphere=const*VB_x*VB_y*VB_z*(0.5)**3
Vol_MI_sphere=const*MI_x*MI_y*MI_z*(0.5)**3
vol_perc_bub=100* Vol_VB_sphere/Vol_MI_sphere
# Now lets check how they entered error in volume
if not ((error_vol_perc_bub is not None and all(v is None for v in [error_MI_x, error_MI_y, error_MI_z, error_VB_x, error_VB_y, error_VB_z])) or (error_vol_perc_bub is None and all(v is not None for v in [error_MI_x, error_MI_y, error_MI_z, error_VB_x, error_VB_y, error_VB_z]))):
raise ValueError('Specify either error_vol_perc_bub or non-None values for all of error_MI_x, error_MI_y, error_MI_z, error_VB_x, error_VB_y, and error_VB_z.')
# Set up empty things to fill up.
if type(vol_perc_bub) is pd.Series:
len_loop=len(vol_perc_bub)
else:
len_loop=1
mean_CO2_eq_melt = np.zeros(len_loop, dtype=float)
mean_CO2_eq_melt_ind = np.zeros(len_loop, dtype=float)
med_CO2_eq_melt = np.zeros(len_loop, dtype=float)
std_CO2_eq_melt = np.zeros(len_loop, dtype=float)
preferred_val_CO2_melt= np.zeros(len_loop, dtype=float)
std_IQR_CO2_eq_melt= np.zeros(len_loop, dtype=float)
Q84= np.zeros(len_loop, dtype=float)
Q16= np.zeros(len_loop, dtype=float)
Sample=np.zeros(len_loop, dtype=np.dtype('U100') )
All_outputs=pd.DataFrame([])
#This loops through each sample
for i in range(0, len_loop):
if i % 20 == 0:
print('working on sample number '+str(i))
# If user has entered a pandas series for error, takes right one for each loop
# vol_perc_bub % and error
# Checking volume right format, if panda series or integer
vol_perc_bub_i=get_value(vol_perc_bub, i)
error_vol_perc_bub_i=get_value(error_vol_perc_bub, i)
CO2_bub_dens_gcm3_i=get_value(CO2_bub_dens_gcm3, i)
error_CO2_bub_dens_gcm3_i=get_value(error_CO2_bub_dens_gcm3, i)
melt_dens_kgm3_i=get_value(melt_dens_kgm3, i)
error_melt_dens_kgm3_i=get_value(error_melt_dens_kgm3, i)
# This is the function doing the work to actually make the simulations for each variable.
if error_vol_perc_bub is not None:
df_synthetic=propagate_CO2_in_bubble_ind(
N_dup=N_dup,
vol_perc_bub=vol_perc_bub_i,
melt_dens_kgm3=melt_dens_kgm3_i,
CO2_bub_dens_gcm3=CO2_bub_dens_gcm3_i,
error_CO2_bub_dens_gcm3=error_CO2_bub_dens_gcm3_i,
error_type_CO2_bub_dens_gcm3=error_type_CO2_bub_dens_gcm3,
error_dist_CO2_bub_dens_gcm3=error_dist_CO2_bub_dens_gcm3,
error_vol_perc_bub=error_vol_perc_bub_i,
error_type_vol_perc_bub=error_type_vol_perc_bub,
error_dist_vol_perc_bub=error_dist_vol_perc_bub,
error_melt_dens_kgm3=error_melt_dens_kgm3_i,
error_type_melt_dens_kgm3=error_type_melt_dens_kgm3,
error_dist_melt_dens_kgm3=error_dist_melt_dens_kgm3,
len_loop=1, neg_values=neg_values)
else:
# This is the more complex one where we have to account for x-y-z errors on all of them.
MI_x_i = get_value(MI_x, i)
MI_y_i = get_value(MI_y, i)
VB_x_i = get_value(VB_x, i)
VB_y_i = get_value(VB_y, i)
VB_z_i = get_value(VB_z, i)
MI_z_i = get_value(MI_z, i)
error_MI_x_i = get_value(error_MI_x, i)
error_MI_y_i = get_value(error_MI_y, i)
error_MI_z_i = get_value(error_MI_z, i)
error_VB_x_i = get_value(error_VB_x, i)
error_VB_y_i = get_value(error_VB_y, i)
error_VB_z_i = get_value(error_VB_z, i)
df_synthetic=propagate_CO2_in_bubble_ind(
N_dup=N_dup,
vol_perc_bub=vol_perc_bub_i,
melt_dens_kgm3=melt_dens_kgm3_i,
CO2_bub_dens_gcm3=CO2_bub_dens_gcm3_i,
error_CO2_bub_dens_gcm3=error_CO2_bub_dens_gcm3_i,
error_type_CO2_bub_dens_gcm3=error_type_CO2_bub_dens_gcm3,
error_dist_CO2_bub_dens_gcm3=error_dist_CO2_bub_dens_gcm3,
error_vol_perc_bub=None,
MI_x=MI_x_i, MI_y=MI_y_i,MI_z=MI_z_i,VB_x=VB_x_i, VB_y=VB_y_i,VB_z=VB_z_i,
error_MI_x=error_MI_x_i, error_MI_y=error_MI_y_i,error_MI_z=error_MI_z_i,
error_VB_x=error_VB_x_i, error_VB_y=error_VB_y_i, error_VB_z=error_VB_z_i,
error_type_dimension=error_type_dimension, error_dist_dimension=error_dist_dimension,
error_melt_dens_kgm3=error_melt_dens_kgm3_i,
error_type_melt_dens_kgm3=error_type_melt_dens_kgm3,
error_dist_melt_dens_kgm3=error_dist_melt_dens_kgm3,
len_loop=1, neg_values=neg_values)
# Convert to densities for MC
df=df_synthetic
# Check of
if sample_ID is None:
Sample[i]=i
elif isinstance(sample_ID, str):
Sample[i]=sample_ID
else:
Sample[i]=sample_ID.iloc[i]
df.insert(0, 'Filename', Sample[i])
All_outputs=pd.concat([All_outputs, df], axis=0)
mean_CO2_eq_melt[i]=np.nanmean(df['CO2_eq_melt_ppm_MC'])
med_CO2_eq_melt[i]=np.nanmedian(df['CO2_eq_melt_ppm_MC'])
std_CO2_eq_melt[i]=np.nanstd(df['CO2_eq_melt_ppm_MC'])
var=df['CO2_eq_melt_ppm_MC']
Q16[i]=np.percentile(var, 16)
Q84[i]=np.percentile(var, 84)
std_IQR_CO2_eq_melt[i]=0.5*np.abs((np.percentile(var, 84) -np.percentile(var, 16)))
# Values all the same, so can just take the 1st.
preferred_val_CO2_melt[i]=df['CO2_eq_melt_ppm_noMC'].iloc[0]
df_step=pd.DataFrame(data={'Filename': Sample,
'CO2_eq_in_melt_noMC': preferred_val_CO2_melt,
'mean_MC_CO2_equiv_melt_ppm': mean_CO2_eq_melt,
'med_MC_CO2_equiv_melt_ppm': med_CO2_eq_melt,
'std_MC_IQR_CO2_equiv_melt_ppm': std_IQR_CO2_eq_melt,
'16th_quantile_melt_ppm': Q16,
'84th_quantile_melt_ppm': Q84,
})
if plot_figure is True:
all_sims=fig_i
all_sims=All_outputs.loc[All_outputs['Filename']==All_outputs['Filename'].iloc[fig_i]]
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10,8))
ax1.hist(all_sims['vol_perc_bub_with_noise'], bins=50);
ax1.set_xlabel('MC bubble volume (vol%)')
ax1.set_ylabel('# of simulations')
ax2.hist(all_sims['melt_dens_kgm3_with_noise'], bins=50);
ax2.set_xlabel('MC Melt Density (kg/cm3)')
ax3.hist(all_sims['CO2_bub_dens_gcm3_with_noise'], bins=50);
ax3.set_xlabel('MC Bubble Density (g/cm3)')
ax3.set_ylabel('# of simulations')
ax4.hist(all_sims['CO2_eq_melt_ppm_MC'], bins=50, color='red');
ax4.set_xlabel('CO2 equivalent in the melt held in the bubble (ppm)')
ax4.plot([all_sims['CO2_eq_melt_ppm_noMC'].iloc[0], all_sims['CO2_eq_melt_ppm_noMC'].iloc[0]], [0, N_dup/20], '-k', label='Preferred value');
ax4.plot([all_sims['CO2_eq_melt_ppm_noMC'].iloc[0]+np.std(all_sims['CO2_eq_melt_ppm_MC']), all_sims['CO2_eq_melt_ppm_noMC'].iloc[0]+np.std(all_sims['CO2_eq_melt_ppm_MC'])], [0, N_dup/20], ':k', label='Preferred+value+1s MC');
ax4.plot([all_sims['CO2_eq_melt_ppm_noMC'].iloc[0]-np.std(all_sims['CO2_eq_melt_ppm_MC']), all_sims['CO2_eq_melt_ppm_noMC'].iloc[0]-np.std(all_sims['CO2_eq_melt_ppm_MC'])], [0, N_dup/20], ':k', label='Preferred-value+1s MC');
ax4.legend()
return df_step, All_outputs, fig if 'fig' in locals() else None
# Lets set the random seed
np.random.seed(42)
[docs]
def add_noise_to_variable(original_value, error, error_type, error_dist, N_dup, neg_values, neg_threshold):
""" This function adds noise to each variable for the monte-carloing
Parameters
-----------------
original_value: int, float
Preferred value (e.g. center of distribution)
error: int, float
Error value
error_type: str
'Abs' if absolute error, 'Perc' if percent
error_dist: str
'normal' if normally distributed, 'uniform' if uniformly distributed.
N_dup: int
number of duplicates
neg_values: bool
whether negative values are replaced with zeros
"""
# Depending on the error type, allocates an error
if error_type == 'Abs':
calculated_error = error
elif error_type == 'Perc':
calculated_error = original_value * error / 100
# Generates noise following a distribution
if error_dist == 'normal':
noise_to_add = np.random.normal(0, calculated_error, N_dup)
elif error_dist == 'uniform':
noise_to_add = np.random.uniform(-calculated_error, calculated_error, N_dup)
# adds this noise to original value
value_with_noise = noise_to_add + original_value
if not neg_values:
value_with_noise[value_with_noise < neg_threshold] = neg_threshold
return value_with_noise
[docs]
def propagate_CO2_in_bubble_ind(sample_i=0, N_dup=1000, vol_perc_bub=None,
CO2_bub_dens_gcm3=None, melt_dens_kgm3=None,
MI_x=None, MI_y=None,MI_z=None,VB_x=None, VB_y=None,VB_z=None,
error_MI_x=None, error_MI_y=None,error_MI_z=None,error_VB_x=None, error_VB_y=None, error_VB_z=None,
error_type_dimension='Abs', error_dist_dimension='normal',
error_vol_perc_bub=None, error_type_vol_perc_bub='Abs', error_dist_vol_perc_bub='normal',
error_CO2_bub_dens_gcm3=0, error_type_CO2_bub_dens_gcm3='Abs', error_dist_CO2_bub_dens_gcm3='normal',
error_melt_dens_kgm3=0, error_type_melt_dens_kgm3='Abs', error_dist_melt_dens_kgm3='normal', len_loop=1, neg_values=True):
""" This function propagates uncertainty in reconstruction of melt inclusion bubble equivalent CO2 contents.
and returns a dataframe. it does one sample at a time.
Parameters
----------------
sample_i: int
if your inputs are panda series, says which row to take
Parameters
----------------
For volumes, either enter vol% bubble and associated errors...
vol_perc_bub: int, float, pd.series
Volume proportion of sulfide in melt inclusion
error_vol_perc_bub, CO2_bub_dens_gcm3, error_melt_dens_kgm3: int, float, pd.Series
Error for each variable, can be absolute or %
error_type_vol_perc_bub, error_type_bub_dens_gcm3, error_type_melt_dens_kgm3: 'Abs' or 'Perc'
whether given error is perc or absolute
error_dist_vol_perc_bub, error_dist_bub_dens_gcm3, error_dist_melt_dens_kgm3: 'normal' or 'uniform'
Distribution of simulated error
OR
Enter melt inclusion and vapour bubble dimensions (diameter, not radii), and their errors
MI_x, MI_y, MI_z, VB_x, VB_y, VB_z: int, float, series:
Diameter of melt inclusions.
error_MI_x, error_MI_y, error_MI_z, error_VB_x, error_VB_y, error_VB_z:
Error on diameter of melt inclusions
error_type_dimension='Abs' or 'Perc':
Specify whether errors on these dimensions are absolute or percentage
error_dist_dimension='normal' or 'uniform':
Specify error distribution
SampleID: str, pd.series
Sample_ID (e.g. sample names) which is returned on dataframe
N_dup: int
Number of duplicates when generating errors for Monte Carlo simulations
melt_dens_kgm3:int, float, pd.series
Density of the silicate melt in kg/m3, e.g. from DensityX
CO2_bub_dens_gcm3: int, float, pd.Series
Density of the vapour bubble in g/cm3
plot_figure: bool
Default true - plots a figure of the row indicated by fig_i (default 1st row, fig_i=0)
neg_values: bool
Default True - whether negative values are removed from MC simulations or not. False, replace all negative values with zeros.
Returns
------------------
pd.DataFrame:
Input variable duplicated N_dup times with noise added.
"""
# If only a single sample, set up an output dataframe with an index.
if len_loop==1:
df_c=pd.DataFrame(data={
'vol_perc_bub': vol_perc_bub,
'CO2_bub_dens_gcm3': CO2_bub_dens_gcm3,
'melt_dens_kgm3': melt_dens_kgm3 }, index=[0])
else:
df_c=pd.DataFrame(data={
'vol_perc_bub': vol_perc_bub,
'CO2_bub_dens_gcm3': CO2_bub_dens_gcm3,
'melt_dens_kgm3': melt_dens_kgm3})
# Volume error distribution - if they give a volume percentage rather than dimensions
if error_vol_perc_bub is not None:
print('using error on the bubble volume percent, not the entered dimensions, as error_vol_perc_bub was not None')
# Easy peasy
Vol_with_noise=add_noise_to_variable(vol_perc_bub, error_vol_perc_bub,
error_type_vol_perc_bub, error_dist_vol_perc_bub, N_dup, neg_values, neg_threshold=0.0000000001)
else:
x_MI_with_noise=add_noise_to_variable(MI_x, error_MI_x,
error_type_dimension, error_dist_dimension, N_dup, neg_values, neg_threshold=0.0000000001)
y_MI_with_noise=add_noise_to_variable(MI_y, error_MI_y,
error_type_dimension, error_dist_dimension, N_dup, neg_values, neg_threshold=0.0000000001)
x_VB_with_noise=add_noise_to_variable(VB_x, error_VB_x,
error_type_dimension, error_dist_dimension, N_dup, neg_values, neg_threshold=0.0000000001)
y_VB_with_noise=add_noise_to_variable(VB_y, error_VB_y,
error_type_dimension, error_dist_dimension, N_dup, neg_values, neg_threshold=0.0000000001)
z_MI_with_noise=add_noise_to_variable(MI_z, error_MI_z,
error_type_dimension, error_dist_dimension, N_dup, neg_values, neg_threshold=0.0000000001)
z_VB_with_noise=add_noise_to_variable(VB_z, error_VB_z,
error_type_dimension, error_dist_dimension, N_dup, neg_values, neg_threshold=0.0000000001)
Vol_VB_sphere_with_noise=const*x_VB_with_noise*y_VB_with_noise*z_VB_with_noise*(0.5)**3
Vol_MI_sphere_with_noise=const*x_MI_with_noise*y_MI_with_noise*z_MI_with_noise*(0.5)**3
Vol_with_noise=100* Vol_VB_sphere_with_noise/Vol_MI_sphere_with_noise
# Bubble density
CO2_bub_dens_gcm3_with_noise=add_noise_to_variable(CO2_bub_dens_gcm3, error_CO2_bub_dens_gcm3,
error_type_CO2_bub_dens_gcm3, error_dist_CO2_bub_dens_gcm3, N_dup, neg_values, neg_threshold=0.0000000001)
# Melt density
melt_dens_kgm3_with_noise=add_noise_to_variable(melt_dens_kgm3, error_melt_dens_kgm3,
error_type_melt_dens_kgm3, error_dist_melt_dens_kgm3, N_dup, neg_values, neg_threshold=0.0000000001)
# Now lets calculate the equilibrium CO2 content of the melt
CO2_eq_melt_ind=10**4 * (df_c['vol_perc_bub']*df_c['CO2_bub_dens_gcm3'])/(df_c['melt_dens_kgm3']/1000)
if error_vol_perc_bub is not None:
df_out=pd.DataFrame(data={
'vol_perc_bub_with_noise': Vol_with_noise,
'CO2_bub_dens_gcm3_with_noise': CO2_bub_dens_gcm3_with_noise,
'melt_dens_kgm3_with_noise': melt_dens_kgm3_with_noise,
'vol_perc_bub': df_c['vol_perc_bub'].iloc[sample_i],
'CO2_bub_dens_gcm3': CO2_bub_dens_gcm3,
'error_type_vol_perc_bub': error_type_vol_perc_bub,
'error_dist_Vol': error_dist_vol_perc_bub,
'error_CO2_bub_dens_gcm3': error_CO2_bub_dens_gcm3,
'error_type_CO2_bub_dens_gcm3': error_type_CO2_bub_dens_gcm3,
'error_dist_CO2_bub_dens_gcm3': error_dist_CO2_bub_dens_gcm3,
})
else:
df_out=pd.DataFrame(data={
'vol_perc_bub_with_noise': Vol_with_noise,
'CO2_bub_dens_gcm3_with_noise': CO2_bub_dens_gcm3_with_noise,
'melt_dens_kgm3_with_noise': melt_dens_kgm3_with_noise,
'vol_perc_bub': df_c['vol_perc_bub'].iloc[sample_i],
'CO2_bub_dens_gcm3': CO2_bub_dens_gcm3,
'error_CO2_bub_dens_gcm3': error_CO2_bub_dens_gcm3,
'error_type_CO2_bub_dens_gcm3': error_type_CO2_bub_dens_gcm3,
'error_dist_CO2_bub_dens_gcm3': error_dist_CO2_bub_dens_gcm3,
'Vol_VB_with_noise': Vol_VB_sphere_with_noise,
'Vol_MI_with_noise': Vol_MI_sphere_with_noise,
'VB_x_with_noise': x_VB_with_noise,
'VB_y_with_noise':y_VB_with_noise,
'VB_z_with_noise': z_VB_with_noise,
'MI_x_with_noise': x_MI_with_noise,
'MI_y_with_noise': y_MI_with_noise,
'MI_z_with_noise': z_MI_with_noise,
})
CO2_eq_melt=10**4*((df_out['vol_perc_bub_with_noise']*df_out['CO2_bub_dens_gcm3_with_noise']))/(df_out['melt_dens_kgm3_with_noise']/1000)
df_out.insert(0, 'CO2_eq_melt_ppm_MC',CO2_eq_melt)
#df_out.insert(1, 'CO2_eq_melt_ppm_noMC',float(CO2_eq_melt_ind.values))
df_out.insert(1, 'CO2_eq_melt_ppm_noMC', CO2_eq_melt_ind.iloc[sample_i])
return df_out