import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import patches
import lmfit
from lmfit.models import GaussianModel, VoigtModel, LinearModel, ConstantModel
from scipy.signal import find_peaks
import os
import re
from os import listdir
from os.path import isfile, join
from tqdm import tqdm
import datetime
import calendar
# Text encoding passed to open() when this module reads instrument export files.
encode="ISO-8859-1"
## Function to convert power to measured power for UCB
def power_corr_UCB(df):
    """ Adds a 'Power_corr' column: 'power (mW)' scaled by a factor chosen from 'Mag (X)'.

    Parameters
    -----------
    df: pd.DataFrame
        Must contain the columns 'power (mW)' and 'Mag (X)'.

    Returns
    -----------
    pd.DataFrame:
        The same dataframe object (modified in place) with the new 'Power_corr' column.
        Rows whose magnification is not in the lookup table get NaN.
    """
    # Correction factor keyed by objective magnification
    factor_by_mag = {5: 0.80, 20: 0.79, 40: 0.77, 50: 0.62, 100: 0.43}
    scale = df['Mag (X)'].map(factor_by_mag)
    df['Power_corr'] = df['power (mW)'] * scale
    return df
## Get video mag
# Function to check if "Video Image" is in the first line, considering variations
[docs]
def line_contains_video_image(line):
    """ Returns True when the text 'video image' occurs in `line`, ignoring letter case. """
    lowered = line.lower()
    return lowered.find("video image") != -1
[docs]
def get_video_mag(metadata_path):
    """ Scans a folder of .txt metadata files and tabulates the objective used for each video image.

    A file is treated as a video file when its first line contains 'video image'
    (case-insensitive). The remaining lines are searched for the magnification and the
    image width/height; the value kept is the text after the last colon on the line.

    Parameters
    -----------
    metadata_path: str
        Folder containing the metadata .txt files

    Returns
    -----------
    pd.DataFrame or None:
        Columns 'Filename', 'Mag', 'Width (µm)', 'Height (µm)', with values kept as the strings
        read from the file. Returns None (after printing a message) when no video file with a
        magnification was found or the folder does not exist.
    """
    rows = []
    if os.path.exists(metadata_path) and os.path.isdir(metadata_path):
        txt_names = [entry for entry in os.listdir(metadata_path) if entry.endswith('.txt')]
        for entry in txt_names:
            with open(os.path.join(metadata_path, entry), 'r', encoding="ISO-8859-1") as handle:
                header = handle.readline()
                if "video image" not in header.lower():
                    continue
                mag = width = height = None
                for text in handle:
                    if "Objective Magnification:" in text:
                        mag = text.split(":")[-1].strip()
                    elif "Image Width [µm]:" in text:
                        width = text.split(":")[-1].strip()
                    elif "Image Height [µm]:" in text:
                        height = text.split(":")[-1].strip()
                # Only files that report a magnification are recorded
                if mag:
                    rows.append({
                        "Filename": entry,
                        "Mag": mag,
                        "Width (µm)": width,
                        "Height (µm)": height,
                    })
    else:
        print(f"The specified path {metadata_path} does not exist or is not a directory.")

    table = pd.DataFrame(rows)
    if table.empty:
        print("No data found. Please check the folder path and the content of the files.")
        return None
    return table
## Functions for getting file names
[docs]
def check_for_duplicates(spectra_path, prefix=True, prefix_str=' ', exception=True):
    """ Checks a folder for duplicated filenames (optionally after removing a leading prefix) and reports them.

    Parameters
    -----------
    spectra_path: str
        Folder containing the files to check.
    prefix: bool
        If True (default), everything up to and including the first occurrence of
        prefix_str is removed from each filename before comparing (e.g. '01 Ne.txt' -> 'Ne.txt').
        If False, filenames are compared unchanged.
    prefix_str: str
        Separator between the prefix and the real name. Default is a single space ' '.
        Names that do not contain the separator are compared unchanged.
    exception: bool
        If True (default), raises once duplicates have been printed.

    Returns
    -----------
    numpy.ndarray:
        Object array of the (prefix-stripped) filenames, in the order returned by listdir.

    Raises
    -----------
    TypeError
        If duplicates are found and exception is True.
    """
    spectra_files = [f for f in listdir(spectra_path) if isfile(join(spectra_path, f))]
    file_m = np.zeros(len(spectra_files), dtype=object)
    for i, name in enumerate(spectra_files):
        if prefix is False:
            file_m[i] = name
        else:
            # Keep the full remainder after the separator (previously only its first character was stored)
            _, sep, remainder = name.partition(prefix_str)
            file_m[i] = remainder if sep else name

    names = pd.Series(file_m)
    duplicated = names.duplicated()
    if duplicated.any():
        print('duplicates')
        print(names[duplicated])
        print('OOPS. at least one of your file name is duplicated go back to your spectra, you named a file twice, this will confuse the stitching ')
        if exception is True:
            raise TypeError('SORT OUT YOUR DUPLICATES BEFORE PROCEEDING!')
    return file_m
[docs]
def get_files(path, ID_str=None, file_ext='txt', exclude_str=None, exclude_type=None, sort=True):
    """ This function takes a user path, and extracts all files which contain the ID_str

    Parameters
    -----------
    path: str
        Folder user wishes to read data from
    sort: bool
        If true, sorts files alphabetically
    ID_str: str or list of str
        Keeps only files whose name contains this string, or any one of these strings
        (e.g. 'Ne' or ['Ne', 'NE']). None keeps every file.
    exclude_str: str or list of str
        Drops files whose name contains this string, or any one of these strings.
    file_ext: str
        Keeps only files whose name contains this text (e.g. txt)
    exclude_type:
        Not used by this function; accepted so existing calls keep working.

    Returns
    -----------
    list: file names as a list.
    """
    def _as_list(value):
        # A bare string is one pattern, not a sequence of single characters
        if value is None:
            return []
        if isinstance(value, str):
            return [value]
        return list(value)

    wanted = _as_list(ID_str)
    unwanted = _as_list(exclude_str)

    selected = []
    for name in listdir(path):
        if not isfile(join(path, name)):
            continue
        if file_ext not in name:
            continue
        if wanted and not any(tag in name for tag in wanted):
            continue
        if any(tag in name for tag in unwanted):
            continue
        selected.append(name)

    if sort is True:
        selected = sorted(selected)
    return selected
[docs]
def get_all_txt_files(path):
    """ Lists the files in a folder whose name contains '.txt' and does not contain 'pandas'.

    Parameters
    -----------
    path: str
        Folder user wishes to read data from

    Returns
    -----------
    list: matching file names, in the order given by listdir.
    """
    return [
        name for name in listdir(path)
        if isfile(join(path, name)) and '.txt' in name and 'pandas' not in name
    ]
# Function to get magnification of
## Functions to just simply get data to plot up
[docs]
def get_data(*, path=None, filename=None, Diad_files=None, filetype='Witec_ASCII'):
    """
    Extracts data as a np.array from user files of different types

    Parameters
    ---------------
    path: str
        path with spectra in
    filename: str
        Filename of specific spectra
    filetype: str
        choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv',
        'HORIBA_txt', 'Renishaw_txt'
    Diad_files:
        Location of the file, if you dont want to have to specify path and filename
        separately. When given, it is used instead of path/filename.

    Returns
    ---------------
    np.ndarray:
        The file contents as an array. If the first column is strictly decreasing,
        the rows are flipped so that it increases.

    Raises
    ---------------
    TypeError
        If filename is 'settings.txt'.
    ValueError
        If filetype is not one of the supported options.
    """
    if filename=='settings.txt':
        raise TypeError('Your settings file is being read. Please add this to the list of exclude_str at the top of the notebook')

    if filetype in ('Witec_ASCII', 'HORIBA_txt'):
        reader = read_witec_to_df if filetype == 'Witec_ASCII' else read_HORIBA_to_df
        if Diad_files is None:
            df = reader(path=path, filename=filename)
        else:
            # The readers only take keyword arguments and expect folder and file name separately
            folder, name = os.path.split(Diad_files)
            df = reader(path=folder or '.', filename=name)
    elif filetype in ('headless_txt', 'Renishaw_txt', 'headless_csv', 'head_csv'):
        source = Diad_files if Diad_files is not None else path + '/' + filename
        if filetype == 'headless_txt':
            df = pd.read_csv(source, sep="\t", header=None)
        elif filetype == 'Renishaw_txt':
            # Only the first two columns are kept
            df = pd.read_csv(source, sep="\t").iloc[:, 0:2]
        elif filetype == 'headless_csv':
            df = pd.read_csv(source, header=None)
        else:
            df = pd.read_csv(source)
    else:
        raise ValueError(
            f"Unsupported filetype: {filetype}. Choose from 'Witec_ASCII', 'headless_txt', "
            "'headless_csv', 'head_csv', 'HORIBA_txt', 'Renishaw_txt'")

    np_in = np.array(df)
    x_values = np_in[:, 0]
    # Flip spectra stored with a strictly decreasing first column
    if np.all(np.diff(x_values) < 0):
        np_in = np.flipud(np_in)
    return np_in
## Reading different file formats
[docs]
def read_HORIBA_to_df(*, path=None, filename):
    """ This function takes in a HORIBA .txt. file with headers with #, finds the row where data starts (no #),
    saves the data rows to a new file called pandas2_<filename>, and returns the data as a pandas dataframe.

    Every data row is kept, including the first line after the '#' header (earlier versions
    of this function discarded that line).

    Parameters
    -----------
    path: str
        Folder user wishes to read data from. The intermediate file is written to a
        'Peak_fits_txt' subfolder, which is created if needed. If None, filename is opened
        as given and the intermediate file is written to the current working directory.
    filename: str
        Specific file being read

    Returns
    ------------
    pd.DataFrame:
        Dataframe of x-y data (tab separated, no header row)

    Raises
    ------------
    TypeError
        If the first line of the file does not start with '#Acq. time'.
    """
    if path is None:
        source = filename
        stripped = 'pandas2_'+filename
    else:
        out_dir = path+'/'+'Peak_fits_txt'
        if not os.path.exists(out_dir):
            os.makedirs(out_dir, exist_ok=True)
            print('Ive made a new folder to store your intermediate txt files in')
        source = path+'/'+filename
        stripped = out_dir+'/'+'pandas2_'+filename

    with open(source, 'r', encoding=encode) as fr:
        if not fr.readline().startswith('#Acq. time'):
            raise TypeError('Not a HORIBA txt file with headers')
        with open(stripped, 'w') as fw:
            in_header = True
            for line in fr:
                # Skip the leading run of '#' metadata lines; everything after it is data
                if in_header and line.startswith('#'):
                    continue
                in_header = False
                fw.write(line)

    return pd.read_csv(stripped, sep="\t", header=None)
[docs]
def read_witec_to_df(*, path=None, filename):
    """ This function takes in a WITec ASCII.txt. file with metadata mixed with data, finds the '[Data]' marker,
    saves everything after it to a new file called pandas2_<filename> inside a 'Peak_fits_txt' subfolder,
    and returns the data as a pandas dataframe.

    Parameters
    -----------
    path: str
        Folder user wishes to read data from. Defaults to the current working directory.
    filename: str
        Specific file being read

    Returns
    ------------
    pd.DataFrame:
        Dataframe of x-y data (tab separated; the first line after '[Data]' is used as the header)

    Raises
    ------------
    TypeError
        If the first line does not start with '//Exported ASCII', if the file has no
        '[Data]' line, or if the median of the second column is 0.
    """
    if path is None:
        path=os.getcwd()

    out_dir = path+'/'+'Peak_fits_txt'
    if not os.path.exists(out_dir):
        os.makedirs(out_dir, exist_ok=True)
        print('Ive made a new folder to store your intermediate txt files in')
    stripped = out_dir+'/'+'pandas2_'+filename

    with open(path+'/'+filename, 'r', encoding=encode) as fr:
        if not fr.readline().startswith('//Exported ASCII'):
            raise TypeError('file not an ASCI file')
        for line in fr:
            if line.startswith('[Data]'):
                break
        else:
            # Reaching end-of-file here previously made the marker search loop forever
            raise TypeError(filename+': no [Data] section found, is this a complete WITec ASCII export?')
        # The iterator resumes on the line after '[Data]'
        with open(stripped, 'w') as fw:
            fw.writelines(fr)

    df=pd.read_csv(stripped, sep="\t")
    array=np.array(df)
    if np.median(array[:, 1])==0:
        raise TypeError(filename+': The median y value is 0, is it possible you stopped the acq before you got any counts? Please delete this file so it doesnt break the loops')
    return df
## Function to extract metadata based on creation or modification of file
## Functions to extract things for HORIBA
## HORIBA acquisition time
# encode="ISO-8859-1"
# def extract_duration_horiba(*, path, filename):
# """ This function extracts the duration from a HORIBA file by finding the line starting with #Acq. """
# fr = open(path+'/'+filename, 'r', encoding=encode)
#
# while True:
# l=fr.readline()
# if l.startswith('#Acq.'):
# line=l
# break
# return line
#
# def extract_accumulations_horiba(*, path, filename):
# """ This function extracts the accumulations from a HORIBA file by finding the line starting with #Accumu. """
# fr = open(path+'/'+filename, 'r', encoding=encode)
#
# while True:
# l=fr.readline()
# if l.startswith('#Accumu'):
# line=l
# break
# return line
#
# def extract_objective_horiba(*, path, filename):
# """ This function extracts the objective used from a HORIBA file by finding the line starting with #Object. """
# fr = open(path+'/'+filename, 'r', encoding=encode)
#
# while True:
# l=fr.readline()
# if l.startswith('#Object'):
# line=l
# break
# return line
#
# def extract_date_horiba(*, path, filename):
# """ This function extracts the date used from a HORIBA file by finding the line starting with #Date. """
# fr = open(path+'/'+filename, 'r', encoding=encode)
#
# while True:
# l=fr.readline()
# if l.startswith('#Date'):
# line=l
# break
# return line
#
# def extract_spectral_center_horiba(*, path, filename):
# """ This function extracts the spectral center used from a HORIBA file by finding the line starting with #Spectro (cm-¹). """
# fr = open(path+'/'+filename, 'r', encoding=encode)
#
# while True:
# l=fr.readline()
# if l.startswith('#Spectro (cm-¹)'):
# line=l
# break
# return line
#
# def extract_24hr_time_horiba(*, path, filename):
# """ This function extracts the 24 hr time from a HORIBA file by finding the line starting with #Acquired. """
# fr = open(path+'/'+filename, 'r', encoding=encode)
#
# while True:
# l=fr.readline()
# if l.startswith('#Acquired'):
# line=l
# break
# return line
#
# def extract_spectraname_horiba(*, path, filename):
# """
# This function extracts the spectral name from HORIBA files
# """
# fr = open(path+'/'+filename, 'r', encoding=encode)
#
# while True:
# l=fr.readline()
# if l.startswith('#Title'):
# line=l
# break
# return line
#
#
#
#
#
#
import numpy as np
# Re-assigns the module-level file encoding; same value as the assignment near the top of the file.
encode = "ISO-8859-1"
def extract_duration_horiba(*, path, filename):
    """ Returns the first line of the file that starts with '#Acq.', or NaN if no line does. """
    with open(path + '/' + filename, 'r', encoding=encode) as handle:
        matches = (row for row in handle if row.startswith('#Acq.'))
        return next(matches, np.nan)
def extract_accumulations_horiba(*, path, filename):
    """ Returns the first line of the file that starts with '#Accumulations', or NaN if no line does. """
    with open(path + '/' + filename, 'r', encoding=encode) as handle:
        matches = (row for row in handle if row.startswith('#Accumulations'))
        return next(matches, np.nan)
def extract_objective_horiba(*, path, filename):
    """ Returns the first line of the file that starts with '#Objective', or NaN if no line does. """
    with open(path + '/' + filename, 'r', encoding=encode) as handle:
        matches = (row for row in handle if row.startswith('#Objective'))
        return next(matches, np.nan)
def extract_date_horiba(*, path, filename):
    """ Returns the first line of the file that starts with '#Date', or NaN if no line does. """
    with open(path + '/' + filename, 'r', encoding=encode) as handle:
        matches = (row for row in handle if row.startswith('#Date'))
        return next(matches, np.nan)
def extract_spectral_center_horiba(*, path, filename):
    """ Returns, as a float, the value after '=' on the first line starting with '#Spectro (cm-¹)'.

    NaN is returned when no such line exists, when that line has no '=', or when the
    text after '=' cannot be converted to a float.
    """
    with open(path + '/' + filename, 'r', encoding=encode) as handle:
        header = next((row for row in handle if row.startswith('#Spectro (cm-¹)')), None)
    if header is None:
        return np.nan
    try:
        value_text = header.split('=')[1]
        return float(value_text.strip())
    except (IndexError, ValueError):
        return np.nan
def extract_24hr_time_horiba(*, path, filename):
    """ Returns the first line of the file that starts with '#Acquired', or NaN if no line does. """
    with open(path + '/' + filename, 'r', encoding=encode) as handle:
        matches = (row for row in handle if row.startswith('#Acquired'))
        return next(matches, np.nan)
def extract_spectraname_horiba(*, path, filename):
    """ Returns the first line of the file that starts with '#Title', or NaN if no line does. """
    with open(path + '/' + filename, 'r', encoding=encode) as handle:
        matches = (row for row in handle if row.startswith('#Title'))
        return next(matches, np.nan)
## Functions to extract metadata from WITEC files (v instrument specific)
[docs]
def checks_if_video_witec(*, path, filename):
    """ Checks if a WITEC file is an image (as doesnt have all metadata)

    Returns 'Video' when the first line of the file contains 'Video', otherwise 'not Video'.
    """
    # Context manager so the handle is closed once the first line has been read
    with open(path+'/'+filename, 'r', encoding=encode) as fr:
        first_line = fr.readline()
    return 'Video' if 'Video' in first_line else 'not Video'
[docs]
def checks_if_imagescan_witec(*, path, filename):
    """ Checks if a WITEC file is an imagescan (as doesnt have all metadata)

    Returns 'Scan' when the first line of the file contains 'Scan', otherwise 'not Scan'.
    """
    # Context manager so the handle is closed once the first line has been read
    with open(path+'/'+filename, 'r', encoding=encode) as fr:
        first_line = fr.readline()
    return 'Scan' if 'Scan' in first_line else 'not Scan'
[docs]
def checks_if_general_witec(*, path, filename):
    """ Checks if a WITEC file is a spectra file with all the right metadata

    Returns 'General' when the first line of the file contains 'General', otherwise 'not General'.
    """
    # Context manager so the handle is closed once the first line has been read
    with open(path+'/'+filename, 'r', encoding=encode) as fr:
        first_line = fr.readline()
    return 'General' if 'General' in first_line else 'not General'
## Functions for extracting the metadata from WITEC files
[docs]
def calculates_time_witec(*, path, filename):
    """ Calculates time as seconds after midnight for non video files for WITEC files

    Parameters
    -----------
    path: str
        Folder containing the metadata file
    filename: str
        Specific file being read

    Returns
    -----------
    tuple: (seconds after midnight, time string)
        Both entries are NaN for video and image-scan files. For any other file the time
        string is the line returned by extract_time_stamp_witec with the characters of
        'Start Time:' and tabs stripped from both ends.

    Notes
    -----------
    Files whose first line contains 'General' are deliberately processed like spectra.
    12-hour stamps ('AM'/'PM') are converted to a 24 hour clock; stamps without either marker
    are read as already being on a 24 hour clock (these used to raise an error).
    extract_time_stamp_witec is defined outside this block; the stamp is assumed to look like
    'H:MM:SS', optionally followed by ' AM' or ' PM' - TODO confirm against that helper.
    """
    is_video = checks_if_video_witec(path=path, filename=filename) == 'Video'
    is_scan = checks_if_imagescan_witec(path=path, filename=filename) == 'Scan'
    if is_video or is_scan:
        return np.nan, np.nan

    line = extract_time_stamp_witec(path=path, filename=filename)
    # str.strip treats its argument as a set of characters, which removes the label and tab here
    line2 = line.strip('Start Time:\t')

    is_pm = 'PM' in line2
    is_am = 'AM' in line2
    # Drop the AM/PM marker and surrounding whitespace, leaving 'H:MM:SS'
    clock = line2.strip(' APM\r\n')
    hr_txt, min_txt, sec_txt = clock.split(':')[:3]
    hours = float(hr_txt)
    if is_pm and hours != 12:
        # Any PM hour other than 12 is 12 hours later on a 24 hour clock
        hours += 12
    elif is_am and hours == 12:
        # 12 AM is the start of the day
        hours -= 12

    # Only the first two characters of the seconds field are used
    line3_sec_int = hours*60*60 + float(min_txt)*60 + float(sec_txt[:2])
    return line3_sec_int, line2
## Getting nice names from any file types
# These are largely redundant.
## Stitching together looped and individually fitted spectra
def get_ind_saved_files(*, path, ID_str='ind_fit_', sort=True, file_ext='.csv'):
    """ Lists the files in `path` whose name contains both ID_str and file_ext.

    Parameters
    -----------
    path: str
        Folder to search
    ID_str: str
        Text that must appear in the file name
    sort: bool
        If True, the names are returned in sorted order
    file_ext: str
        Text (normally the extension) that must appear in the file name

    Returns
    -----------
    list: matching file names
    """
    matches = []
    for entry in listdir(path):
        if isfile(join(path, entry)) and ID_str in entry and file_ext in entry:
            matches.append(entry)
    if sort is True:
        matches.sort()
    return matches
def stitch_loop_individual_fits(*, fit_individually=True,
                                saved_spectra_path, looped_df,
                                ID_str='ind_fit_', sort=True, file_ext='.csv'):
    """ Replaces rows of a looped-fit dataframe with individually fitted results saved as csv files.

    Parameters
    -----------
    fit_individually: bool
        If False, a copy of looped_df is returned unchanged.
    saved_spectra_path: str
        Folder holding the individually saved fit files. Files are read from this folder.
    looped_df: pd.DataFrame
        Results of the looped fits; must contain a 'filename' column.
    ID_str, sort, file_ext:
        Passed to get_ind_saved_files to select the individual fit files.

    Returns
    -----------
    pd.DataFrame:
        Copy of looped_df (index reset when fit_individually is True) in which, for every
        filename that also appears in the individual fit files, the columns of looped_df
        are overwritten with the individually fitted values.
    """
    df_loop = looped_df.copy()
    if not fit_individually:
        return df_loop

    df_loop = df_loop.reset_index(drop=True)
    ind_files = get_ind_saved_files(path=saved_spectra_path,
                                    sort=sort, ID_str=ID_str, file_ext=file_ext)
    if not ind_files:
        # Nothing was refitted individually, so there is nothing to stitch in
        return df_loop

    # get_ind_saved_files returns bare names, so they must be joined to the folder before reading
    ind_df = pd.concat(
        [pd.read_csv(os.path.join(saved_spectra_path, name)) for name in ind_files],
        axis=0)

    cols = list(df_loop.columns)
    ind_names = ind_df['filename'].unique()
    for name in df_loop['filename'].unique():
        if name in ind_names:
            replacement = ind_df.loc[ind_df['filename'] == name, cols]
            df_loop.loc[df_loop['filename'] == name, cols] = replacement.values
    return df_loop
## Save settings files
[docs]
def save_settings(meta_path, spectra_path, spectra_filetype, prefix, prefix_str, spectra_file_ext, meta_file_ext, TruPower):
    """ This function saves settings so you can load them across multiple notebooks without repetition

    Parameters
    -------------------
    meta_path: str
        Path where your metadata is stored
    spectra_path: str
        path where your spectra is stored
    spectra_filetype: str
        Style of data.
        Choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'HORIBA_txt', 'Renishaw_txt'
    spectra_file_ext, meta_file_ext: str
        Extension of spectra file and metadatafile. e.g. '.txt', '.csv'
    prefix: bool
        If True, removes 01, 02, from filename (WITEC problem)
        Also need to state prefix_str: prefix separating string (in this case, 01 Ne would be ' '
    TruPower: bool
        If WITEC instrument and you have TruPower, set as True

    Returns
    --------------
    None. Writes a file called settings.txt in the current working directory, one
    'key=value' line per setting (prefix_str is stored as its repr).

    Raises
    --------------
    TypeError
        If spectra_filetype is not one of the supported options.
    """
    filetype_opts = ['Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII', 'HORIBA_txt', 'Renishaw_txt']
    if spectra_filetype not in filetype_opts:
        # The message must use spectra_filetype; the old name 'filetype' was undefined here
        raise TypeError(f"Invalid spectra_filetype: {spectra_filetype}. Supported filetypes are {filetype_opts}")
    print(f"Good job! Filetype {spectra_filetype} is valid.")

    settings = {
        'meta_path': meta_path,
        'spectra_path': spectra_path,
        'spectra_filetype': spectra_filetype,
        'prefix': prefix,
        # repr keeps whitespace-only separators such as ' ' readable in the file
        'prefix_str': repr(prefix_str),
        'spectra_file_ext': spectra_file_ext,
        'meta_file_ext': meta_file_ext,
        'TruPower': TruPower,
    }

    settings_file_path = os.path.join(os.getcwd(), 'settings.txt')
    with open(settings_file_path, 'w') as file:
        for key, value in settings.items():
            file.write(f"{key}={value}\n")
[docs]
def get_settings():
    """ This function reads the settings file saved in step 1, and loads the options

    Reads 'settings.txt' from the current working directory. Each non-empty line is split
    at its first '=' into a key and a value, so values may themselves contain '='.

    Returns
    --------------
    tuple:
        (meta_path, spectra_path, spectra_filetype, prefix, prefix_str,
        spectra_file_ext, meta_file_ext, TruPower). prefix and TruPower are booleans,
        prefix_str is decoded from its stored repr, everything else is a string.
        Entries missing from the file are None.
    """
    import ast

    settings_file_path = os.path.join(os.getcwd(), 'settings.txt')

    settings = {}
    with open(settings_file_path, 'r') as file:
        for line in file:
            line = line.strip()
            if not line:
                continue
            key, sep, value = line.partition('=')
            if not sep:
                # Not a key=value line; ignore it
                continue
            if key == 'prefix_str':
                # NOTE(review): this used to be eval() on file content. literal_eval decodes the
                # repr() written by save_settings without executing arbitrary code.
                value = ast.literal_eval(value)
            settings[key] = value

    if 'prefix' in settings:
        settings['prefix'] = settings['prefix'].lower() == 'true'
    if 'TruPower' in settings:
        settings['TruPower'] = settings['TruPower'].lower() == 'true'

    ordered_keys = ('meta_path', 'spectra_path', 'spectra_filetype', 'prefix',
                    'prefix_str', 'spectra_file_ext', 'meta_file_ext', 'TruPower')
    return tuple(settings.get(name) for name in ordered_keys)
## Give nice column names
## Lets do the look up code.
#
[docs]
def add_column_name_descriptions(df):
    """ Adds a new first row to the inputted dataframe, with a description of what each DiadFit column means

    Parameters
    -------------------
    df: pandas dataframe, including some columns from DiadFit (can have other columns too)

    Returns
    ----------------------
    df: A new dataframe whose first row holds the description of each column found in the
    reference key (an empty string for any other column), followed by the original rows.
    The index is reset; the input dataframe is not modified.
    """
    lookup_key = {
        'filename': 'name of file',
        'Density g/cm3': 'Density of CO2 in g/cm3',
        'σ Density g/cm3': '1 sigma error on density (combined from peak fitting, Ne correction model, and densimeter equation)',
        'σ Density g/cm3 (from Ne+peakfit)': '1 sigma error on density (from just peak fitting + Ne correction model)',
        'σ Density g/cm3 (from densimeter)': '1 sigma error on density (from just the densimeter equation)',
        'Corrected_Splitting': 'Splitting in cm-1 after correcting for instrument drift',
        'Corrected_Splitting_σ': '1 sigma error on splitting (combined from peak fitting, Ne correction model)',
        'Corrected_Splitting_σ_Ne': '1 sigma error on splitting just from Ne correction model',
        'Corrected_Splitting_σ_peak_fit': '1 sigma error on splitting just from peak fitting',
        'power (mW)': 'Laser power used in mW measured by WITEC TruPower',
        'Spectral Center': 'Spectral Center used for analysis',
        'in range': 'Y or N - Is the corrected splitting within the calibration range of the densimeter?',
        'Notes': 'Which segment of the densimeter was used (e.g. which of several polynomials)',
        'LowD_RT': 'Density calculated using the low density part of the Room Temp densimeter',
        'HighD_RT': 'Density calculated using the high density part of the Room Temp densimeter',
        'LowD_SC': 'Density calculated using the low density segment of the 37C densimeter',
        'LowD_SC_σ': 'Error on density calculated using the low density segment of the 37C densimeter',
        # 'MedD_RC' looks like a typo for 'MedD_SC' (compare 'MedD_SC_σ'); both keys are kept so
        # either spelling of the column receives the description.
        'MedD_RC': 'Density calculated using the medium density segment of the 37C densimeter',
        'MedD_SC': 'Density calculated using the medium density segment of the 37C densimeter',
        'MedD_SC_σ': 'Error on density calculated using the medium density segment of the 37C densimeter',
        'HighD_SC': 'Density calculated using the high density segment of the 37C densimeter',
        'HighD_SC_σ': 'Error on density calculated using the high density segment of the 37C densimeter',
        'Temperature': 'User entered Temp description: SupCrit or RoomT ',
        'Splitting': 'Distance between fitted peak centers of Diad 1 and Diad 2 (cm-1)',
        'Split_σ': 'Error on splitting',
        'Diad1_Combofit_Cent': 'Fitted peak center (cm-1) of Diad1 (combined fit of diad, HB, gaussian background etc. )',
        'Diad1_cent_err': 'Error on peak center of Diad1 (cm-1, calculated using lmfit)',
        'Diad1_Combofit_Height': 'Height (intensity) of Diad1 combined fit',
        'Diad1_Voigt_Cent': 'Fitted peak center (cm-1) of Diad1 for just the main peak',
        'Diad1_Voigt_Area': 'Fitted area of Diad1 for just the main peak',
        'Diad1_Voigt_Sigma': 'Fitted sigma of Diad1 for just the main peak',
        'Diad1_Residual': 'Residual of fit to Diad1 (see DiadFit paper for explanation)',
        'Diad1_Prop_Lor': 'Proportion of Lorentzian in Psuedovoigt peak for Diad1',
        'Diad1_fwhm': 'Full Width Half Maximum of the fit to Diad1',
        'Diad1_refit': 'Notes any warnings that flagged during iterative fitting',
        'Diad2_Combofit_Cent': 'Fitted peak center (cm-1) of Diad2 (combined fit of diad, HB, gaussian background etc. )',
        'Diad2_cent_err': 'Error on peak center of Diad2 (cm-1, calculated using lmfit)',
        'Diad2_Combofit_Height': 'Height (intensity) of Diad2 combined fit',
        'Diad2_Voigt_Cent': 'Fitted peak center (cm-1) of Diad2 for just the main peak',
        'Diad2_Voigt_Area': 'Fitted area of Diad2 for just the main peak',
        'Diad2_Voigt_Sigma': 'Fitted sigma of Diad2 for just the main peak',
        'Diad2_Residual': 'Residual of fit to Diad2 (see DiadFit paper for explanation)',
        'Diad2_Prop_Lor': 'Proportion of Lorentzian in Psuedovoigt peak for Diad2',
        'Diad2_fwhm': 'Full Width Half Maximum of the fit to Diad2',
        'Diad2_refit': 'Notes any warnings that flagged during iterative fitting',
        'HB1_Cent': 'Fitted peak center of HB1 (cm-1)',
        'HB1_Area': 'Fitted area of HB1',
        'HB1_Sigma': 'Fitted sigma of HB1',
        'HB2_Cent': 'Fitted peak center of HB2 (cm-1)',
        'HB2_Area': 'Fitted area of HB2',
        'HB2_Sigma': 'Fitted sigma of HB2',
        'C13_Cent': 'Fitted peak center of the C13 peak (cm-1)',
        'C13_Area': 'Fitted area of the C13 peak',
        'C13_Sigma': 'Fitted sigma of the C13 peak',
        'Diad2_Gauss_Cent': 'Fitted peak center (cm-1) of the Gaussian background on Diad2 (if used)',
        'Diad2_Gauss_Area': 'Fitted area of the Gaussian background on Diad2',
        'Diad2_Gauss_Sigma': 'Fitted sigma of the Gaussian backgroun on Diad2',
        'Diad1_Gauss_Cent': 'Fitted peak center (cm-1) of the Gaussian background on Diad1 (if used)',
        'Diad1_Gauss_Area': 'Fitted area of the Gaussian background on Diad1',
        'Diad1_Gauss_Sigma': 'Fitted sigma of the Gaussian backgroun on Diad1',
        'Diad1_Asym50': 'Asymmetry of Diad1 using a 50% intensity cut off (see DeVitre et al. 2023, Volcanica)',
        'Diad1_Asym70': 'Asymmetry of Diad1 using a 70% intensity cut off (see DeVitre et al. 2023, Volcanica)',
        'Diad1_Yuan2017_sym_factor': 'Symmetry factor of Diad1 following Yuan 2017',
        'Diad1_Remigi2021_BSF': 'BSF factor of Diad1 following Remigi (2021)',
        'Diad2_Asym50': 'Asymmetry of Diad2 using a 50% intensity cut off (see DeVitre et al. 2023, Volcanica)',
        'Diad2_Asym70': 'Asymmetry of Diad2 using a 70% intensity cut off (see DeVitre et al. 2023, Volcanica)',
        'Diad2_Yuan2017_sym_factor': 'Symmetry factor of Diad2 following Yuan 2017',
        'Diad2_Remigi2021_BSF': 'BSF factor of Diad2 following Remigi (2021)',
        'Diad1_PDF_Model': 'Name of the probability density function used to fit Diad1',
        'Diad2_PDF_Model': 'Name of the probability density function used to fit Diad2',
        'Standard': 'Is the analysis a standard (Yes/No)',
        'date': 'Full date of analysis',
        'Month': 'Month of analysis',
        'Day': 'Day of the week of analysis',
        'Int_time (s)': 'Integration time of each individual spectra in s',
        'accumulations':'How many individual spectra are collected and averaged for a single reported spectra',
        'Mag (X)': 'Objective used during analysis',
        'duration': 'Duration of analysis as a string from WITEC',
        '24hr_time': 'Time converted to a 24 hr clock',
        'sec since midnight': 'time of acquisition as seconds after midnight on the day of analysis',
        'Peak_Cent_SO2': 'Fitted peak center (cm-1) of the SO2 peak',
        'Peak_Area_SO2': 'Fitted peak area (cm-1) of the SO2 peak',
        'Peak_Height_SO2': 'Fitted peak height (cm-1) of the SO2 peak',
        'Model_name_x': 'Model used to fit the SO2 peak',
        'Peak_Cent_Carb': 'Fitted peak center (cm-1) of the Carb peak',
        'Peak_Area_Carb': 'Fitted peak area (cm-1) of the Carb peak',
        'Peak_Height_Carb': 'Fitted peak height (cm-1) of the Carb peak',
        # Listed with the Carb columns; its description previously repeated the SO2 text
        'Model_name_y': 'Model used to fit the Carb peak',
        'Carb_Diad_Ratio': 'Area of carbonate peak divided by sum of area of Diad1 and Diad2 ',
        'SO2_Diad_Ratio': 'Area of the SO2 peak divided by sum of area of Diad1 and Diad2',
        'SO2_mol_ratio': 'Molar proportion of SO2 in the gas species',
        'time': 'seconds after midnight used for Ne correction ',
        'preferred_values': 'Preferred value for Ne correction',
        'lower_values': 'Preferred value - 1 sigma for Ne correction',
        'upper_values': 'Preferred value + 1 sigma for Ne correction',
        'SingleCalc_D_km': 'Depth calculated using the preferred (average) value for the input parameters of the MC simulation',
        'SingleCalc_P_kbar': 'Pressure calculated using the preferred (average) value for the input parameters of the MC simulation',
        'Mean_MC_P_kbar': 'Mean pressure calculated by averaging all the MC simulations for a single FI',
        'Med_MC_P_kbar':'Median pressure calculated by averaging all the MC simulations for a single FI',
        'std_dev_MC_P_kbar':'Std deviation of pressure calculated from all the MC simulations for a single FI',
        'std_dev_MC_P_kbar_from_percentile':'Std deviation of pressure calculated from 84th-16th quantile/2 calculated from all the MC simulations for a single FI',
        'Mean_MC_D_km': 'Mean depth calculated by averaging all the MC simulations for a single FI',
        'Med_MC_D_km':'Median depth calculated by averaging all the MC simulations for a single FI',
        'std_dev_MC_D_km':'Std deviation of depth calculated from all the MC simulations for a single FI',
        'std_dev_MC_D_km_from_percentile':'Std deviation of depth calculated from 84th-16th quantile/2 calculated from all the MC simulations for a single FI',
        'error_T_K': 'Input error in K for the Monte Carlo simulation',
        'CO2_dens_gcm3_input': 'Input CO2 content in g/cm3 for the Monte Carlo simulation',
        'error_CO2_dens_gcm3': 'Input CO2 error in g/cm3 for the Monte Carlo simulation',
        'crust_dens_kgm3_input': 'Selected crustal density for the Monte Carlo simulation',
        'error_crust_dens_kgm3':'Input crustal density error for the Monte Carlo simulation',
        'model': 'Selected model to convert pressure to depth in the crust',
        'EOS': 'Selected EOS to convert density to pressure',
    }

    # One description per column, in column order; unknown columns get an empty string
    description_row = [lookup_key.get(col, '') for col in df.columns]
    description_df = pd.DataFrame([description_row], columns=df.columns)

    # The description row goes first, followed by the original data
    df_with_descriptions = pd.concat([description_df, df], ignore_index=True)
    return df_with_descriptions