Source code for DiadFit.argon_lines

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import patches
import lmfit
from lmfit.models import GaussianModel, VoigtModel, LinearModel, ConstantModel, PseudoVoigtModel
from scipy.signal import find_peaks
import os
import re
from os import listdir
from os.path import isfile, join
from DiadFit.importing_data_files import *
from typing import Tuple, Optional
from dataclasses import dataclass
import matplotlib.patches as patches
from tqdm.notebook import tqdm
import re
from DiadFit.ne_lines import *



encode="ISO-8859-1"


[docs]
def calculate_Ar_splitting(wavelength=532.05, line1_shift=1117, line2_shift=1447, cut_off_intensity=2000):
    """
    Calculates ideal splitting in air between lines closest to user-specified line shift. E.g. if user enters
    1117 and 1447 line, it looks for the nearest theoretical Ne line position based on your wavelength,
    and calculates the ideal splitting between these theoretical line positions. This is used to calculate the
    ideal Ne line splitting for doing the Ne correction routine of Lamadrid et al. (2017)

    Parameters
    -------------
    Wavelength: int
        Wavelength of your laser
    line1_shift: int, float
        Estimate of position of line 1
    line2_shift: int, float
        Estimate of position of line 2
    cut_off_intensity: int, float
        only searches through lines with a theoretical intensity from NIST grater than this value

    Returns
    ----------
    df of theoretical splitting, line positions used for this, and entered Ne line positions
    """

    df_Ar=calculate_Ar_line_positions(wavelength=wavelength, cut_off_intensity=cut_off_intensity)

    closest1=find_closest(df_Ar, line1_shift).loc['Raman_shift (cm-1)']
    closest2=find_closest(df_Ar, line2_shift).loc['Raman_shift (cm-1)']

    diff=abs(closest1-closest2)

    df=pd.DataFrame(data={'Ar_Split': diff,
    'Line_1': closest1,
    'Line_2': closest2,
    'Entered Pos Line 1': line1_shift,
    'Entered Pos Line 2': line2_shift}, index=[0])


    return df



[docs]
def calculate_Ar_line_positions(wavelength=532.05, cut_off_intensity=2000):
    """
    Calculates Raman shift for a given laser wavelength of Ne lines, using the datatable from NIST of Ne line
    emissoin in air and the intensity of each line.

    Parameters
    ---------------
    Wavelength: float
        Wavelength of laser
    cut_off_intensity: float
        Only chooses lines with intensities greater than this

    Returns
    ------------
    pd.DataFrame
        df wih Raman shift, intensity, and emission line position in air.


    """

    Ar_emission_line_air=np.array([
2420.456,
2516.789,
2534.709,
2562.087,
2891.612,
2942.893,
2979.05,
3033.508,
3093.402,
3243.689,
3293.64,
3307.228,
3350.924,
3376.436,
3388.531,
3476.747,
3478.232,
3491.244,
3491.536,
3509.778,
3514.388,
3545.596,
3545.845,
3559.508,
3561.03,
3576.616,
3581.608,
3582.355,
3588.441,
3622.138,
3639.833,
3718.206,
3729.309,
3737.889,
3765.27,
3766.119,
3770.369,
3770.52,
3780.84,
3803.172,
3809.456,
3850.581,
3868.528,
3925.719,
3928.623,
3932.547,
3946.097,
3948.979,
3979.356,
3994.792,
4013.857,
4033.809,
4035.46,
4042.894,
4044.418,
4052.921,
4072.005,
4072.385,
4076.628,
4079.574,
4082.387,
4103.912,
4131.724,
4156.086,
4158.59,
4164.18,
4179.297,
4181.884,
4190.713,
4191.029,
4198.317,
4200.674,
4218.665,
4222.637,
4226.988,
4228.158,
4237.22,
4251.185,
4259.362,
4266.286,
4266.527,
4272.169,
4277.528,
4282.898,
4300.101,
4300.65,
4309.239,
4331.2,
4332.03,
4333.561,
4335.338,
4345.168,
4348.064,
4352.205,
4362.066,
4367.832,
4370.753,
4371.329,
4375.954,
4379.667,
4385.057,
4400.097,
4400.986,
4426.001,
4430.189,
4430.996,
4433.838,
4439.461,
4448.879,
4474.759,
4481.811,
4510.733,
4522.323,
4530.552,
4545.052,
4564.405,
4579.35,
4589.898,
4609.567,
4637.233,
4657.901,
4721.591,
4726.868,
4732.053,
4735.906,
4764.865,
4806.02,
4847.81,
4865.91,
4879.864,
4889.042,
4904.752,
4933.209,
4965.08,
5009.334,
5017.163,
5062.037,
5090.495,
5141.783,
5145.308,
5165.773,
5187.746,
5216.814,
5495.874,
5558.702,
5606.733,
5650.704,
5739.520,
5888.584,
5912.085,
6032.127,
6043.223,
6059.372,
6114.923,
6172.278,
6243.12,
6384.717,
6416.307,
6483.082,
6638.221,
6639.74,
6643.698,
6666.359,
6677.282,
6684.293,
6752.834,
6861.269,
6871.289,
6937.664,
6965.431,
7030.251,
7067.218,
7068.736,
7107.478,
7125.82,
7147.042,
7206.98,
7272.936,
7311.716,
7316.005,
7353.293,
7372.118,
7380.426,
7383.98,
7392.98,
7435.368,
7503.869,
7514.652,
7635.106,
7723.761,
7724.207,
7948.176,
8006.157,
8014.786,
8103.693,
8115.311,
8264.522,
8392.27,
8408.21,
8424.648,
8521.442,
8667.944,
8771.86,
8849.91,
9075.394,
9122.967,
9194.638,
9224.499,
9291.531,
9354.22,
9657.786,
9784.503,
10052.06,
10332.72,
10467.177,
10470.054,
10506.5,
10673.565,
10683.034,
10733.87,
10759.16,
10812.896,
11106.46,
11488.109,
11668.71,
12112.326,
12139.738,
12343.393,
12402.827,
12439.321,
12456.12,
12487.663,
12702.281,
12733.418,
12802.739,
12933.195,
12956.659,
13008.264,
13213.99,
13228.107,
13230.9,
13272.64,
13313.21,
13367.111,
13499.41,
13504.191,
13599.333,
13622.659,
13678.55,
13718.577,
14093.64,
15046.5,
15172.69,
15989.49,
16519.86,
16940.58,
20616.23,
20986.11,
23133.2,
23966.52,

    ])*0.1


    Intensity=np.array([
    2,
    3,
    3,
    5,
    8,
    70,
    30,
    15,
    15,
    7,
    8,
    7,
    8,
    8,
    8,
    25,
    7,
    15,
    30,
    25,
    25,
    25,
    25,
    30,
    30,
    25,
    8,
    15,
    25,
    8,
    7,
    12,
    25,
    15,
    50,
    15,
    1,
    7,
    8,
    8,
    15,
    25,
    12,
    12,
    15,
    8,
    25,
    1,
    7,
    12,
    15,
    15,
    7,
    50,
    1,
    30,
    70,
    25,
    8,
    12,
    8,
    50,
    100,
    12,
    11,
    1,
    12,
    1,
    3,
    1,
    6,
    11,
    8,
    8,
    8,
    30,
    30,
    1,
    6,
    3,
    25,
    4,
    200,
    7,
    3,
    8,
    25,
    70,
    15,
    3,
    1,
    1,
    250,
    15,
    8,
    15,
    70,
    25,
    15,
    50,
    15,
    25,
    70,
    130,
    50,
    15,
    15,
    7,
    12,
    30,
    70,
    3,
    1,
    7,
    130,
    7,
    130,
    130,
    200,
    12,
    130,
    7,
    200,
    15,
    100,
    250,
    200,
    50,
    15,
    250,
    25,
    7,
    12,
    70,
    15,
    25,
    25,
    7,
    30,
    25,
    8,
    1,
    7,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    2,
    1,
    1,
    30,
    50,
    8,
    1,
    2,
    8,
    8,
    7,
    15,
    8,
    3,
    12,
    4,
    7,
    4,
    1,
    300,
    4,
    300,
    3,
    1,
    1,
    30,
    2,
    60,
    1,
    1,
    2,
    6,
    7,
    300,
    1,
    1,
    600,
    400,
    700,
    400,
    300,
    600,
    600,
    700,
    600,
    1000,
    300,
    1,
    400,
    600,
    400,
    130,
    7,
    5,
    1,
    1000,
    15,
    400,
    11,
    50,
    700,
    130,
    5,
    1,
    30,
    50,
    5,
    6,
    2,
    1,
    1,
    2,
    1,
    11,
    6,
    6,
    1,
    1,
    6,
    6,
    3,
    6,
    4,
    1,
    6,
    1,
    14,
    6,
    6,
    6,
    3,
    14,
    30,
    30,
    1,
    30,
    1,
    11,
    6,
    30,
    6,
    3,
    1,
    1,
    1,
    14,
    1,
    1,
    1,
    1,


    ])


    Raman_shift=(10**7/wavelength)-(10**7/Ar_emission_line_air)

    df_Ar=pd.DataFrame(data={'Raman_shift (cm-1)': Raman_shift,
                            'Intensity': Intensity,
                            'Ar emission line in air': Ar_emission_line_air})

    df_Ar_r=df_Ar.loc[df_Ar['Intensity']>cut_off_intensity]

    df_Ar_r=df_Ar_r.loc[df_Ar['Raman_shift (cm-1)']>0]
    return df_Ar_r



## Function that is basically just the Ne function but with some renaming.


def loop_Ar_lines(*, files, spectra_path, filetype, config_ID_peaks, config, df_fit_params,  prefix, plot_figure=True, print_df=False, const_params=True):
    # Call the Neon line processing function
    df_ne = loop_Ne_lines(
        files=files,
        spectra_path=spectra_path,
        filetype=filetype,
        config_ID_peaks=config_ID_peaks,
        config=config,
        df_fit_params=df_fit_params,
        prefix=prefix,
        plot_figure=plot_figure,
        print_df=False,
        const_params=const_params
    )



    # Rename columns by replacing 'Ne' with 'Ar'
    df_ar = df_ne.rename(columns=lambda col: col.replace('Ne', 'Ar'))

    return df_ar


def Argon_id_config(*, height, distance, prominence, width, threshold,
                    peak1_cent, peak2_cent, n_peaks,
                    exclude_range_1=None, exclude_range_2=None):
    return Neon_id_config(
        height=height,
        distance=distance,
        prominence=prominence,
        width=width,
        threshold=threshold,
        peak1_cent=peak1_cent,
        peak2_cent=peak2_cent,
        n_peaks=n_peaks,
        exclude_range_1=exclude_range_1,
        exclude_range_2=exclude_range_2
    )

def identify_Ar_lines(*, path, filename, filetype, config, print_df=False):
    return identify_Ne_lines(
        path=path,
        filename=filename,
        filetype=filetype,
        config=config,
        print_df=print_df
    )

def Ar_peak_config(**kwargs):
    return Ne_peak_config(**kwargs)


def fit_Ar_lines(*, Ar, filename, path, prefix=False, config,
                 Ar_center_1, Ar_center_2,
                 Ar_prom_1, Ar_prom_2,
                 const_params=False):
    # Call the Neon version internally
    df_ne = fit_Ne_lines(
        Ne=Ar,
        filename=filename,
        path=path,
        prefix=prefix,
        config=config,
        Ne_center_1=Ar_center_1,
        Ne_center_2=Ar_center_2,
        Ne_prom_1=Ar_prom_1,
        Ne_prom_2=Ar_prom_2,
        const_params=const_params
    )

    # Rename output columns from 'Ne' to 'Ar'
    df_ar = df_ne.rename(columns=lambda col: col.replace('Ne', 'Ar'))

    return df_ar


import matplotlib.pyplot as plt


[docs]
def plot_Ar_corrections(df=None, x_axis=None, x_label='index', marker='o', mec='k', mfc='r'):
    """
    Plot correction-related information for Ar spectra.
    Assumes column names have 'Ar_Corr', '1σ_Ar_Corr', etc.
    """
    if x_axis is not None:
        x = x_axis
    else:
        x = df.index

    fig, ((ax5, ax6),  (ax3, ax4), (ax1, ax2)) = plt.subplots(3, 2, figsize=(10, 12))

    # Peak 1
    ax5.errorbar(x, df['pk1_peak_cent'], xerr=0, yerr=df['error_pk1'].fillna(0).infer_objects(),
                 fmt='o', ecolor='k', elinewidth=0.8, mfc='b', ms=5, mec='k', capsize=3)
    ax5.set_xlabel(x_label)
    ax5.set_ylabel('Peak 1 center')

    # Peak 2
    ax6.plot(x, df['pk2_peak_cent'], marker, mec=mec, mfc=mfc)
    ax6.errorbar(x, df['pk2_peak_cent'], xerr=0, yerr=df['error_pk2'].fillna(0).infer_objects(),
                 fmt='o', ecolor='k', elinewidth=0.8, mfc=mfc, ms=5, mec=mec, capsize=3)
    ax6.set_xlabel(x_label)
    ax6.set_ylabel('Peak 2 center')

    # Correction vs. Peak 2
    ax3.errorbar(df['Ar_Corr'], df['pk2_peak_cent'],
                 xerr=df['1σ_Ar_Corr'].fillna(0).infer_objects(),
                 yerr=df['error_pk2'].fillna(0).infer_objects(),
                 fmt='o', ecolor='k', elinewidth=0.8, mfc='b', ms=5, mec='k', capsize=3)
    ax3.set_xlabel('Ar Correction factor')
    ax3.set_ylabel('Peak 2 center')

    # Correction vs. Peak 1
    ax4.errorbar(df['Ar_Corr'], df['pk1_peak_cent'],
                 xerr=df['1σ_Ar_Corr'].fillna(0).infer_objects(),
                 yerr=df['error_pk1'].fillna(0).infer_objects(),
                 fmt='o', ecolor='k', elinewidth=0.8, mfc='b', ms=5, mec='k', capsize=3)
    ax4.set_xlabel('Ar Correction factor')
    ax4.set_ylabel('Peak 1 center')

    # Ar Correction vs. x
    ax1.errorbar(x, df['Ar_Corr'], xerr=0, yerr=df['1σ_Ar_Corr'].fillna(0).infer_objects(),
                 fmt='o', ecolor='k', elinewidth=0.8, mfc='grey', ms=5, mec='k', capsize=3)
    ax1.set_ylabel('Ar Correction factor')
    ax1.set_xlabel(x_label)

    # Ar Correction vs. residuals (placeholder logic — update as needed)
    if 'residual_sum' in df.columns:
        ax2.errorbar(df['residual_sum'], df['Ar_Corr'],
                     xerr=0, yerr=df['1σ_Ar_Corr'].fillna(0).infer_objects(),
                     fmt='o', ecolor='k', elinewidth=0.8, mfc='grey', ms=5, mec='k', capsize=3)

    ax2.set_xlabel('Sum of pk1 and pk2 residual')
    ax2.set_ylabel('Ar Correction factor')

    for ax in [ax1, ax2, ax3, ax4, ax5, ax6]:
        ax.ticklabel_format(useOffset=False)

    plt.setp(ax3.get_xticklabels(), rotation=30, ha='right')
    plt.setp(ax4.get_xticklabels(), rotation=30, ha='right')

    fig.tight_layout()
    return fig




[docs]
def filter_Ar_Line_neighbours(*, df_combo=None, Corr_factor=None, number_av=6, offset=0.00005, file_name_filt=None):
    """
    Filters Ar correction factors that deviate by more than `offset` from the
    local median of neighboring values (defined by `number_av`).
    Optionally excludes specified filenames via `file_name_filt`.
    """
    if df_combo is not None:
        Corr_factor = df_combo['Ar_Corr']

    Corr_factor_Filt = np.zeros(len(Corr_factor), dtype=float)
    median_loop = np.zeros(len(Corr_factor), dtype=float)

    for i in range(len(Corr_factor)):
        if i < len(Corr_factor) / 2:
            median_loop[i] = np.nanmedian(Corr_factor[i:i+number_av])
        else:
            median_loop[i] = np.nanmedian(Corr_factor[i-number_av:i])

        if (
            Corr_factor[i] > (median_loop[i] + offset)
            or Corr_factor[i] < (median_loop[i] - offset)
        ):
            Corr_factor_Filt[i] = np.nan
        else:
            Corr_factor_Filt[i] = Corr_factor[i]

    ds = pd.Series(Corr_factor_Filt)

    if file_name_filt is not None:
        pattern = '|'.join(file_name_filt)
        mask = df_combo['filename_x'].str.contains(pattern)
        ds = ds.where(~mask, np.nan)

    return ds




[docs]
def generate_Ar_corr_model(*, time, Ar_corr, N_poly=3, CI=0.67, bootstrap=False,
                           std_error=True, N_bootstrap=500,
                           save_fig=False, pkl_name='polyfit_data_Ar.pkl'):
    """Generates a polynomial correction model for Ar correction data."""

    x_all = np.array([time])

    if isinstance(Ar_corr, pd.DataFrame):
        y_all = np.array([Ar_corr['Ar_Corr']])
        y_err = Ar_corr['1σ_Ar_Corr']
    else:
        y_all = Ar_corr
        y_err = 0 * Ar_corr

    non_nan_indices = ~np.isnan(x_all) & ~np.isnan(y_all)
    x = x_all[non_nan_indices]
    y = y_all[non_nan_indices]

    coefficients = np.polyfit(x, y, N_poly)
    Pf = np.poly1d(coefficients)

    data = {'model': Pf, 'x': x, 'y': y}
    with open(pkl_name, 'wb') as f:
        pickle.dump(data, f)

    new_x_plot = np.linspace(np.min(x), np.max(x), 100)

    if bootstrap:
        Ar_corr2 = calculate_Ar_corr_bootstrap_values(
            pickle_str=pkl_name,
            new_x=pd.Series(new_x_plot),
            N_poly=N_poly,
            CI=CI,
            N_bootstrap=N_bootstrap
        )
    elif std_error:
        Ar_corr2 = calculate_Ar_corr_std_err_values(
            pickle_str=pkl_name,
            new_x=pd.Series(new_x_plot),
            CI=CI
        )

    fig, ax1 = plt.subplots(1, 1, figsize=(10, 5))
    ax1.errorbar(x, y, xerr=0, yerr=y_err, fmt='o', ecolor='k',
                 elinewidth=0.8, mfc='grey', ms=5, mec='k', capsize=3)

    ax1.plot(new_x_plot, Ar_corr2['preferred_values'], '-k', label='best fit')
    ax1.plot(new_x_plot, Ar_corr2['lower_values'], ':k', label='lower bound')
    ax1.plot(new_x_plot, Ar_corr2['upper_values'], ':k', label='upper bound')
    ax1.plot(x, y, '+r', label='Ar lines')

    ax1.set_xlabel('sec after midnight')
    ax1.set_ylabel('Ar Corr factor')
    ax1.ticklabel_format(useOffset=False)
    ax1.legend()

    suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(N_poly, 'th')
    ax1.set_title(f"{N_poly}$^{{{suffix}}}$ degree polynomial: {int(CI * 100)}% prediction interval")

    if save_fig:
        fig.savefig('Ar_line_correction.png')

    return Pf, fig



def calculate_Ar_corr_std_err_values(*, pickle_str, new_x, CI=0.67):
    with open(pickle_str, 'rb') as f:
        data = pickle.load(f)

    model = data['model']
    N_poly = model.order - 1
    x = data['x']
    y = data['y']

    new_x_array = np.asarray(new_x)
    residuals = y - model(x)
    residual_std = np.std(residuals)

    mean_x = np.mean(x)
    n = len(x)
    standard_errors = residual_std * np.sqrt(1 + 1/n + (new_x_array - mean_x)**2 / np.sum((x - mean_x)**2))

    df_dof = len(x) - (N_poly + 1)
    t_value = t.ppf((1 + CI) / 2, df_dof)

    preferred_values = model(new_x_array)
    lower_values = preferred_values - t_value * standard_errors
    upper_values = preferred_values + t_value * standard_errors

    return pd.DataFrame({
        'time': new_x_array,
        'preferred_values': preferred_values,
        'lower_values': lower_values,
        'upper_values': upper_values
    })


def calculate_Ar_corr_bootstrap_values(*, pickle_str, new_x, N_poly=3, CI=0.67, N_bootstrap=500):
    with open(pickle_str, 'rb') as f:
        data = pickle.load(f)

    Pf = data['model']
    x = data['x']
    y = data['y']

    x_values = new_x
    preferred_values = []
    lower_values = []
    upper_values = []

    for new_x in x_values:
        bootstrap_predictions = []
        for _ in range(N_bootstrap):
            bootstrap_indices = np.random.choice(len(x), size=len(x), replace=True)
            bootstrap_x = x[bootstrap_indices]
            bootstrap_y = y[bootstrap_indices]
            bootstrap_coefficients = np.polyfit(bootstrap_x, bootstrap_y, N_poly)
            bootstrap_Pf = np.poly1d(bootstrap_coefficients)
            bootstrap_predictions.append(bootstrap_Pf(new_x))

        bootstrap_predictions_sorted = np.sort(bootstrap_predictions)
        lower_idx = int(((1 - CI) / 2) * N_bootstrap)
        upper_idx = int((1 - (1 - CI) / 2) * N_bootstrap)

        preferred_values.append(Pf(new_x))
        lower_values.append(bootstrap_predictions_sorted[lower_idx])
        upper_values.append(bootstrap_predictions_sorted[upper_idx])

    return pd.DataFrame({
        'time': x_values,
        'preferred_values': preferred_values,
        'lower_values': lower_values,
        'upper_values': upper_values
    })