Evaluating Imputation Against Ground Truth Fold Change
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import seaborn as sns
from pyproteonet.simulation import molecule_set_from_degree_distribution, simulate_protein_peptide_dataset, simulate_mcars, simulate_mnars_thresholding
from pyproteonet.aggregation import maxlfq
from pyproteonet.processing import logarithmize
Simulating a Dataset
# Degree distributions for the protein and peptide nodes, chosen to roughly
# resemble those of a real-world dataset.
# Entry k of each list is the fraction of nodes that have degree k.
protein_deg_distribution = [
    0, 0.1445, 0.1221, 0.1151, 0.0933, 0.0692, 0.0655, 0.0508,
    0.0472, 0.0362, 0.0311, 0.0277, 0.0209, 0.0199, 0.0163, 0.0143,
    0.012, 0.0105, 0.0093, 0.0087, 0.0081, 0.0063, 0.0063, 0.0055,
    0.0054, 0.0043, 0.0043, 0.0042, 0.0039, 0.0037, 0.0034, 0.0031,
    0.0022, 0.0021, 0.0019, 0.0019, 0.0019, 0.0015, 0.0012, 0.001,
    0.001,
]
peptide_deg_distribution = [0, 0.9591, 0.0341, 0.0046, 0.0014]
First, we create a set of proteins with related peptides. Next, we simulate abundance values for those peptides. During the simulation we also simulate a fold change for 30% of all proteins. This is done by simulating a single condition group that affects 30% of all proteins in the last five samples.
# We aim for a simulated dataset with 100 proteins and 10 samples
num_proteins = 100
num_samples = 10

# Simple heuristic to determine the number of peptides for the given number of
# proteins while still closely matching both degree distributions: the total
# number of edge endpoints on the peptide side has to equal the total on the
# protein side.

# Number of proteins per degree (index = degree) and the resulting edge count.
protein_degs = np.round(num_proteins * np.array(protein_deg_distribution))
prot_edges = np.sum(np.arange(len(protein_deg_distribution)) * protein_degs)

# Grow the peptide set one peptide at a time until it provides at least as
# many edge endpoints as the proteins require.
num_peptides = 1
pep_edges = 0
while pep_edges < prot_edges:
    num_peptides += 1
    peptide_degs = np.round(num_peptides * np.array(peptide_deg_distribution))
    pep_edges = np.sum(np.arange(len(peptide_deg_distribution)) * peptide_degs)

# The loop above may overshoot. Remove the surplus by dropping at most one
# peptide per degree, going from the highest degree down, until the
# difference is used up.
if pep_edges > prot_edges:
    diff = pep_edges - prot_edges
    for i in range(len(peptide_degs) - 1, -1, -1):
        # Only drop a peptide of degree i if one exists and removing it does
        # not take away more edges than the remaining surplus.
        if peptide_degs[i] > 0 and i <= diff:
            peptide_degs[i] -= 1
            diff -= i
        if diff == 0:
            break
# Build the protein/peptide molecule set from the per-degree node counts
# (protein_degs / peptide_degs) determined for the requested dataset size.
ms = molecule_set_from_degree_distribution(
    molecule1_name='protein',
    molecule2_name='peptide',
    mapping_name='peptide-protein',
    molecule1_degree_distribution=protein_degs,
    molecule2_degree_distribution=peptide_degs,
)
# Let's simulate some abundance values for the given molecule set.
# A random 30% of the proteins is picked as the group affected by the single
# simulated condition, which is applied to sample5 ... sample9.
condition_proteins = ms.molecules['protein'].sample(frac=0.3).index
fold_change_samples = [f'sample{s}' for s in range(5, 10)]
ds = simulate_protein_peptide_dataset(
    molecule_set=ms,
    mapping='peptide-protein',
    samples=num_samples,
    log_abundance_mu=15.9,
    log_abundance_sigma=1.8,
    log_protein_error_sigma=0.3,
    peptide_noise_sigma=115005.3,
    flyability_alpha=0.7,
    flyability_beta=2.1,
    simulate_flyability=True,
    condition_samples=[fold_change_samples],
    condition_affected=[condition_proteins],
    log2_condition_means=[2.0],
    log2_condition_stds=[0.66],
)
Finally, we incorporate some missing values (MNARs and MCARs)
# MNARs: threshold-based masking of the peptide 'abundance' column. The result
# is written to 'abundance_missing' and the mask to 'is_mnar'.
mnar_scale = 115005.3
simulate_mnars_thresholding(
    dataset=ds,
    thresh_mu=mnar_scale / 2,
    thresh_sigma=mnar_scale / 4,
    molecule='peptide',
    column='abundance',
    result_column='abundance_missing',
    mask_column='is_mnar',
    inplace=True,
)
# MCARs: additionally mask values with amount=0.3. The result goes to the same
# 'abundance_missing' column and the mask to 'is_mcar'.
simulate_mcars(
    dataset=ds,
    amount=0.3,
    molecule='peptide',
    column='abundance',
    result_column='abundance_missing',
    mask_column='is_mcar',
    inplace=True,
)
<pyproteonet.data.dataset.Dataset at 0x7f98331499f0>
# Fraction of peptide values flagged as MNAR and as MCAR, respectively
df = ds.values['peptide'].df
num_values = df.shape[0]
df.is_mnar.sum() / num_values, df.is_mcar.sum() / num_values
(0.025, 0.2996688741721854)
In the end, all abundance/aggregated values are logarithmized, as is commonly done in proteomics, because logarithmized values are more normally distributed.
# Logarithmize the simulated, ground-truth and missing-value abundance columns
log_columns = ['abundance', 'abundance_gt', 'abundance_missing']
ds = logarithmize(data=ds, columns=log_columns)
MaxLFQ aggregation
# Aggregate the (already logarithmized) peptide values with missing entries to
# protein level via MaxLFQ and store them as the protein column 'aggregated'.
ds.values['protein']['aggregated'] = maxlfq(
    dataset=ds,
    molecule='protein',
    mapping='peptide-protein',
    partner_column='abundance_missing',
    min_ratios=2,
    median_fallback=False,
    is_log=True,
)
Now the ‘aggregated’ value column holds the aggregated values, and the ‘abundance_gt’ value column, which was written during the simulation, holds the ground truth values.
# Display the protein value table ('abundance_gt' next to 'aggregated')
ds.values['protein'].df
| abundance_gt | aggregated | ||
|---|---|---|---|
| sample | id | ||
| sample0 | 0 | 17.169365 | NaN |
| 1 | 20.560688 | NaN | |
| 2 | 15.000974 | NaN | |
| 3 | 14.404043 | NaN | |
| 4 | 15.042509 | NaN | |
| ... | ... | ... | ... |
| sample9 | 92 | 16.595688 | 15.078889 |
| 93 | 18.847798 | 17.029333 | |
| 94 | 19.827979 | 17.730384 | |
| 95 | 13.603674 | 12.169696 | |
| 96 | 17.982749 | 16.312612 |
970 rows × 2 columns
Plotting the aggregated abundance ratios we see two clusters corresponding to the proteins affected by the condition and the unaffected ones.
from pyproteonet.visualization import plot_ratio_scatter

# Label every protein by whether it belongs to the group affected by the
# simulated condition, so the two groups can be told apart in the plot.
categories = pd.Series(data='no fold change', index=ds.molecules['protein'].index)
categories[condition_proteins] = 'fold change'

# Ratio of the condition samples (sample5..sample9) over the remaining
# samples (sample0..sample4) for the aggregated protein values.
numerator_samples = [f'sample{s}' for s in range(5, 10)]
denominator_samples = [f'sample{s}' for s in range(5)]
a, b = plot_ratio_scatter(
    dataset=ds,
    molecule='protein',
    columns=['aggregated'],
    numerator_samples=numerator_samples,
    denominator_samples=denominator_samples,
    plot_density=False,
    is_log=True,
    alpha=0.5,
    categories=categories,
)
Missing Value Imputation
Pyproteonet provides a wide range of established imputation functions, combining both native Python implementations and wrappers around R packages for imputation functions where no Python implementation is available yet.
Here we use the high-level API to impute on both protein and peptide level using a number of different imputation functions.
from pyproteonet.imputation import impute_molecule

# Imputation methods to run, identified by their short names
imputation_methods = [
    "minprob",
    "mindet",
    "bpca",
    "missforest",
    "knn",
    "isvd",
    "dae",
]

# Protein level: impute the MaxLFQ-aggregated values
impute_molecule(
    dataset=ds,
    molecule='protein',
    column='aggregated',
    methods=imputation_methods,
)
# Peptide level: impute the abundances containing the simulated missing values
impute_molecule(
    dataset=ds,
    molecule='peptide',
    column='abundance_missing',
    methods=imputation_methods,
)
Show code cell output
R[write to console]: Loading required package: tmvtnorm
R[write to console]: Loading required package: mvtnorm
R[write to console]: Loading required package: Matrix
R[write to console]: Loading required package: stats4
R[write to console]: Loading required package: gmm
R[write to console]: Loading required package: sandwich
R[write to console]: Loading required package: norm
R[write to console]: This package has some major limitations
(for example, it does not work reliably when
the number of variables exceeds 30),
and has been superseded by the norm2 package.
R[write to console]: Loading required package: pcaMethods
R[write to console]: Loading required package: Biobase
R[write to console]: Loading required package: BiocGenerics
R[write to console]:
Attaching package: ‘BiocGenerics’
R[write to console]: The following objects are masked from ‘package:stats’:
IQR, mad, sd, var, xtabs
R[write to console]: The following objects are masked from ‘package:base’:
anyDuplicated, aperm, append, as.data.frame, basename, cbind,
colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
table, tapply, union, unique, unsplit, which.max, which.min
R[write to console]: Welcome to Bioconductor
Vignettes contain introductory material; view with
'browseVignettes()'. To cite Bioconductor, see
'citation("Biobase")', and for packages 'citation("pkgname")'.
R[write to console]:
Attaching package: ‘pcaMethods’
R[write to console]: The following object is masked from ‘package:stats’:
loadings
R[write to console]: Loading required package: impute
R[write to console]: In addition:
R[write to console]: There were 11 warnings (use warnings() to see them)
R[write to console]:
minprob minprob
[1] 0.3002219
mindet mindet
bpca bpca
missforest missforest
Iteration: 0
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
knn knn
isvd isvd
[IterativeSVD] Iter 1: observed MAE=1.029508
[IterativeSVD] Iter 2: observed MAE=0.361089
[IterativeSVD] Iter 3: observed MAE=0.277206
[IterativeSVD] Iter 4: observed MAE=0.251593
[IterativeSVD] Iter 5: observed MAE=0.239997
[IterativeSVD] Iter 6: observed MAE=0.233418
[IterativeSVD] Iter 7: observed MAE=0.228806
[IterativeSVD] Iter 8: observed MAE=0.225354
[IterativeSVD] Iter 9: observed MAE=0.222133
[IterativeSVD] Iter 10: observed MAE=0.219068
[IterativeSVD] Iter 11: observed MAE=0.216221
[IterativeSVD] Iter 12: observed MAE=0.213613
[IterativeSVD] Iter 13: observed MAE=0.211435
[IterativeSVD] Iter 14: observed MAE=0.209869
[IterativeSVD] Iter 15: observed MAE=0.208677
[IterativeSVD] Iter 16: observed MAE=0.207568
[IterativeSVD] Iter 17: observed MAE=0.206538
[IterativeSVD] Iter 18: observed MAE=0.205668
[IterativeSVD] Iter 19: observed MAE=0.204957
13.719917530273548
dae dae
| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 771.526489 | 92.160431 | 00:00 |
| 1 | 772.366943 | 92.353256 | 00:00 |
| 2 | 769.833069 | 92.375992 | 00:00 |
| 3 | 768.604248 | 92.269379 | 00:00 |
| 4 | 758.263245 | 91.880707 | 00:00 |
| 5 | 747.080200 | 91.415390 | 00:00 |
| 6 | 728.280090 | 90.840988 | 00:00 |
| 7 | 712.120911 | 90.002388 | 00:00 |
| 8 | 691.932617 | 88.560638 | 00:00 |
| 9 | 668.623352 | 86.934418 | 00:00 |
| 10 | 645.239624 | 84.854103 | 00:00 |
| 11 | 617.785217 | 82.413429 | 00:00 |
| 12 | 591.351379 | 79.926804 | 00:00 |
| 13 | 565.105164 | 76.967102 | 00:00 |
| 14 | 540.104065 | 74.121017 | 00:00 |
| 15 | 514.766785 | 71.036179 | 00:00 |
| 16 | 491.550842 | 67.346024 | 00:00 |
| 17 | 468.812012 | 63.549061 | 00:00 |
| 18 | 449.108002 | 59.731007 | 00:00 |
| 19 | 430.298523 | 55.514488 | 00:00 |
| 20 | 411.570648 | 52.218212 | 00:00 |
| 21 | 395.587616 | 49.133953 | 00:00 |
| 22 | 380.467834 | 45.290352 | 00:00 |
| 23 | 366.138885 | 41.591652 | 00:00 |
| 24 | 352.394226 | 38.126877 | 00:00 |
| 25 | 340.403351 | 35.215355 | 00:00 |
| 26 | 329.015961 | 31.941929 | 00:00 |
| 27 | 318.154938 | 29.208054 | 00:00 |
| 28 | 307.556396 | 27.451818 | 00:00 |
| 29 | 297.848755 | 25.800251 | 00:00 |
| 30 | 288.697937 | 24.675629 | 00:00 |
| 31 | 280.368744 | 22.831230 | 00:00 |
| 32 | 271.839752 | 21.028332 | 00:00 |
| 33 | 263.920349 | 20.149044 | 00:00 |
| 34 | 256.748627 | 19.689053 | 00:00 |
| 35 | 250.305237 | 19.224886 | 00:00 |
| 36 | 243.825073 | 18.174835 | 00:00 |
| 37 | 237.186234 | 17.868359 | 00:00 |
| 38 | 231.362656 | 17.417484 | 00:00 |
| 39 | 226.324219 | 17.180439 | 00:00 |
| 40 | 220.601761 | 16.944290 | 00:00 |
| 41 | 215.210754 | 16.577560 | 00:00 |
| 42 | 210.437592 | 16.556936 | 00:00 |
| 43 | 205.902206 | 16.823841 | 00:00 |
| 44 | 201.798828 | 15.859558 | 00:00 |
| 45 | 197.523987 | 15.849012 | 00:00 |
| 46 | 193.360168 | 15.454900 | 00:00 |
| 47 | 189.400604 | 14.698169 | 00:00 |
| 48 | 185.586548 | 14.064270 | 00:00 |
| 49 | 182.094086 | 14.232521 | 00:00 |
minprob minprob
[1] 0.3743327
mindet mindet
bpca bpca
missforest missforest
Iteration: 0
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
knn knn
isvd isvd
[IterativeSVD] Iter 1: observed MAE=3.794830
[IterativeSVD] Iter 2: observed MAE=1.191989
[IterativeSVD] Iter 3: observed MAE=0.644522
[IterativeSVD] Iter 4: observed MAE=0.479546
[IterativeSVD] Iter 5: observed MAE=0.414731
[IterativeSVD] Iter 6: observed MAE=0.382314
[IterativeSVD] Iter 7: observed MAE=0.361593
[IterativeSVD] Iter 8: observed MAE=0.345830
[IterativeSVD] Iter 9: observed MAE=0.332967
[IterativeSVD] Iter 10: observed MAE=0.321903
[IterativeSVD] Iter 11: observed MAE=0.313058
[IterativeSVD] Iter 12: observed MAE=0.305612
[IterativeSVD] Iter 13: observed MAE=0.298658
[IterativeSVD] Iter 14: observed MAE=0.291231
[IterativeSVD] Iter 15: observed MAE=0.281993
[IterativeSVD] Iter 16: observed MAE=0.270293
[IterativeSVD] Iter 17: observed MAE=0.257632
[IterativeSVD] Iter 18: observed MAE=0.246249
[IterativeSVD] Iter 19: observed MAE=0.237744
[IterativeSVD] Iter 20: observed MAE=0.232672
[IterativeSVD] Iter 21: observed MAE=0.229847
[IterativeSVD] Iter 22: observed MAE=0.228363
14.489411675754466
dae dae
| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 4583.745117 | 426.797913 | 00:00 |
| 1 | 4558.276367 | 426.060699 | 00:00 |
| 2 | 4545.520020 | 424.628357 | 00:00 |
| 3 | 4505.635742 | 422.432526 | 00:00 |
| 4 | 4463.166016 | 419.978271 | 00:00 |
| 5 | 4402.731445 | 417.171478 | 00:00 |
| 6 | 4331.463379 | 413.485657 | 00:00 |
| 7 | 4241.655273 | 409.318634 | 00:00 |
| 8 | 4149.645020 | 403.610596 | 00:00 |
| 9 | 4035.374268 | 396.451233 | 00:00 |
| 10 | 3915.376221 | 387.736786 | 00:00 |
| 11 | 3784.795654 | 377.046326 | 00:00 |
| 12 | 3641.189941 | 365.024231 | 00:00 |
| 13 | 3500.675293 | 351.603699 | 00:00 |
| 14 | 3357.156250 | 337.225555 | 00:00 |
| 15 | 3215.336426 | 320.989746 | 00:00 |
| 16 | 3081.312744 | 304.547363 | 00:00 |
| 17 | 2951.336426 | 288.108887 | 00:00 |
| 18 | 2829.898438 | 271.372681 | 00:00 |
| 19 | 2714.050049 | 253.985641 | 00:00 |
| 20 | 2605.306152 | 239.081329 | 00:00 |
| 21 | 2500.918945 | 223.682159 | 00:00 |
| 22 | 2405.522217 | 207.702240 | 00:00 |
| 23 | 2315.056396 | 191.600220 | 00:00 |
| 24 | 2231.274902 | 176.463211 | 00:00 |
| 25 | 2152.593506 | 161.810226 | 00:00 |
| 26 | 2080.827637 | 146.981323 | 00:00 |
| 27 | 2011.464722 | 135.124786 | 00:00 |
| 28 | 1945.286255 | 122.995033 | 00:00 |
| 29 | 1883.757935 | 114.385330 | 00:00 |
| 30 | 1825.023438 | 106.605972 | 00:00 |
| 31 | 1769.324829 | 99.771904 | 00:00 |
| 32 | 1716.415161 | 94.857063 | 00:00 |
| 33 | 1669.574707 | 88.853897 | 00:00 |
| 34 | 1622.764526 | 82.341179 | 00:00 |
| 35 | 1578.125977 | 76.525879 | 00:00 |
| 36 | 1534.842773 | 72.577148 | 00:00 |
| 37 | 1494.982544 | 68.411743 | 00:00 |
| 38 | 1457.871216 | 64.500877 | 00:00 |
| 39 | 1422.914795 | 62.013630 | 00:00 |
| 40 | 1388.780029 | 60.291847 | 00:00 |
| 41 | 1357.178955 | 58.403954 | 00:00 |
| 42 | 1327.356812 | 56.958199 | 00:00 |
| 43 | 1297.922363 | 56.064789 | 00:00 |
| 44 | 1269.961914 | 54.674122 | 00:00 |
| 45 | 1242.876831 | 54.273338 | 00:00 |
| 46 | 1217.293823 | 54.264595 | 00:00 |
| 47 | 1192.430908 | 53.302429 | 00:00 |
| 48 | 1169.106323 | 52.484703 | 00:00 |
| 49 | 1146.657837 | 52.133827 | 00:00 |
Looking at the result we can see that the missing values are gone:
# Display the peptide value table, now including one column per imputation method
ds.values['peptide'].df
| abundance | abundance_gt | abundance_missing | is_mnar | is_mcar | minprob | mindet | bpca | missforest | knn | isvd | dae | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| sample | id | ||||||||||||
| sample0 | 0 | 15.651 | 17.169 | NaN | False | True | 11.000 | 10.565 | 16.101 | 16.062 | 16.047 | 16.135 | 15.313 |
| 1 | 20.386 | 20.561 | 20.386 | False | False | 20.386 | 20.386 | 20.386 | 20.386 | 20.386 | 20.386 | 20.386 | |
| 2 | 14.708 | 15.001 | 14.708 | False | False | 14.708 | 14.708 | 14.708 | 14.708 | 14.708 | 14.708 | 14.708 | |
| 3 | 14.319 | 14.404 | 14.319 | False | False | 14.319 | 14.319 | 14.319 | 14.319 | 14.319 | 14.319 | 14.319 | |
| 4 | 12.920 | 15.043 | 12.920 | False | False | 12.920 | 12.920 | 12.920 | 12.920 | 12.920 | 12.920 | 12.920 | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| sample9 | 599 | 14.434 | 18.261 | 14.434 | False | False | 14.434 | 14.434 | 14.434 | 14.434 | 14.434 | 14.434 | 14.434 |
| 600 | 16.882 | 19.213 | NaN | False | True | 9.549 | 9.778 | 17.072 | 16.992 | 17.279 | 17.056 | 16.109 | |
| 601 | 16.952 | 19.500 | NaN | False | True | 8.785 | 9.778 | 17.070 | 17.281 | 17.256 | 17.073 | 17.250 | |
| 602 | 14.137 | 18.043 | 14.137 | False | False | 14.137 | 14.137 | 14.137 | 14.137 | 14.137 | 14.137 | 14.137 | |
| 603 | 17.885 | 18.359 | 17.885 | False | False | 17.885 | 17.885 | 17.885 | 17.885 | 17.885 | 17.885 | 17.885 |
6040 rows × 12 columns
Graph Neural Network Imputation
from pyproteonet.imputation.dnn.gnn import impute_heterogeneous_gnn

# Impute with the heterogeneous graph neural network on the protein column
# 'aggregated' and the peptide column 'abundance_missing', connected through
# the 'peptide-protein' mapping. Results are written to a 'gnn_hetero' column
# for both the molecule (protein) and its partner (peptide).
# The return value is not needed here, so it is discarded.
_ = impute_heterogeneous_gnn(
    dataset=ds,
    molecule='protein',
    column='aggregated',
    mapping='peptide-protein',
    partner_column='abundance_missing',
    # Plain string literals: the original f-strings had no placeholders.
    molecule_result_column='gnn_hetero',
    partner_result_column='gnn_hetero',
    max_epochs=1000,
    early_stopping_patience=7,
    epoch_size=30,
    training_fraction=0.25,
    log_every_n_steps=30,
)
Show code cell output
seed: 611519400
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
| Name | Type | Params
------------------------------------------------------
0 | embedding | Embedding | 485
1 | molecule_fc_model | Sequential | 11.0 K
2 | partner_fc_model | Sequential | 11.4 K
3 | molecule_gat | HeteroGraphConv | 34.4 K
4 | partner_gat | HeteroGraphConv | 50.4 K
5 | molecule_gat2 | HeteroGraphConv | 66.4 K
6 | molecule_linear | Linear | 820
7 | partner_linear | Linear | 1.2 K
8 | loss_fn | GaussianNLLLoss | 0
------------------------------------------------------
176 K Trainable params
0 Non-trainable params
176 K Total params
0.705 Total estimated model params size (MB)
step29: num_masked_molecule:726.0 || num_masked_partner:1100.066650390625 || molecule_loss:0.4965510070323944 || partner_loss:0.4411449432373047 || train_loss:0.9376960396766663 || epoch:0 ||
step59: num_masked_molecule:726.0 || num_masked_partner:1035.0333251953125 || molecule_loss:0.3079593777656555 || partner_loss:0.026088429614901543 || train_loss:0.3340478539466858 || epoch:1 ||
step89: num_masked_molecule:726.0 || num_masked_partner:1013.2666625976562 || molecule_loss:-0.19135525822639465 || partner_loss:-0.2356344610452652 || train_loss:-0.42698976397514343 || epoch:2 ||
step119: num_masked_molecule:726.0 || num_masked_partner:1024.5999755859375 || molecule_loss:-0.4036961495876312 || partner_loss:-0.3087603449821472 || train_loss:-0.712456464767456 || epoch:3 ||
step149: num_masked_molecule:726.0 || num_masked_partner:1068.4000244140625 || molecule_loss:-0.5029523372650146 || partner_loss:-0.3543926477432251 || train_loss:-0.857344925403595 || epoch:4 ||
step179: num_masked_molecule:726.0 || num_masked_partner:1079.4666748046875 || molecule_loss:-0.49853944778442383 || partner_loss:-0.3519468307495117 || train_loss:-0.850486159324646 || epoch:5 ||
step209: num_masked_molecule:726.0 || num_masked_partner:1202.3333740234375 || molecule_loss:-0.545192301273346 || partner_loss:-0.3749948740005493 || train_loss:-0.9201869964599609 || epoch:6 ||
step239: num_masked_molecule:726.0 || num_masked_partner:1087.5999755859375 || molecule_loss:-0.47056013345718384 || partner_loss:-0.37001076340675354 || train_loss:-0.8405709862709045 || epoch:7 ||
step269: num_masked_molecule:726.0 || num_masked_partner:1064.6666259765625 || molecule_loss:-0.5025004744529724 || partner_loss:-0.39020660519599915 || train_loss:-0.8927069902420044 || epoch:8 ||
step299: num_masked_molecule:726.0 || num_masked_partner:1119.699951171875 || molecule_loss:-0.6098951697349548 || partner_loss:-0.4187478721141815 || train_loss:-1.0286431312561035 || epoch:9 ||
step329: num_masked_molecule:726.0 || num_masked_partner:1070.0999755859375 || molecule_loss:-0.6323655247688293 || partner_loss:-0.45870205760002136 || train_loss:-1.0910675525665283 || epoch:10 ||
step359: num_masked_molecule:726.0 || num_masked_partner:1022.2666625976562 || molecule_loss:-0.6693475246429443 || partner_loss:-0.46193426847457886 || train_loss:-1.1312816143035889 || epoch:11 ||
step389: num_masked_molecule:726.0 || num_masked_partner:1037.5 || molecule_loss:-0.6690173745155334 || partner_loss:-0.4616609811782837 || train_loss:-1.130678415298462 || epoch:12 ||
step419: num_masked_molecule:726.0 || num_masked_partner:1045.3333740234375 || molecule_loss:-0.730419933795929 || partner_loss:-0.507056474685669 || train_loss:-1.2374764680862427 || epoch:13 ||
step449: num_masked_molecule:726.0 || num_masked_partner:1012.7333374023438 || molecule_loss:-0.722473680973053 || partner_loss:-0.5137044787406921 || train_loss:-1.2361780405044556 || epoch:14 ||
step479: num_masked_molecule:726.0 || num_masked_partner:1028.066650390625 || molecule_loss:-0.6839271187782288 || partner_loss:-0.5060281157493591 || train_loss:-1.189955234527588 || epoch:15 ||
step509: num_masked_molecule:726.0 || num_masked_partner:1087.699951171875 || molecule_loss:-0.7172583937644958 || partner_loss:-0.5181266069412231 || train_loss:-1.2353848218917847 || epoch:16 ||
step539: num_masked_molecule:726.0 || num_masked_partner:1008.1666870117188 || molecule_loss:-0.7352653741836548 || partner_loss:-0.5446591973304749 || train_loss:-1.2799246311187744 || epoch:17 ||
step569: num_masked_molecule:726.0 || num_masked_partner:1057.0333251953125 || molecule_loss:-0.7405372262001038 || partner_loss:-0.5526030659675598 || train_loss:-1.2931402921676636 || epoch:18 ||
step599: num_masked_molecule:726.0 || num_masked_partner:1024.0 || molecule_loss:-0.7776814103126526 || partner_loss:-0.5644718408584595 || train_loss:-1.342153549194336 || epoch:19 ||
step629: num_masked_molecule:726.0 || num_masked_partner:1110.300048828125 || molecule_loss:-0.7743927836418152 || partner_loss:-0.5480443835258484 || train_loss:-1.3224371671676636 || epoch:20 ||
step659: num_masked_molecule:726.0 || num_masked_partner:1055.9000244140625 || molecule_loss:-0.8264914751052856 || partner_loss:-0.5862985849380493 || train_loss:-1.412790060043335 || epoch:21 ||
step689: num_masked_molecule:726.0 || num_masked_partner:1136.6666259765625 || molecule_loss:-0.813257098197937 || partner_loss:-0.5911661386489868 || train_loss:-1.4044231176376343 || epoch:22 ||
step719: num_masked_molecule:726.0 || num_masked_partner:1104.7667236328125 || molecule_loss:-0.8293827772140503 || partner_loss:-0.5931750535964966 || train_loss:-1.4225578308105469 || epoch:23 ||
step749: num_masked_molecule:726.0 || num_masked_partner:1021.3666381835938 || molecule_loss:-0.8359233736991882 || partner_loss:-0.6279093623161316 || train_loss:-1.4638323783874512 || epoch:24 ||
step779: num_masked_molecule:726.0 || num_masked_partner:1085.13330078125 || molecule_loss:-0.8605287075042725 || partner_loss:-0.6250839829444885 || train_loss:-1.4856127500534058 || epoch:25 ||
step809: num_masked_molecule:726.0 || num_masked_partner:979.066650390625 || molecule_loss:-0.8720875382423401 || partner_loss:-0.6600542068481445 || train_loss:-1.5321418046951294 || epoch:26 ||
step839: num_masked_molecule:726.0 || num_masked_partner:1009.8333129882812 || molecule_loss:-0.8814008235931396 || partner_loss:-0.665562093257904 || train_loss:-1.5469627380371094 || epoch:27 ||
step869: num_masked_molecule:726.0 || num_masked_partner:1070.1666259765625 || molecule_loss:-0.8766546845436096 || partner_loss:-0.6474056839942932 || train_loss:-1.5240601301193237 || epoch:28 ||
step899: num_masked_molecule:726.0 || num_masked_partner:1109.86669921875 || molecule_loss:-0.8748965859413147 || partner_loss:-0.6583003997802734 || train_loss:-1.5331969261169434 || epoch:29 ||
step929: num_masked_molecule:726.0 || num_masked_partner:991.2666625976562 || molecule_loss:-0.9041153788566589 || partner_loss:-0.6923929452896118 || train_loss:-1.596508264541626 || epoch:30 ||
step959: num_masked_molecule:726.0 || num_masked_partner:1139.7667236328125 || molecule_loss:-0.8907600045204163 || partner_loss:-0.6543554067611694 || train_loss:-1.5451151132583618 || epoch:31 ||
step989: num_masked_molecule:726.0 || num_masked_partner:1088.4000244140625 || molecule_loss:-0.9054028391838074 || partner_loss:-0.6714745163917542 || train_loss:-1.5768771171569824 || epoch:32 ||
step1019: num_masked_molecule:726.0 || num_masked_partner:1049.8333740234375 || molecule_loss:-0.9136359691619873 || partner_loss:-0.7057571411132812 || train_loss:-1.619393229484558 || epoch:33 ||
step1049: num_masked_molecule:726.0 || num_masked_partner:1021.433349609375 || molecule_loss:-0.9122346639633179 || partner_loss:-0.7000260949134827 || train_loss:-1.6122606992721558 || epoch:34 ||
step1079: num_masked_molecule:726.0 || num_masked_partner:1064.2667236328125 || molecule_loss:-0.9102837443351746 || partner_loss:-0.7140386700630188 || train_loss:-1.6243220567703247 || epoch:35 ||
step1109: num_masked_molecule:726.0 || num_masked_partner:1119.5 || molecule_loss:-0.9347782135009766 || partner_loss:-0.7171137928962708 || train_loss:-1.6518919467926025 || epoch:36 ||
step1139: num_masked_molecule:726.0 || num_masked_partner:982.2000122070312 || molecule_loss:-0.9539870619773865 || partner_loss:-0.7451453804969788 || train_loss:-1.6991325616836548 || epoch:37 ||
step1169: num_masked_molecule:726.0 || num_masked_partner:1010.9000244140625 || molecule_loss:-0.934962272644043 || partner_loss:-0.7454560995101929 || train_loss:-1.6804183721542358 || epoch:38 ||
step1199: num_masked_molecule:726.0 || num_masked_partner:1055.0 || molecule_loss:-0.9453296065330505 || partner_loss:-0.7514131665229797 || train_loss:-1.6967426538467407 || epoch:39 ||
step1229: num_masked_molecule:726.0 || num_masked_partner:995.5333251953125 || molecule_loss:-0.9665142893791199 || partner_loss:-0.764931857585907 || train_loss:-1.731446385383606 || epoch:40 ||
step1259: num_masked_molecule:726.0 || num_masked_partner:1039.6666259765625 || molecule_loss:-0.9687608480453491 || partner_loss:-0.7705090641975403 || train_loss:-1.7392699718475342 || epoch:41 ||
step1289: num_masked_molecule:726.0 || num_masked_partner:1086.5333251953125 || molecule_loss:-0.9889089465141296 || partner_loss:-0.7700208425521851 || train_loss:-1.7589298486709595 || epoch:42 ||
step1319: num_masked_molecule:726.0 || num_masked_partner:1106.699951171875 || molecule_loss:-0.9803321361541748 || partner_loss:-0.7794199585914612 || train_loss:-1.7597521543502808 || epoch:43 ||
step1349: num_masked_molecule:726.0 || num_masked_partner:1009.9666748046875 || molecule_loss:-0.9949647784233093 || partner_loss:-0.785544216632843 || train_loss:-1.7805092334747314 || epoch:44 ||
step1379: num_masked_molecule:726.0 || num_masked_partner:1043.4000244140625 || molecule_loss:-0.9993769526481628 || partner_loss:-0.803527295589447 || train_loss:-1.8029043674468994 || epoch:45 ||
step1409: num_masked_molecule:726.0 || num_masked_partner:1140.199951171875 || molecule_loss:-1.0143009424209595 || partner_loss:-0.792439341545105 || train_loss:-1.8067405223846436 || epoch:46 ||
step1439: num_masked_molecule:726.0 || num_masked_partner:985.9000244140625 || molecule_loss:-1.0157802104949951 || partner_loss:-0.801406741142273 || train_loss:-1.817186713218689 || epoch:47 ||
step1469: num_masked_molecule:726.0 || num_masked_partner:983.4000244140625 || molecule_loss:-1.0334373712539673 || partner_loss:-0.8253453373908997 || train_loss:-1.8587825298309326 || epoch:48 ||
step1499: num_masked_molecule:726.0 || num_masked_partner:1051.7332763671875 || molecule_loss:-1.0181365013122559 || partner_loss:-0.8146819472312927 || train_loss:-1.8328182697296143 || epoch:49 ||
step1529: num_masked_molecule:726.0 || num_masked_partner:1076.566650390625 || molecule_loss:-1.0346359014511108 || partner_loss:-0.81679767370224 || train_loss:-1.851433515548706 || epoch:50 ||
step1559: num_masked_molecule:726.0 || num_masked_partner:1174.5999755859375 || molecule_loss:-1.0566203594207764 || partner_loss:-0.8248631954193115 || train_loss:-1.8814834356307983 || epoch:51 ||
step1589: num_masked_molecule:726.0 || num_masked_partner:1072.7667236328125 || molecule_loss:-1.037369728088379 || partner_loss:-0.8392760753631592 || train_loss:-1.876645565032959 || epoch:52 ||
step1619: num_masked_molecule:726.0 || num_masked_partner:1030.8333740234375 || molecule_loss:-1.0543652772903442 || partner_loss:-0.8409631252288818 || train_loss:-1.8953280448913574 || epoch:53 ||
step1649: num_masked_molecule:726.0 || num_masked_partner:1045.066650390625 || molecule_loss:-1.0495632886886597 || partner_loss:-0.8441323041915894 || train_loss:-1.8936954736709595 || epoch:54 ||
step1679: num_masked_molecule:726.0 || num_masked_partner:1146.2332763671875 || molecule_loss:-1.0817667245864868 || partner_loss:-0.8280280828475952 || train_loss:-1.9097949266433716 || epoch:55 ||
step1709: num_masked_molecule:726.0 || num_masked_partner:1037.800048828125 || molecule_loss:-1.0899382829666138 || partner_loss:-0.8692459464073181 || train_loss:-1.9591840505599976 || epoch:56 ||
step1739: num_masked_molecule:726.0 || num_masked_partner:1010.6666870117188 || molecule_loss:-1.0975162982940674 || partner_loss:-0.8926358819007874 || train_loss:-1.99015212059021 || epoch:57 ||
step1769: num_masked_molecule:726.0 || num_masked_partner:1148.0 || molecule_loss:-1.0803073644638062 || partner_loss:-0.870618462562561 || train_loss:-1.950925588607788 || epoch:58 ||
step1799: num_masked_molecule:726.0 || num_masked_partner:1000.7666625976562 || molecule_loss:-1.0922099351882935 || partner_loss:-0.8835612535476685 || train_loss:-1.975771188735962 || epoch:59 ||
step1829: num_masked_molecule:726.0 || num_masked_partner:973.3333129882812 || molecule_loss:-1.1173169612884521 || partner_loss:-0.9026615023612976 || train_loss:-2.0199780464172363 || epoch:60 ||
step1859: num_masked_molecule:726.0 || num_masked_partner:1018.1666870117188 || molecule_loss:-1.1046700477600098 || partner_loss:-0.8955172896385193 || train_loss:-2.000187635421753 || epoch:61 ||
step1889: num_masked_molecule:726.0 || num_masked_partner:924.1333618164062 || molecule_loss:-1.1399754285812378 || partner_loss:-0.9060159921646118 || train_loss:-2.0459911823272705 || epoch:62 ||
step1919: num_masked_molecule:726.0 || num_masked_partner:1186.8333740234375 || molecule_loss:-1.1187496185302734 || partner_loss:-0.8819963932037354 || train_loss:-2.000746011734009 || epoch:63 ||
step1949: num_masked_molecule:726.0 || num_masked_partner:1036.199951171875 || molecule_loss:-1.1434788703918457 || partner_loss:-0.9053875207901001 || train_loss:-2.0488662719726562 || epoch:64 ||
step1979: num_masked_molecule:726.0 || num_masked_partner:1140.63330078125 || molecule_loss:-1.1350418329238892 || partner_loss:-0.8943266272544861 || train_loss:-2.0293684005737305 || epoch:65 ||
step2009: num_masked_molecule:726.0 || num_masked_partner:1060.9666748046875 || molecule_loss:-1.168619155883789 || partner_loss:-0.9246275424957275 || train_loss:-2.0932469367980957 || epoch:66 ||
step2039: num_masked_molecule:726.0 || num_masked_partner:1102.933349609375 || molecule_loss:-1.1653698682785034 || partner_loss:-0.9179772734642029 || train_loss:-2.0833470821380615 || epoch:67 ||
step2069: num_masked_molecule:726.0 || num_masked_partner:1100.5999755859375 || molecule_loss:-1.1653379201889038 || partner_loss:-0.9129053950309753 || train_loss:-2.0782434940338135 || epoch:68 ||
step2099: num_masked_molecule:726.0 || num_masked_partner:1095.86669921875 || molecule_loss:-1.1672157049179077 || partner_loss:-0.9130270481109619 || train_loss:-2.08024263381958 || epoch:69 ||
step2129: num_masked_molecule:726.0 || num_masked_partner:1028.566650390625 || molecule_loss:-1.1828758716583252 || partner_loss:-0.9405314326286316 || train_loss:-2.1234071254730225 || epoch:70 ||
step2159: num_masked_molecule:726.0 || num_masked_partner:1104.9000244140625 || molecule_loss:-1.1876522302627563 || partner_loss:-0.9315553307533264 || train_loss:-2.1192076206207275 || epoch:71 ||
step2189: num_masked_molecule:726.0 || num_masked_partner:1019.7666625976562 || molecule_loss:-1.205991268157959 || partner_loss:-0.9466720819473267 || train_loss:-2.152663230895996 || epoch:72 ||
step2219: num_masked_molecule:726.0 || num_masked_partner:1076.6666259765625 || molecule_loss:-1.1927111148834229 || partner_loss:-0.9489060044288635 || train_loss:-2.1416170597076416 || epoch:73 ||
step2249: num_masked_molecule:726.0 || num_masked_partner:1069.1666259765625 || molecule_loss:-1.1742908954620361 || partner_loss:-0.9380027055740356 || train_loss:-2.112293243408203 || epoch:74 ||
step2279: num_masked_molecule:726.0 || num_masked_partner:1182.13330078125 || molecule_loss:-1.1886842250823975 || partner_loss:-0.9302987456321716 || train_loss:-2.1189827919006348 || epoch:75 ||
step2309: num_masked_molecule:726.0 || num_masked_partner:1056.8333740234375 || molecule_loss:-1.212022304534912 || partner_loss:-0.9502575993537903 || train_loss:-2.1622800827026367 || epoch:76 ||
step2339: num_masked_molecule:726.0 || num_masked_partner:1012.6333618164062 || molecule_loss:-1.2337737083435059 || partner_loss:-0.9571922421455383 || train_loss:-2.1909658908843994 || epoch:77 ||
step2369: num_masked_molecule:726.0 || num_masked_partner:1005.7999877929688 || molecule_loss:-1.2319817543029785 || partner_loss:-0.9658077359199524 || train_loss:-2.1977896690368652 || epoch:78 ||
step2399: num_masked_molecule:726.0 || num_masked_partner:1137.7667236328125 || molecule_loss:-1.2354296445846558 || partner_loss:-0.9447720050811768 || train_loss:-2.180201768875122 || epoch:79 ||
step2429: num_masked_molecule:726.0 || num_masked_partner:997.566650390625 || molecule_loss:-1.2405979633331299 || partner_loss:-0.9732113480567932 || train_loss:-2.2138092517852783 || epoch:80 ||
step2459: num_masked_molecule:726.0 || num_masked_partner:966.8666381835938 || molecule_loss:-1.2557622194290161 || partner_loss:-0.9808309078216553 || train_loss:-2.236593008041382 || epoch:81 ||
step2489: num_masked_molecule:726.0 || num_masked_partner:1010.0333251953125 || molecule_loss:-1.2544045448303223 || partner_loss:-0.9827736616134644 || train_loss:-2.237178325653076 || epoch:82 ||
step2519: num_masked_molecule:726.0 || num_masked_partner:1035.066650390625 || molecule_loss:-1.2518256902694702 || partner_loss:-0.9906922578811646 || train_loss:-2.242518186569214 || epoch:83 ||
step2549: num_masked_molecule:726.0 || num_masked_partner:1079.433349609375 || molecule_loss:-1.2596325874328613 || partner_loss:-0.9961376786231995 || train_loss:-2.255770206451416 || epoch:84 ||
step2579: num_masked_molecule:726.0 || num_masked_partner:957.5999755859375 || molecule_loss:-1.2579421997070312 || partner_loss:-0.9910205006599426 || train_loss:-2.248962879180908 || epoch:85 ||
step2609: num_masked_molecule:726.0 || num_masked_partner:1064.4000244140625 || molecule_loss:-1.270037293434143 || partner_loss:-0.9873427748680115 || train_loss:-2.2573800086975098 || epoch:86 ||
step2639: num_masked_molecule:726.0 || num_masked_partner:989.9000244140625 || molecule_loss:-1.2828160524368286 || partner_loss:-1.0118578672409058 || train_loss:-2.2946736812591553 || epoch:87 ||
step2669: num_masked_molecule:726.0 || num_masked_partner:1089.699951171875 || molecule_loss:-1.272154450416565 || partner_loss:-0.994814932346344 || train_loss:-2.2669694423675537 || epoch:88 ||
step2699: num_masked_molecule:726.0 || num_masked_partner:1169.36669921875 || molecule_loss:-1.2712759971618652 || partner_loss:-0.9949489235877991 || train_loss:-2.2662246227264404 || epoch:89 ||
step2729: num_masked_molecule:726.0 || num_masked_partner:1052.566650390625 || molecule_loss:-1.2798277139663696 || partner_loss:-0.9988877177238464 || train_loss:-2.2787156105041504 || epoch:90 ||
step2759: num_masked_molecule:726.0 || num_masked_partner:1027.199951171875 || molecule_loss:-1.2892494201660156 || partner_loss:-1.0110894441604614 || train_loss:-2.3003387451171875 || epoch:91 ||
step2789: num_masked_molecule:726.0 || num_masked_partner:985.8666381835938 || molecule_loss:-1.302141547203064 || partner_loss:-1.014817714691162 || train_loss:-2.3169591426849365 || epoch:92 ||
step2819: num_masked_molecule:726.0 || num_masked_partner:1065.2332763671875 || molecule_loss:-1.2997256517410278 || partner_loss:-1.020464301109314 || train_loss:-2.320189952850342 || epoch:93 ||
step2849: num_masked_molecule:726.0 || num_masked_partner:1074.5999755859375 || molecule_loss:-1.2961069345474243 || partner_loss:-1.0081396102905273 || train_loss:-2.304246425628662 || epoch:94 ||
step2879: num_masked_molecule:726.0 || num_masked_partner:1074.199951171875 || molecule_loss:-1.3059227466583252 || partner_loss:-1.0131717920303345 || train_loss:-2.3190948963165283 || epoch:95 ||
step2909: num_masked_molecule:726.0 || num_masked_partner:1047.4000244140625 || molecule_loss:-1.304827094078064 || partner_loss:-1.0202995538711548 || train_loss:-2.325126886367798 || epoch:96 ||
step2939: num_masked_molecule:726.0 || num_masked_partner:1090.0333251953125 || molecule_loss:-1.300012469291687 || partner_loss:-1.000986933708191 || train_loss:-2.300999402999878 || epoch:97 ||
step2969: num_masked_molecule:726.0 || num_masked_partner:1017.7666625976562 || molecule_loss:-1.3212788105010986 || partner_loss:-1.035373568534851 || train_loss:-2.35665225982666 || epoch:98 ||
step2999: num_masked_molecule:726.0 || num_masked_partner:993.7333374023438 || molecule_loss:-1.3178870677947998 || partner_loss:-1.036424994468689 || train_loss:-2.3543121814727783 || epoch:99 ||
step3029: num_masked_molecule:726.0 || num_masked_partner:998.5333251953125 || molecule_loss:-1.322153091430664 || partner_loss:-1.0287280082702637 || train_loss:-2.3508810997009277 || epoch:100 ||
step3059: num_masked_molecule:726.0 || num_masked_partner:1034.0333251953125 || molecule_loss:-1.2971094846725464 || partner_loss:-1.031297206878662 || train_loss:-2.328406810760498 || epoch:101 ||
step3089: num_masked_molecule:726.0 || num_masked_partner:1011.7999877929688 || molecule_loss:-1.333530068397522 || partner_loss:-1.0356030464172363 || train_loss:-2.3691327571868896 || epoch:102 ||
step3119: num_masked_molecule:726.0 || num_masked_partner:1107.9000244140625 || molecule_loss:-1.3338249921798706 || partner_loss:-1.021238088607788 || train_loss:-2.3550631999969482 || epoch:103 ||
step3149: num_masked_molecule:726.0 || num_masked_partner:971.3666381835938 || molecule_loss:-1.3429099321365356 || partner_loss:-1.0487263202667236 || train_loss:-2.391636371612549 || epoch:104 ||
step3179: num_masked_molecule:726.0 || num_masked_partner:1050.1666259765625 || molecule_loss:-1.342189073562622 || partner_loss:-1.0453040599822998 || train_loss:-2.387493371963501 || epoch:105 ||
step3209: num_masked_molecule:726.0 || num_masked_partner:1058.0 || molecule_loss:-1.342303991317749 || partner_loss:-1.041756510734558 || train_loss:-2.3840606212615967 || epoch:106 ||
step3239: num_masked_molecule:726.0 || num_masked_partner:986.0999755859375 || molecule_loss:-1.356974482536316 || partner_loss:-1.0646052360534668 || train_loss:-2.4215800762176514 || epoch:107 ||
step3269: num_masked_molecule:726.0 || num_masked_partner:1027.1666259765625 || molecule_loss:-1.3518201112747192 || partner_loss:-1.0424758195877075 || train_loss:-2.3942959308624268 || epoch:108 ||
step3299: num_masked_molecule:726.0 || num_masked_partner:974.4666748046875 || molecule_loss:-1.3549456596374512 || partner_loss:-1.052807331085205 || train_loss:-2.4077529907226562 || epoch:109 ||
step3329: num_masked_molecule:726.0 || num_masked_partner:1101.9000244140625 || molecule_loss:-1.3620893955230713 || partner_loss:-1.0447863340377808 || train_loss:-2.4068758487701416 || epoch:110 ||
step3359: num_masked_molecule:726.0 || num_masked_partner:1048.9000244140625 || molecule_loss:-1.3539717197418213 || partner_loss:-1.0623372793197632 || train_loss:-2.416309356689453 || epoch:111 ||
step3389: num_masked_molecule:726.0 || num_masked_partner:1030.3333740234375 || molecule_loss:-1.3606815338134766 || partner_loss:-1.0528395175933838 || train_loss:-2.4135208129882812 || epoch:112 ||
step3419: num_masked_molecule:726.0 || num_masked_partner:955.6666870117188 || molecule_loss:-1.372918963432312 || partner_loss:-1.0771234035491943 || train_loss:-2.450042486190796 || epoch:113 ||
step3449: num_masked_molecule:726.0 || num_masked_partner:973.8333129882812 || molecule_loss:-1.3832192420959473 || partner_loss:-1.0634123086929321 || train_loss:-2.44663143157959 || epoch:114 ||
step3479: num_masked_molecule:726.0 || num_masked_partner:1036.300048828125 || molecule_loss:-1.3683058023452759 || partner_loss:-1.0633835792541504 || train_loss:-2.4316890239715576 || epoch:115 ||
step3509: num_masked_molecule:726.0 || num_masked_partner:1010.2000122070312 || molecule_loss:-1.3830536603927612 || partner_loss:-1.068954348564148 || train_loss:-2.452008008956909 || epoch:116 ||
step3539: num_masked_molecule:726.0 || num_masked_partner:1005.7000122070312 || molecule_loss:-1.384613275527954 || partner_loss:-1.074123740196228 || train_loss:-2.4587368965148926 || epoch:117 ||
step3569: num_masked_molecule:726.0 || num_masked_partner:1051.0333251953125 || molecule_loss:-1.3920537233352661 || partner_loss:-1.0654757022857666 || train_loss:-2.4575295448303223 || epoch:118 ||
step3599: num_masked_molecule:726.0 || num_masked_partner:1147.933349609375 || molecule_loss:-1.3833810091018677 || partner_loss:-1.0549660921096802 || train_loss:-2.438347101211548 || epoch:119 ||
step3629: num_masked_molecule:726.0 || num_masked_partner:1077.5999755859375 || molecule_loss:-1.3820745944976807 || partner_loss:-1.0627564191818237 || train_loss:-2.444831371307373 || epoch:120 ||
step3659: num_masked_molecule:726.0 || num_masked_partner:1073.9000244140625 || molecule_loss:-1.3942341804504395 || partner_loss:-1.0603405237197876 || train_loss:-2.4545748233795166 || epoch:121 ||
step3689: num_masked_molecule:726.0 || num_masked_partner:1109.6666259765625 || molecule_loss:-1.3899521827697754 || partner_loss:-1.0499317646026611 || train_loss:-2.4398839473724365 || epoch:122 ||
step3719: num_masked_molecule:726.0 || num_masked_partner:1120.9666748046875 || molecule_loss:-1.4119142293930054 || partner_loss:-1.0580874681472778 || train_loss:-2.470001697540283 || epoch:123 ||
step3749: num_masked_molecule:726.0 || num_masked_partner:1094.5 || molecule_loss:-1.3814094066619873 || partner_loss:-1.0658226013183594 || train_loss:-2.4472317695617676 || epoch:124 ||
step3779: num_masked_molecule:726.0 || num_masked_partner:1041.0999755859375 || molecule_loss:-1.4080185890197754 || partner_loss:-1.0778658390045166 || train_loss:-2.485884428024292 || epoch:125 ||
step3809: num_masked_molecule:726.0 || num_masked_partner:940.6333618164062 || molecule_loss:-1.4040311574935913 || partner_loss:-1.1004084348678589 || train_loss:-2.50443959236145 || epoch:126 ||
step3839: num_masked_molecule:726.0 || num_masked_partner:1086.5333251953125 || molecule_loss:-1.416605830192566 || partner_loss:-1.0759029388427734 || train_loss:-2.49250864982605 || epoch:127 ||
step3869: num_masked_molecule:726.0 || num_masked_partner:1023.7000122070312 || molecule_loss:-1.4320847988128662 || partner_loss:-1.096185564994812 || train_loss:-2.5282704830169678 || epoch:128 ||
step3899: num_masked_molecule:726.0 || num_masked_partner:1053.63330078125 || molecule_loss:-1.415101408958435 || partner_loss:-1.0774004459381104 || train_loss:-2.492501735687256 || epoch:129 ||
step3929: num_masked_molecule:726.0 || num_masked_partner:1099.2332763671875 || molecule_loss:-1.4301247596740723 || partner_loss:-1.083634853363037 || train_loss:-2.5137593746185303 || epoch:130 ||
step3959: num_masked_molecule:726.0 || num_masked_partner:956.0 || molecule_loss:-1.4191194772720337 || partner_loss:-1.0942126512527466 || train_loss:-2.513331890106201 || epoch:131 ||
step3989: num_masked_molecule:726.0 || num_masked_partner:1024.199951171875 || molecule_loss:-1.4258414506912231 || partner_loss:-1.0949925184249878 || train_loss:-2.520833730697632 || epoch:132 ||
step4019: num_masked_molecule:726.0 || num_masked_partner:1072.4000244140625 || molecule_loss:-1.4500161409378052 || partner_loss:-1.0935717821121216 || train_loss:-2.5435876846313477 || epoch:133 ||
step4049: num_masked_molecule:726.0 || num_masked_partner:1095.6666259765625 || molecule_loss:-1.4258198738098145 || partner_loss:-1.0872559547424316 || train_loss:-2.513076066970825 || epoch:134 ||
step4079: num_masked_molecule:726.0 || num_masked_partner:1058.933349609375 || molecule_loss:-1.4403308629989624 || partner_loss:-1.090928554534912 || train_loss:-2.531259298324585 || epoch:135 ||
step4109: num_masked_molecule:726.0 || num_masked_partner:1058.566650390625 || molecule_loss:-1.4443422555923462 || partner_loss:-1.0939170122146606 || train_loss:-2.5382590293884277 || epoch:136 ||
step4139: num_masked_molecule:726.0 || num_masked_partner:1153.9666748046875 || molecule_loss:-1.4402320384979248 || partner_loss:-1.078032374382019 || train_loss:-2.5182647705078125 || epoch:137 ||
step4169: num_masked_molecule:726.0 || num_masked_partner:907.1333618164062 || molecule_loss:-1.447055697441101 || partner_loss:-1.1156059503555298 || train_loss:-2.5626611709594727 || epoch:138 ||
step4199: num_masked_molecule:726.0 || num_masked_partner:962.4666748046875 || molecule_loss:-1.453112006187439 || partner_loss:-1.1123119592666626 || train_loss:-2.5654242038726807 || epoch:139 ||
step4229: num_masked_molecule:726.0 || num_masked_partner:1076.199951171875 || molecule_loss:-1.4583872556686401 || partner_loss:-1.1005842685699463 || train_loss:-2.558971405029297 || epoch:140 ||
step4259: num_masked_molecule:726.0 || num_masked_partner:1117.7667236328125 || molecule_loss:-1.4408568143844604 || partner_loss:-1.090828537940979 || train_loss:-2.5316853523254395 || epoch:141 ||
step4289: num_masked_molecule:726.0 || num_masked_partner:1103.5 || molecule_loss:-1.4667301177978516 || partner_loss:-1.1032249927520752 || train_loss:-2.5699543952941895 || epoch:142 ||
step4319: num_masked_molecule:726.0 || num_masked_partner:1088.4000244140625 || molecule_loss:-1.4581748247146606 || partner_loss:-1.0999674797058105 || train_loss:-2.5581421852111816 || epoch:143 ||
step4349: num_masked_molecule:726.0 || num_masked_partner:1059.433349609375 || molecule_loss:-1.45566725730896 || partner_loss:-1.1075527667999268 || train_loss:-2.5632200241088867 || epoch:144 ||
step4379: num_masked_molecule:726.0 || num_masked_partner:1172.433349609375 || molecule_loss:-1.472609043121338 || partner_loss:-1.092396855354309 || train_loss:-2.5650057792663574 || epoch:145 ||
step4409: num_masked_molecule:726.0 || num_masked_partner:985.566650390625 || molecule_loss:-1.4781265258789062 || partner_loss:-1.1260569095611572 || train_loss:-2.6041834354400635 || epoch:146 ||
step4439: num_masked_molecule:726.0 || num_masked_partner:1115.8333740234375 || molecule_loss:-1.4825230836868286 || partner_loss:-1.1054446697235107 || train_loss:-2.58796763420105 || epoch:147 ||
step4469: num_masked_molecule:726.0 || num_masked_partner:974.4000244140625 || molecule_loss:-1.4903604984283447 || partner_loss:-1.1227779388427734 || train_loss:-2.61313796043396 || epoch:148 ||
step4499: num_masked_molecule:726.0 || num_masked_partner:1098.2332763671875 || molecule_loss:-1.4861323833465576 || partner_loss:-1.0995885133743286 || train_loss:-2.5857207775115967 || epoch:149 ||
step4529: num_masked_molecule:726.0 || num_masked_partner:1024.800048828125 || molecule_loss:-1.4900097846984863 || partner_loss:-1.1176047325134277 || train_loss:-2.6076149940490723 || epoch:150 ||
step4559: num_masked_molecule:726.0 || num_masked_partner:981.7000122070312 || molecule_loss:-1.4896836280822754 || partner_loss:-1.1290066242218018 || train_loss:-2.6186907291412354 || epoch:151 ||
step4589: num_masked_molecule:726.0 || num_masked_partner:1080.0999755859375 || molecule_loss:-1.4915770292282104 || partner_loss:-1.1146584749221802 || train_loss:-2.6062352657318115 || epoch:152 ||
step4619: num_masked_molecule:726.0 || num_masked_partner:1103.800048828125 || molecule_loss:-1.4884955883026123 || partner_loss:-1.1100897789001465 || train_loss:-2.598585605621338 || epoch:153 ||
step4649: num_masked_molecule:726.0 || num_masked_partner:986.1333618164062 || molecule_loss:-1.5072274208068848 || partner_loss:-1.1295430660247803 || train_loss:-2.6367697715759277 || epoch:154 ||
step4679: num_masked_molecule:726.0 || num_masked_partner:1008.2000122070312 || molecule_loss:-1.5132561922073364 || partner_loss:-1.1203256845474243 || train_loss:-2.633582592010498 || epoch:155 ||
step4709: num_masked_molecule:726.0 || num_masked_partner:1011.9000244140625 || molecule_loss:-1.484210729598999 || partner_loss:-1.1299494504928589 || train_loss:-2.6141598224639893 || epoch:156 ||
step4739: num_masked_molecule:726.0 || num_masked_partner:1091.0999755859375 || molecule_loss:-1.4978324174880981 || partner_loss:-1.112225890159607 || train_loss:-2.610058307647705 || epoch:157 ||
step4769: num_masked_molecule:726.0 || num_masked_partner:1185.066650390625 || molecule_loss:-1.504968285560608 || partner_loss:-1.1101605892181396 || train_loss:-2.615128517150879 || epoch:158 ||
step4799: num_masked_molecule:726.0 || num_masked_partner:1017.4000244140625 || molecule_loss:-1.5109955072402954 || partner_loss:-1.1441339254379272 || train_loss:-2.6551289558410645 || epoch:159 ||
step4829: num_masked_molecule:726.0 || num_masked_partner:998.6333618164062 || molecule_loss:-1.5080370903015137 || partner_loss:-1.1475878953933716 || train_loss:-2.6556246280670166 || epoch:160 ||
step4859: num_masked_molecule:726.0 || num_masked_partner:1173.4000244140625 || molecule_loss:-1.5082473754882812 || partner_loss:-1.1150858402252197 || train_loss:-2.623332977294922 || epoch:161 ||
step4889: num_masked_molecule:726.0 || num_masked_partner:1062.2332763671875 || molecule_loss:-1.5195255279541016 || partner_loss:-1.1344455480575562 || train_loss:-2.65397047996521 || epoch:162 ||
step4919: num_masked_molecule:726.0 || num_masked_partner:998.8333129882812 || molecule_loss:-1.548446536064148 || partner_loss:-1.1556682586669922 || train_loss:-2.7041144371032715 || epoch:163 ||
step4949: num_masked_molecule:726.0 || num_masked_partner:970.2999877929688 || molecule_loss:-1.5229873657226562 || partner_loss:-1.137349247932434 || train_loss:-2.6603362560272217 || epoch:164 ||
step4979: num_masked_molecule:726.0 || num_masked_partner:1113.433349609375 || molecule_loss:-1.5138256549835205 || partner_loss:-1.1198792457580566 || train_loss:-2.633704423904419 || epoch:165 ||
step5009: num_masked_molecule:726.0 || num_masked_partner:996.1333618164062 || molecule_loss:-1.532779335975647 || partner_loss:-1.149556279182434 || train_loss:-2.682335376739502 || epoch:166 ||
step5039: num_masked_molecule:726.0 || num_masked_partner:1010.566650390625 || molecule_loss:-1.5579205751419067 || partner_loss:-1.1566312313079834 || train_loss:-2.714552640914917 || epoch:167 ||
step5069: num_masked_molecule:726.0 || num_masked_partner:1057.699951171875 || molecule_loss:-1.54557204246521 || partner_loss:-1.155557632446289 || train_loss:-2.701129913330078 || epoch:168 ||
step5099: num_masked_molecule:726.0 || num_masked_partner:1003.9666748046875 || molecule_loss:-1.5569409132003784 || partner_loss:-1.1513677835464478 || train_loss:-2.7083094120025635 || epoch:169 ||
step5129: num_masked_molecule:726.0 || num_masked_partner:959.433349609375 || molecule_loss:-1.5585733652114868 || partner_loss:-1.1556090116500854 || train_loss:-2.7141823768615723 || epoch:170 ||
step5159: num_masked_molecule:726.0 || num_masked_partner:1170.566650390625 || molecule_loss:-1.540384292602539 || partner_loss:-1.125343680381775 || train_loss:-2.6657278537750244 || epoch:171 ||
step5189: num_masked_molecule:726.0 || num_masked_partner:905.566650390625 || molecule_loss:-1.5687005519866943 || partner_loss:-1.1740463972091675 || train_loss:-2.7427470684051514 || epoch:172 ||
step5219: num_masked_molecule:726.0 || num_masked_partner:1127.36669921875 || molecule_loss:-1.548170566558838 || partner_loss:-1.1296813488006592 || train_loss:-2.677851915359497 || epoch:173 ||
step5249: num_masked_molecule:726.0 || num_masked_partner:1121.933349609375 || molecule_loss:-1.5502243041992188 || partner_loss:-1.1427909135818481 || train_loss:-2.6930153369903564 || epoch:174 ||
step5279: num_masked_molecule:726.0 || num_masked_partner:1025.5999755859375 || molecule_loss:-1.549673080444336 || partner_loss:-1.1613256931304932 || train_loss:-2.710999011993408 || epoch:175 ||
step5309: num_masked_molecule:726.0 || num_masked_partner:998.2666625976562 || molecule_loss:-1.5565258264541626 || partner_loss:-1.166730523109436 || train_loss:-2.7232563495635986 || epoch:176 ||
step5339: num_masked_molecule:726.0 || num_masked_partner:1107.199951171875 || molecule_loss:-1.5741074085235596 || partner_loss:-1.1469115018844604 || train_loss:-2.7210190296173096 || epoch:177 ||
step5369: num_masked_molecule:726.0 || num_masked_partner:1165.800048828125 || molecule_loss:-1.5624641180038452 || partner_loss:-1.1312538385391235 || train_loss:-2.6937179565429688 || epoch:178 ||
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
step5399: num_masked_molecule:726.0 || num_masked_partner:1068.7332763671875 || molecule_loss:-1.551928162574768 || partner_loss:-1.159980297088623 || train_loss:-2.7119085788726807 || epoch:179 ||
Evaluation
First we evaluate on ratio level.
Such an evaluation can also be used for real-world spike-in datasets, where the ground truth abundance is not known but the ground truth ratio between condition groups is known.
Using the calculate_ratio_absolute_error function, we calculate ratios for all possible sample pairs between the high and low samples (numerator_samples and denominator_samples) and compare those ratios against the ground truth.
import math
from pyproteonet.metrics import calculate_ratio_absolute_error
from matplotlib import pyplot as plt
# Samples 5-9 form the numerator group and samples 0-4 the denominator group
# (the simulated condition group affects the last five samples).
numerator_samples = [f'sample{idx}' for idx in range(5, 10)]
denominator_samples = [f'sample{idx}' for idx in range(5)]

# Ground truth ratio per protein: exponentiate the stored ground truth values
# with base e, average each sample group, and divide numerator by denominator.
# NOTE(review): assumes 'abundance_gt' is stored as natural log - confirm.
gt_abundance = math.e ** ds.get_wf('protein', 'abundance_gt')
numerator_mean = gt_abundance[numerator_samples].mean(axis=1)
denominator_mean = gt_abundance[denominator_samples].mean(axis=1)
ground_truth_ratios = (numerator_mean / denominator_mean).groupby('id').mean()

# Ratio errors of every imputation method (plus the GNN) against the ground truth.
ratio_mae = calculate_ratio_absolute_error(
    dataset=ds,
    molecule='protein',
    columns=imputation_methods + ['gnn_hetero'],
    numerator_samples=numerator_samples,
    denominator_samples=denominator_samples,
    ground_truth_ratios=ground_truth_ratios,
    calculate_log2_ratio=True,
    is_log=True,
)
# Plotting the results.
# Flag every row as differentially expressed (DE) when the ground truth ratio
# of its protein is at least a two-fold change in either direction.
row_ids = ratio_mae.index.get_level_values("id")
row_gt_ratio = ground_truth_ratios.loc[row_ids].values
ratio_mae["gt"] = np.abs(np.log(row_gt_ratio)) >= np.log(2)

# One subset with all rows, followed by one subset per DE flag value.
m = {False: "No DE", True: "DE"}
ratio_mae_subsets = {"All": ratio_mae}
ratio_mae_subsets.update(
    (m[is_de], rows) for is_de, rows in ratio_mae.groupby("gt")
)

fig, axs = plt.subplots(1, 3, sharey=True, figsize=(15, 5))
# NOTE(review): res_df is never filled in this cell; kept in case later cells use it.
res_df = []
plotted_methods = imputation_methods + ['gnn_hetero']
for ax, (title, subset) in zip(axs, ratio_mae_subsets.items()):
    # Average the errors per (numerator, denominator) sample pair, then reshape
    # to long format so that every method becomes one box in the plot.
    per_pair = (
        subset.groupby(["nominator_sample", "denominator_sample"])
        .mean()
        .drop(columns="gt")
        .melt(
            value_vars=plotted_methods,
            var_name="Imputation Method",
            value_name="MAE",
        )
    )
    sns.boxplot(data=per_pair, x="Imputation Method", y="MAE", ax=ax)
    ax.set_title(title)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
    ax.set_ylabel('$Log_2(Ratio)$ MAE')
Often differentially expressed molecules are found by statistical testing. Therefore, it is of interest which imputation method best allows correctly finding differentially expressed molecules from the imputed values.
Given the ground truth ratios, we can investigate this in terms of common metrics such as precision and recall, using the evaluate_des function.
from pyproteonet.metrics import evaluate_des
# Detect differentially expressed proteins from every imputed column (and from
# the 'aggregated' column used as baseline below) and score the detections
# against the ground truth ratios.
res = evaluate_des(
    dataset=ds,
    molecule='protein',
    columns=imputation_methods + ['gnn_hetero', 'aggregated'],
    numerator_samples=numerator_samples,
    denominator_samples=denominator_samples,
    gt_fc=ground_truth_ratios,
    min_fc=2,
    max_pvalue=0.05,
    is_log=True,
    absolute_metrics=False,
)
res['Method'] = res.index

# Metrics that are reported relative to the 'aggregated' baseline.
metric_columns = ['Recall', 'Precision', 'Specificity', 'Accuracy', 'FP Rate', 'F1 Score']

# Subtract only the metric cells of the baseline row. Subtracting the whole row
# would drag the string-valued 'Method' cell into the arithmetic and rely on
# pandas' object-dtype fallback to get through.
baseline = res.loc['aggregated', metric_columns]
res.loc[:, metric_columns] = res[metric_columns] - baseline
res = res.drop(labels=['aggregated'])

# Long format: one row per (method, metric) pair for the grouped bar plot.
de_res = res.melt(id_vars=['Method'], value_vars=metric_columns, value_name='%', var_name='Metric')
ax = sns.barplot(data=de_res, x='Method', y='%', hue='Metric')
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
[Text(0, 0, 'minprob'),
Text(1, 0, 'mindet'),
Text(2, 0, 'bpca'),
Text(3, 0, 'missforest'),
Text(4, 0, 'knn'),
Text(5, 0, 'isvd'),
Text(6, 0, 'dae'),
Text(7, 0, 'gnn_hetero')]