Another example of sample QC

[1]:
# This illustrates a simple, small batch (n=10) of methylation array sample results using methylprep and methylcheck.
#python 3.7
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
%matplotlib inline
[2]:
# Enable IPython's autoreload extension so edits to imported modules are picked up live.
%load_ext autoreload
%autoreload 2
import methylcheck
from pprint import pprint as pp
[4]:
# Load the sample data: a pickled DataFrame of beta values
# (presumably produced by methylprep -- confirm against your pipeline).
# NOTE: unpickling can execute arbitrary code; only load pickle files you created or trust.
from pathlib import Path
data_dir = '~/'
# pathlib does not expand '~' on its own, so resolve the home directory explicitly
# instead of relying on the reader function to do it implicitly.
betas = pd.read_pickle(Path(data_dir, 'beta_values.pkl').expanduser())
[5]:
# Preview the first five probes (rows, indexed by IlmnID); each column is one sample.
betas.head(n=5)
[5]:
202908430131_R07C01 202908540141_R06C01 202908540141_R07C01 202908540141_R01C01 202908540141_R02C01 202908430131_R08C01 202908540141_R05C01 202908540141_R08C01 202908540141_R04C01 202908540141_R03C01
IlmnID
cg07881041 0.904343 0.900771 0.900851 0.882696 0.909091 0.876905 0.906337 0.919900 0.895526 0.896339
cg23229610 0.923033 0.914954 0.914858 0.902453 0.920394 0.922635 0.925886 0.925216 0.925416 0.920663
cg03513874 0.931137 0.919958 0.928000 0.921894 0.927230 0.909573 0.913583 0.936886 0.931464 0.916364
cg05451842 0.031496 0.038683 0.031413 0.030096 0.032964 0.020800 0.027175 0.032345 0.029695 0.031083
cg14797042 0.940138 0.934215 0.937744 0.935057 0.939669 0.934442 0.940236 0.943643 0.936240 0.937548

beta_density_plot

[6]:
# Plot the beta-value density of each sample as its own curve.
methylcheck.beta_density_plot(betas)
../_images/docs_another-methylcheck-qc-example_6_0.png
[7]:
# Plot a single curve that averages the beta values of all samples together.
methylcheck.mean_beta_plot(betas)
../_images/docs_another-methylcheck-qc-example_7_0.png
[9]:
# Use this to remove outlier samples, based on the cutoff value.
# Here the plot looks identical to the beta_density_plot because no samples were removed.
filtered_df = methylcheck.cumulative_sum_beta_distribution(betas, cutoff=0.7)
Calculating area under curve for each sample.
10it [00:02,  5.04it/s]
../_images/docs_another-methylcheck-qc-example_8_2.png
[13]:
# Multidimensional-scaling (MDS) plot of the samples. filter_stdev is the scale factor
# applied to the sample standard deviation to define the acceptable value range.
# NOTE(review): in the recorded run this call prompts interactively for a new scale
# factor, and its return value (a samples x probes DataFrame) is echoed in full as the
# cell output -- consider assigning the result to a variable to avoid the large dump.
methylcheck.beta_mds_plot(betas, filter_stdev=1.8)
Your data needed to be transposed (df = df.transpose()).
(10, 865859)
Making sure that probes are in columns (the second number should be larger than the first).
Starting MDS fit_transform. this may take a while.
You can now remove outliers based on their transformed beta values
 falling outside a range, defined by the sample standard deviation.
Your acceptable value range: x=(-41.0 to 41.0), y=(-45.0 to 45.0).
axes None [<matplotlib.axes._subplots.AxesSubplot object at 0x7fa4c949df98>] assigned to ax.
../_images/docs_another-methylcheck-qc-example_9_1.png
Original samples (0, 2) vs filtered (10, 2)
Your scale factor was: 1.8
Enter new scale factor, <enter> to accept and save:
[13]:
IlmnID cg07881041 cg23229610 cg03513874 cg05451842 cg14797042 cg09838562 cg25458538 cg09261072 cg02404579 cg04118974 ... cg22005990 cg05384275 cg21496658 cg27017993 cg19551589 cg10218605 cg06899844 cg22494081 cg22623303 cg21064505
202908430131_R07C01 0.904343 0.923033 0.931137 0.031496 0.940138 0.032304 0.929787 0.598709 0.845217 0.650180 ... 0.151468 0.013035 0.013853 0.916667 0.027761 0.387055 0.035152 0.970914 0.972975 0.955782
202908540141_R06C01 0.900771 0.914954 0.919958 0.038683 0.934215 0.034625 0.934069 0.618233 0.854144 0.629472 ... 0.263880 0.014193 0.021349 0.903981 0.031160 0.463987 0.040007 0.966107 0.965929 0.949739
202908540141_R07C01 0.900851 0.914858 0.928000 0.031413 0.937744 0.030491 0.929340 0.573967 0.823189 0.577657 ... 0.225832 0.015058 0.014798 0.919489 0.076705 0.387620 0.035784 0.969959 0.966356 0.956764
202908540141_R01C01 0.882696 0.902453 0.921894 0.030096 0.935057 0.033836 0.923905 0.616446 0.842846 0.606424 ... 0.234230 0.016249 0.025378 0.900429 0.028450 0.511480 0.041063 0.963544 0.963379 0.949240
202908540141_R02C01 0.909091 0.920394 0.927230 0.032964 0.939669 0.029871 0.922341 0.594974 0.826403 0.571482 ... 0.173193 0.015260 0.018583 0.900903 0.029741 0.415422 0.038572 0.968489 0.965089 0.953391
202908430131_R08C01 0.876905 0.922635 0.909573 0.020800 0.934442 0.042985 0.928857 0.578807 0.872678 0.597685 ... 0.199412 0.009801 0.015225 0.907851 0.028729 0.550254 0.034694 0.974369 0.969074 0.956301
202908540141_R05C01 0.906337 0.925886 0.913583 0.027175 0.940236 0.032446 0.927417 0.595427 0.827010 0.507028 ... 0.108926 0.015647 0.017776 0.907857 0.028055 0.595931 0.037338 0.966914 0.967480 0.948400
202908540141_R08C01 0.919900 0.925216 0.936886 0.032345 0.943643 0.036972 0.933692 0.607197 0.857196 0.644634 ... 0.198390 0.013094 0.016988 0.915133 0.026774 0.466936 0.033210 0.973266 0.968754 0.960836
202908540141_R04C01 0.895526 0.925416 0.931464 0.029695 0.936240 0.035692 0.921008 0.570685 0.836864 0.537062 ... 0.089540 0.014230 0.016273 0.914756 0.032220 0.650614 0.041320 0.967863 0.967791 0.953156
202908540141_R03C01 0.896339 0.920663 0.916364 0.031083 0.937548 0.037391 0.934201 0.575947 0.803317 0.546901 ... 0.138550 0.013856 0.015158 0.912915 0.033248 0.448766 0.040269 0.965765 0.965241 0.957500

10 rows × 865859 columns

[ ]: