[29]:
import pandas as pd

Orienting dataframes to match

[ ]:
"""the trick is to ensure the index matches, or that both contain one column for merging.
"""
[30]:
# Load the sample-sheet metadata from its pickle file and display it.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle files
# that come from a trusted source.
meta = pd.read_pickle('sample_Sheet_meta_data.pkl')
meta
[30]:
Sample_Type Sentrix_ID Sentrix_Position Sample_Group Sample_Name Sample_Plate Sample_Type Sub_Type Sample_Well Pool_ID GSM_ID Control Sample_ID
0 Unknown 202908430131 R07C01 None CTRL01 None Unknown None None None GSM3927205 False 202908430131_R07C01
1 Unknown 202908540141 R06C01 None CTRL04 None Unknown None None None GSM3927208 False 202908540141_R06C01
2 Unknown 202908540141 R07C01 None CTRL05 None Unknown None None None GSM3927209 False 202908540141_R07C01
3 Unknown 202908540141 R01C01 None L4 None Unknown None None None GSM3927214 False 202908540141_R01C01
4 Unknown 202908540141 R02C01 None L2 None Unknown None None None GSM3927212 False 202908540141_R02C01
5 Unknown 202908430131 R08C01 None CTRL02 None Unknown None None None GSM3927206 False 202908430131_R08C01
6 Unknown 202908540141 R05C01 None CTRL03 None Unknown None None None GSM3927207 False 202908540141_R05C01
7 Unknown 202908540141 R08C01 None CTRL06 None Unknown None None None GSM3927210 False 202908540141_R08C01
8 Unknown 202908540141 R04C01 None L3 None Unknown None None None GSM3927213 False 202908540141_R04C01
9 Unknown 202908540141 R03C01 None L1 None Unknown None None None GSM3927211 False 202908540141_R03C01
[31]:
# Load the M-values table from its pickle file and preview the first five rows.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle files
# that come from a trusted source.
m_values = pd.read_pickle('m_values.pkl')
m_values.head()
[31]:
202908430131_R07C01 202908540141_R06C01 202908540141_R07C01 202908540141_R01C01 202908540141_R02C01 202908430131_R08C01 202908540141_R05C01 202908540141_R08C01 202908540141_R04C01 202908540141_R03C01
IlmnID
cg07881041 3.412787 3.345855 3.349728 3.082507 3.542418 2.960507 3.465868 3.729893 3.263243 3.290433
cg23229610 4.038538 3.865307 3.852169 3.671637 4.042739 4.180840 4.124988 4.124023 4.165693 4.018463
cg03513874 4.116045 3.840853 4.047271 3.949184 4.078279 3.633142 3.701213 4.302513 4.155497 3.791341
cg05451842 -4.910128 -4.602309 -4.914334 -4.967814 -4.836660 -5.514546 -5.124825 -4.866898 -4.993714 -4.925330
cg14797042 4.464024 4.292200 4.383563 4.382857 4.562915 4.372119 4.479269 4.617060 4.424321 4.435270
[32]:
# Index the sample sheet by Sample_ID so it can be joined to m_values on the index.
# Guarded so the cell can safely be re-run: after the first run Sample_ID is the
# index rather than a column, and an unguarded set_index would raise KeyError.
# If Sample_ID is neither the index nor a column, set_index still raises as before.
if meta.index.name != 'Sample_ID':
    meta = meta.set_index('Sample_ID')
[36]:
# Put samples on the rows of m_values so its index lines up with meta's index
# (the Sample_ID values). Orientation is decided by where the sample IDs actually
# sit rather than by the frame's shape: a shape test picks the wrong axis whenever
# the table holds fewer probes than samples, and the merge then comes back empty.
samples_on_rows = m_values.index.isin(meta.index).any()
samples_on_columns = m_values.columns.isin(meta.index).any()
if samples_on_columns and not samples_on_rows:
    m_values = m_values.transpose()

# Inner join on the shared index; validate= rejects duplicated sample IDs on either
# side instead of silently multiplying rows.
merged_df = m_values.merge(meta, left_index=True, right_index=True, validate='one_to_one')

# An inner join drops unmatched rows without warning, so make any loss explicit.
unmatched = m_values.index.difference(merged_df.index)
assert unmatched.empty, f"{len(unmatched)} m_values rows have no match in meta: {list(unmatched[:5])}"
[37]:
# Inspect meta again: Sample_ID should now be the index instead of a column.
meta
[37]:
Sample_Type Sentrix_ID Sentrix_Position Sample_Group Sample_Name Sample_Plate Sample_Type Sub_Type Sample_Well Pool_ID GSM_ID Control
Sample_ID
202908430131_R07C01 Unknown 202908430131 R07C01 None CTRL01 None Unknown None None None GSM3927205 False
202908540141_R06C01 Unknown 202908540141 R06C01 None CTRL04 None Unknown None None None GSM3927208 False
202908540141_R07C01 Unknown 202908540141 R07C01 None CTRL05 None Unknown None None None GSM3927209 False
202908540141_R01C01 Unknown 202908540141 R01C01 None L4 None Unknown None None None GSM3927214 False
202908540141_R02C01 Unknown 202908540141 R02C01 None L2 None Unknown None None None GSM3927212 False
202908430131_R08C01 Unknown 202908430131 R08C01 None CTRL02 None Unknown None None None GSM3927206 False
202908540141_R05C01 Unknown 202908540141 R05C01 None CTRL03 None Unknown None None None GSM3927207 False
202908540141_R08C01 Unknown 202908540141 R08C01 None CTRL06 None Unknown None None None GSM3927210 False
202908540141_R04C01 Unknown 202908540141 R04C01 None L3 None Unknown None None None GSM3927213 False
202908540141_R03C01 Unknown 202908540141 R03C01 None L1 None Unknown None None None GSM3927211 False
[38]:
# Inspect m_values after the orientation step: samples should now be on the rows.
m_values
[38]:
IlmnID cg07881041 cg23229610 cg03513874 cg05451842 cg14797042 cg09838562 cg25458538 cg09261072 cg02404579 cg04118974 ... cg22005990 cg05384275 cg21496658 cg27017993 cg19551589 cg10218605 cg06899844 cg22494081 cg22623303 cg21064505
202908430131_R07C01 3.412787 4.038538 4.116045 -4.910128 4.464024 -4.850363 3.972917 0.608777 2.593320 0.957829 ... -2.463625 -6.222727 -6.135573 3.945602 -5.102091 -0.631836 -4.743207 5.585948 5.890584 5.112240
202908540141_R06C01 3.345855 3.865307 3.840853 -4.602309 4.292200 -4.745925 4.091592 0.729237 2.690233 0.826782 ... -1.456612 -6.098733 -5.501676 3.735050 -4.929893 -0.175451 -4.550831 5.341666 5.371025 4.830027
202908540141_R07C01 3.349728 3.852169 4.047271 -4.914334 4.383563 -4.937930 3.948131 0.458526 2.325352 0.504637 ... -1.755983 -6.012443 -6.038302 4.025450 -3.566083 -0.630898 -4.719253 5.517068 5.454447 5.133564
202908540141_R01C01 3.082507 3.671637 3.949184 -4.967814 4.382857 -4.760096 3.880529 0.724771 2.592081 0.699158 ... -1.679747 -5.896445 -5.243735 3.737237 -5.057497 0.113837 -4.504871 5.290402 5.445842 4.979013
202908540141_R02C01 3.542418 4.042739 4.078279 -4.836660 4.562915 -4.963109 3.829859 0.591076 2.381769 0.477761 ... -2.228573 -5.989043 -5.702124 3.718148 -4.994833 -0.454764 -4.600786 5.529454 5.507032 5.079351
202908430131_R08C01 2.960507 4.180840 3.633142 -5.514546 4.372119 -4.384490 3.946780 0.490380 2.962171 0.635061 ... -1.980125 -6.638985 -5.998120 3.912966 -5.049951 0.338431 -4.757568 5.891426 5.677103 5.228642
202908540141_R05C01 3.465868 4.124988 3.701213 -5.124825 4.479269 -4.845135 3.916670 0.590274 2.374042 0.087433 ... -3.009043 -5.955131 -5.769887 3.774773 -5.085601 0.606516 -4.654901 5.387020 5.523357 4.829708
202908540141_R08C01 3.729893 4.124023 4.302513 -4.866898 4.617060 -4.646371 4.056437 0.661098 2.730637 0.926858 ... -1.991519 -6.215806 -5.837299 3.949614 -5.154335 -0.154815 -4.827380 5.775283 5.607601 5.354515
202908540141_R04C01 3.263243 4.165693 4.155497 -4.993714 4.424321 -4.701726 3.771189 0.440081 2.481938 0.269274 ... -3.321619 -6.094163 -5.897993 3.959106 -4.880647 0.943185 -4.500393 5.433155 5.592485 5.004386
202908540141_R03C01 3.290433 4.018463 3.791341 -4.925330 4.435270 -4.627422 4.117044 0.474540 2.137687 0.330344 ... -2.611546 -6.131170 -5.999020 3.935216 -4.829910 -0.259069 -4.535684 5.345188 5.456787 5.241151

10 rows × 865859 columns

[39]:
# Inspect the merged frame: the m_values columns followed by the sample-sheet columns.
merged_df
[39]:
cg07881041 cg23229610 cg03513874 cg05451842 cg14797042 cg09838562 cg25458538 cg09261072 cg02404579 cg04118974 ... Sentrix_Position Sample_Group Sample_Name Sample_Plate Sample_Type Sub_Type Sample_Well Pool_ID GSM_ID Control
202908430131_R07C01 3.412787 4.038538 4.116045 -4.910128 4.464024 -4.850363 3.972917 0.608777 2.593320 0.957829 ... R07C01 None CTRL01 None Unknown None None None GSM3927205 False
202908540141_R06C01 3.345855 3.865307 3.840853 -4.602309 4.292200 -4.745925 4.091592 0.729237 2.690233 0.826782 ... R06C01 None CTRL04 None Unknown None None None GSM3927208 False
202908540141_R07C01 3.349728 3.852169 4.047271 -4.914334 4.383563 -4.937930 3.948131 0.458526 2.325352 0.504637 ... R07C01 None CTRL05 None Unknown None None None GSM3927209 False
202908540141_R01C01 3.082507 3.671637 3.949184 -4.967814 4.382857 -4.760096 3.880529 0.724771 2.592081 0.699158 ... R01C01 None L4 None Unknown None None None GSM3927214 False
202908540141_R02C01 3.542418 4.042739 4.078279 -4.836660 4.562915 -4.963109 3.829859 0.591076 2.381769 0.477761 ... R02C01 None L2 None Unknown None None None GSM3927212 False
202908430131_R08C01 2.960507 4.180840 3.633142 -5.514546 4.372119 -4.384490 3.946780 0.490380 2.962171 0.635061 ... R08C01 None CTRL02 None Unknown None None None GSM3927206 False
202908540141_R05C01 3.465868 4.124988 3.701213 -5.124825 4.479269 -4.845135 3.916670 0.590274 2.374042 0.087433 ... R05C01 None CTRL03 None Unknown None None None GSM3927207 False
202908540141_R08C01 3.729893 4.124023 4.302513 -4.866898 4.617060 -4.646371 4.056437 0.661098 2.730637 0.926858 ... R08C01 None CTRL06 None Unknown None None None GSM3927210 False
202908540141_R04C01 3.263243 4.165693 4.155497 -4.993714 4.424321 -4.701726 3.771189 0.440081 2.481938 0.269274 ... R04C01 None L3 None Unknown None None None GSM3927213 False
202908540141_R03C01 3.290433 4.018463 3.791341 -4.925330 4.435270 -4.627422 4.117044 0.474540 2.137687 0.330344 ... R03C01 None L1 None Unknown None None None GSM3927211 False

10 rows × 865871 columns