r"""
=================
Utils
=================
Utilities for building, indexing, and manipulating
and xarray dataset topology
specific to most **phippery** functions provided in this package
"""
# dependencies
import pandas as pd
import numpy as np
import xarray as xr
import scipy.stats as st
# built-in python3
import os
import re
import copy
import itertools
import pickle
from functools import reduce
from collections import defaultdict
[docs]def iter_groups(ds, by, dim="sample"):
"""This function returns an iterator
yeilding subsets of the provided dataset,
grouped by items in the metadata of either of the
dimensions specified.
Parameters
----------
ds : xarray.DataSet
The dataset to iterate over.
Returns
-------
generator :
Returns subsets of the original dataset sliced by
either sample or peptide table groups.
Example
-------
>>> phippery.get_annotation_table(ds, "sample")
sample_metadata fastq_filename reference seq_dir sample_type
sample_id
0 sample_0.fastq refa expa beads_only
1 sample_1.fastq refa expa beads_only
2 sample_2.fastq refa expa library
3 sample_3.fastq refa expa library
>>> ds["counts"].values
array([[458, 204, 897, 419],
[599, 292, 436, 186],
[ 75, 90, 978, 471],
[872, 33, 108, 505],
[206, 107, 981, 208]])
>>> sample_groups = iter_groups(ds, by="sample_type")
>>> for group, phip_dataset in sample_groups:
... print(group)
... print(phip_dataset["counts"].values)
...
beads_only
[[458 204]
[599 292]
[ 75 90]
[872 33]
[206 107]]
library
[[897 419]
[436 186]
[978 471]
[108 505]
[981 208]]
"""
table = get_annotation_table(ds, dim=dim)
for group, group_df in table.groupby(by):
group_ds = ds.loc[{f"{dim}_id": list(group_df.index.values)}]
yield group, group_ds
[docs]def get_annotation_table(ds, dim="sample"):
"""
return a copy of the peptide table after converting all
the data types applying the pandas NaN heuristic
Parameters
----------
ds : xarray.DataSet
The dataset to extract an annotation from.
dim : str
The annotation table to grab: "sample" or "peptide".
Returns
-------
pd.DataFrame :
The annotation table.
"""
st = ds[f"{dim}_table"].to_pandas().convert_dtypes()
st.index.name = f"{dim}_id"
return st
[docs]def stitch_dataset(
counts,
peptide_table,
sample_table,
):
"""Build an phippery xarray dataset from individual
tables.
Note
----
If the counts matrix that you're passing has the shape
(M x N) for M peptides, and N samples, the
sample table should have a len of N,
and peptide table should have len M
Parameters
----------
counts : numpy.ndarray
The counts matrix for sample peptide enrichments.
sample_table : pd.DataFrame
The sample annotations corresponding to the columns of
the counts matrix.
peptide_table : pd.DataFrame
The peptide annotations corresponding to the rows of
the counts matrix.
Returns
-------
xarray.DataSet :
The formatted phippery xarray dataset used by most of the phippery functionality.
"""
# make sure the coordinated match up.
assert np.all(counts.columns == sample_table.index)
assert np.all(counts.index == peptide_table.index)
# we are returning the xarray dataset organized by four coordinates seen below.
pds = xr.Dataset(
{
"counts": (["peptide_id", "sample_id"], counts),
"sample_table": (["sample_id", "sample_metadata"], sample_table),
"peptide_table": (["peptide_id", "peptide_metadata"], peptide_table),
},
coords={
"sample_id": counts.columns.values,
"peptide_id": counts.index.values,
"sample_metadata": sample_table.columns.values,
"peptide_metadata": peptide_table.columns.values,
},
)
return pds
[docs]def collect_counts(counts):
r"""merge individual tsv files for a bunh of samples
into a counts matrix.
Parameters
----------
counts : list[str]
A list a filepaths relative to current working directory to read in.
The filepaths should point to tab-separated files for each sample
which contains two columns (without headers):
1. peptide ids - the integer peptide identifiers
2. enrichments - the respective enrichments for any peptide id
Returns
-------
pd.DataFrame :
The merged enrichments with peptides as the index, and filenames as column names.
"""
load = lambda path, sample: pd.read_csv( # noqa
path, index_col=0, sep="\t", names=["peptide_id", sample]
)
sample_dataframes = [
load(path, int(os.path.basename(path).split(".")[0])) for path in counts
]
merged_counts_df = reduce(
lambda l, r: pd.merge(l, r, how="outer", left_index=True, right_index=True),
sample_dataframes,
).fillna(0)
merged_counts_df.columns = merged_counts_df.columns.astype(int)
merged_counts_df.index = merged_counts_df.index.astype(int)
merged_counts_df.sort_index(inplace=True)
merged_counts_df.sort_index(axis=1, inplace=True)
return merged_counts_df
[docs]def to_tall(ds: xr.Dataset):
"""Melt a phippery xarray dataset into a single long-formatted
dataframe that has a unique sample peptide interaction on each
row. Ideal for ggplotting.
Parameters
----------
ds : xarray.DataSet
The dataset to extract an annotation from.
Returns
-------
pd.DataFrame :
The tall formatted dataset.
Example
-------
>>> ds["counts"].to_pandas()
sample_id 0 1
peptide_id
0 453 393
1 456 532
2 609 145
>>> to_tall(ds)[["sample_id", "peptide_id", "counts"]]
sample_id peptide_id counts
0 0 0 453
1 0 1 456
2 0 2 609
3 1 0 393
4 1 1 532
5 1 2 145
"""
return pd.concat([
sample_df
for sample_df in yield_tall(ds)
])
[docs]def yield_tall(ds: xr.Dataset):
"""For each sample, yield a tall DataFrame."""
# Get the table of samples
sample_table = ds.sample_table.to_pandas().reset_index().infer_objects()
# Keep track of the order of columns in all emitted items
cnames = None
# For each sample
for sample_id in sample_table["sample_id"].values:
# Make sure that values for this sample are present in all data tables
for dt in set(list(ds.data_vars)) - set(["sample_table", "peptide_table"]):
assert sample_id in ds[f"{dt}"].to_pandas().columns.values, f"Could not find sample '{sample_id}' in table for {dt}"
# Make a wide table
sample_df = pd.DataFrame({
f"{dt}": ds[
f"{dt}"
].to_pandas(
)[
sample_id
]
for dt in set(list(ds.data_vars)) - set(["sample_table", "peptide_table"])
}).assign(
sample_id=sample_id
)
# Get the table of peptides
peptide_table = ds.peptide_table.to_pandas().reset_index().infer_objects()
# merge the metadata into the melted datatables
sample_df = sample_df.merge(peptide_table, on="peptide_id")
# Merge the sample table
sample_df = sample_df.merge(sample_table, on="sample_id")
# If the column names have not yet been set
if cnames is None:
# Set them based on the first table
cnames = sample_df.columns.values
# If the column names were set in a previous iteration
else:
# Make sure that this table conforms to the same column order
sample_df = sample_df.reindex(columns=cnames)
yield sample_df
[docs]def to_wide(ds):
"""Take a phippery dataset and split it into
its separate components in a dictionary.
Parameters
----------
ds : xarray.DataSet
The dataset to separate.
Returns
-------
dict :
The dictionary of annotation tables and enrichment matrices.
"""
ret = {}
enr_layers = set(list(ds.data_vars)) - set(["sample_table", "peptide_table"])
for dt in enr_layers:
layer = copy.deepcopy(ds[f"{dt}"].to_pandas())
layer.index.name = ""
ret[dt] = layer
for at in ["sample", "peptide"]:
ret[at] = get_annotation_table(ds, dim=at)
return ret
[docs]def to_wide_csv(ds, file_prefix):
"""Take a phippery dataset and split it into
its separate components at writes each into a
comma separated file.
Note
----
This is the inverse operation of the
`dataset_from_csv()` utility function.
Generally speaking these functions are used for
long term storage in common formats when pickle
dumped binaries are not ideal.
Parameters
----------
ds : xarray.DataSet
The dataset to extract an annotation from.
file_prefix : str
The fileprefix relative to the current working directory
where the files should be written.
Returns
-------
None
"""
enr_layers = set(list(ds.data_vars)) - set(["sample_table", "peptide_table"])
for dt in enr_layers:
layer = ds[f"{dt}"].to_pandas()
layer.index.name = ""
layer.to_csv(f"{file_prefix}_{dt}.csv", na_rep="NA", index_label=None)
for at in ["sample", "peptide"]:
get_annotation_table(ds, dim=at).to_csv(
f"{file_prefix}_{at}_annotation_table.csv"
)
[docs]def id_coordinate_from_query_df(ds, query_df):
"""Given a dataframe with pandas query statements
for both samples and peptides, return the relevent sample
and peptide id's after applying the logical AND of all queries.
Parameters
----------
ds : xarray.DataSet
The dataset you would like to query.
query_df : pd.DataFrame
A dataframe with must have two columns (including headers):
1. "dimension" - either "sample" or "peptide" to specify expression dimension
2. "expression" - The pandas query expression to apply.
Returns
-------
tuple : list, list
Return a tuple of sample id's and peptide id's
"""
sq = list(query_df.loc[query_df["dimension"] == "sample", "expression"].values)
sid = id_query(ds, "sample", " & ".join(sq))
pq = list(query_df.loc[query_df["dimension"] == "peptide", "expression"].values)
pid = id_query(ds, "peptide", " & ".join(pq))
return sid, pid
[docs]def ds_query(ds, query, dim="sample"):
"""
Apply a sample or peptide query statement to the
entire dataset.
Note
----
For more on pandas queries, see
`the pandas documentation <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html>`_
Parameters
----------
ds : xarray.DataSet
The dataset you would like to query.
query : str
pandas query expression
dim : str
The dimension to to apply the expression
Returns
-------
xarray.DataSet :
reference to the dataset slice from the given expression.
Example
-------
>>> phippery.get_annotation_table(ds, "peptide")
peptide_metadata Oligo virus
peptide_id
0 ATCG zika
1 ATCG zika
2 ATCG zika
3 ATCG zika
4 ATCG dengue
5 ATCG dengue
6 ATCG dengue
7 ATCG dengue
>>> zka_ds = ds_query(ds, "virus == 'zika'", dim="peptide")
>>> zka_ds["counts"].to_pandas()
sample_id 0 1 2 3 4 5 6 7 8 9
peptide_id
0 110 829 872 475 716 815 308 647 216 791
1 604 987 776 923 858 985 396 539 32 600
2 865 161 413 760 422 297 639 786 857 878
3 992 354 825 535 440 416 572 988 763 841
"""
idx = id_query(ds, query, dim)
return ds.loc[{f"{dim}_id": idx}]
[docs]def id_query(ds, query, dim="sample"):
"""
Apply a sample or peptide query statement to the
entire dataset and retrieve the respective indices.
Note
----
For more on pandas queries, see
https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html
Parameters
----------
ds : xarray.DataSet
The dataset you would like to query.
query : str
pandas query expression
dim : str
Return
------
list[int] :
The list of integer identifiers that apply to a given expression
for the respective dimension.
"""
return get_annotation_table(ds, dim).query(query).index.values
[docs]def load(path):
"""simple wrapper for loading
xarray datasets from pickle binary
Parameters
----------
path : str
Relative path of binary phippery dataset
Returns
-------
xarray.DataSet :
phippery dataset
"""
return pickle.load(open(path, "rb"))
[docs]def dump(ds, path):
"""
simple wrapper for dumping xarray datasets to pickle binary
Parameters
----------
ds : xarray.DataSet
The dataset you would like to write to disk.
path : str
The relative path you would like to write to.
Returns
-------
None
"""
pickle.dump(ds, open(path, "wb"))
[docs]def dataset_from_csv(
peptide_table_filename,
sample_table_filename,
counts_table_filename
):
r"""Load a dataset from individual comma separated
files containing the counts matrix, as well as
sample and peptide annotation tables.
Note
----
This is the inverse operation of the
`to_wide_csv()` utility function.
Generally speaking these functions are used for
long term storage in common formats when pickle
dumped binaries are not ideal.
For now, this function only supports
a single enrichment table to be added with the
variable name "counts" to the dataset.
If you would like to add other transformation of the
enrichment table (i.e. cpm, mlxp, etc), you can
load the csv's via pandas and add to the dataset
using the `add_enrichment_layer_from_array` function
Parameters
----------
counts_table_filename : str
The glob filepath to csv file(s) containing the enrichments.
All files should have the first column be indices which match the
given peptide table index column.
The first row then should have column headers that match the
index of the sample table.
peptide_table_filename : str
The relative filepath to the peptide annotation table.
sample_table_filename : str
The relative filepath to the sample annotation table.
Returns
-------
xarray.DataSet :
The combined tables in a phippery dataset.
"""
counts_df = _collect_counts_matrix(counts_table_filename)
peptide_df = _collect_peptide_table(peptide_table_filename)
sample_df = _collect_sample_table(sample_table_filename)
return stitch_dataset(
counts=counts_df,
peptide_table=peptide_df,
sample_table=sample_df,
)
def _collect_sample_table(sample_table_filename: str):
"""Read and verify a sample table."""
sample_table = pd.read_csv(sample_table_filename, sep=",", index_col=0, header=0)
if sample_table.index.name != "sample_id":
raise ValueError("The name of the index must be 'sample_id'")
if sample_table.index.dtype != "int64":
raise ValueError("The index values for sample_id must be inferred as integers")
sample_table.sort_index(inplace=True)
return sample_table
def _collect_peptide_table(peptide_table_filename: str):
"""Read and verify a peptide table."""
peptide_table = pd.read_csv(peptide_table_filename, sep=",", index_col=0, header=0)
if peptide_table.index.name != "peptide_id":
raise ValueError
if peptide_table.index.dtype != "int64":
raise ValueError("The index values for peptide_id must be inferred as integers")
peptide_table.sort_index(inplace=True)
return peptide_table
def _collect_counts_matrix(counts_matrix_filename: str):
"""Read and verify a counts matrix file."""
counts_matrix = pd.read_csv(counts_matrix_filename, sep=",", index_col=0, header=0)
try:
counts_matrix.columns = counts_matrix.columns.astype(int)
except:
raise ValueError(
"column header values much be able to cast to type 'int' to match peptide table index"
)
try:
counts_matrix.index = counts_matrix.index.astype(int)
except:
raise ValueError(
"row index values much be able to cast to type 'int' to match peptide table index"
)
counts_matrix.sort_index(inplace=True)
counts_matrix.sort_index(axis=1, inplace=True)
return counts_matrix
[docs]def add_enrichment_layer_from_array(ds, enrichment, new_table_name=None, inplace=True):
"""Append an enrichment layer to the dataset.
Parameters
----------
ds : xarray.DataSet
The phippery dataset to append to.
enrichment : np.array
The enrichment matrix to append to the phippery dataset.
The number of rows should be the same length as ds.peptide_id
and the number of columns should be the same length as ds.sample_id
new_table_name : str
What you would like to name the enrichment layer.
inplace : bool
Determines whether to modify the passed dataset, or return an augmented
copy
Returns
-------
None | xarray.DataSet
The augmented phippery dataset copy is returned if inplace is ``True``
"""
if enrichment.shape != ds.counts.shape:
ins = enrichment.shape
cur = ds.counts.shape
pri = f"provided enrichment layer shape: {ins},"
pri += f"current working dataset counts shape: {cur}"
raise ValueError(
f"Enrichments must have the same shape as enrichments in dataset. {pri}"
)
enr_layers = set(list(ds.data_vars)) - set(["sample_table", "peptide_table"])
if new_table_name == None:
new_table_name = f"enrichment_layer_{len(enr_layers)+1}"
if inplace:
ds[new_table_name] = xr.DataArray(enrichment, dims=ds.counts.dims)
return None
else:
ds_copy = copy.deepcopy(ds)
ds_copy[new_table_name] = xr.DataArray(enrichment, dims=ds.counts.dims)
return ds_copy
def _throw_mismatched_features(df, by):
"""
When you collapse by some set of columns in the dataframe,
keep only features which homogeneous within groups.
This is similar to 'DataFrameGroupby.first()' method,
but instead of keeping the first factor level appearing for each group
category, we only throw any features which are note homogeneous within groups.
You could achieve the same functionality by listing features you know to be
homogeneous in the 'by' parameter.
"""
# Find out which collapse features are shared within groups
collapsed_sample_metadata = defaultdict(list)
for i, (group, group_df) in enumerate(df.groupby(by)):
for column, value in group_df.items():
v = value.values
if np.all(v == v[0]) or np.all([n != n for n in v]):
collapsed_sample_metadata[column].append(v[0])
# Throw out features that are not shared between groups
to_throw = [
key for key, value in collapsed_sample_metadata.items() if len(value) < i + 1
]
[collapsed_sample_metadata.pop(key) for key in to_throw]
return pd.DataFrame(collapsed_sample_metadata)
def _mean_pw_cc_by_multiple_tables(ds, by, dim="sample", data_tables="all"):
"""
A wrapper for computing pw cc within groups defined
with the 'by' parameter. Merges the correlations into a
single table
"""
# Compute mean pw cc on all possible data tables
if data_tables == "all":
data_tables = list(set(ds.data_vars) - set(["sample_table", "peptide_table"]))
# Some error handling
if dim not in ["sample", "peptide"]:
raise ValueError(f"parameter 'dim' must be either 'sample' or 'peptide'")
groups_avail = ds[f"{dim}_metadata"].values
for data_table in data_tables:
if data_table not in ds:
raise KeyError(f"{data_table} is not included in dataset.")
for group in by:
if group not in groups_avail:
raise KeyError(
f"{group} is not included as a column in the {dim} table. The available groups are {groups_avail}"
)
# Compute mean pw cc on all data layers - resulting in a df for each
corr_dfs = [
_mean_pw_cc_by(ds, by, data_table=data_table, dim=dim)
for data_table in data_tables
]
# return a single merged df containing info for all data layer pw cc
return reduce(
lambda l, r: pd.merge(l, r, how="outer", left_index=True, right_index=True),
corr_dfs,
)
def _mean_pw_cc_by(ds, by, data_table="counts", dim="sample"):
"""
Computes pairwise cc for all
dim in a group specified by 'group' column.
returns a dataframe with each group, it's
repective pw_cc, and the number of dims
in the group.
"""
data = ds[f"{data_table}"].to_pandas()
groups, pw_cc, n = [], [], []
for s_group, group_ds in iter_groups(ds, by, dim):
groups.append(s_group)
n.append(len(group_ds[f"{dim}_id"].values))
if len(group_ds[f"{dim}_id"].values) < 2:
pw_cc.append(1.0)
continue
correlations = []
for dim_ids in itertools.combinations(group_ds[f"{dim}_id"].values, 2):
dim_0_enrichment = data.loc[:, dim_ids[0]]
dim_1_enrichment = data.loc[:, dim_ids[1]]
correlation = (
st.pearsonr(dim_0_enrichment, dim_1_enrichment)[0]
if np.any(dim_0_enrichment != dim_1_enrichment)
else 1.0
)
correlations.append(correlation)
pw_cc.append(round(sum(correlations) / len(correlations), 5))
name = "_".join(by)
column_prefix = f"{name}_{data_table}"
ret = pd.DataFrame(
{
f"{name}": groups,
f"{column_prefix}_pw_cc": np.array(pw_cc).astype(np.float64),
f"{column_prefix}_n_reps": np.array(n).astype(int),
}
).set_index(name)
return ret
[docs]def collapse_sample_groups(*args, **kwargs):
"""wrap for sample collapse"""
return collapse_groups(*args, **kwargs, collapse_dim="sample")
[docs]def collapse_peptide_groups(*args, **kwargs):
"""wrap for peptide collapse"""
return collapse_groups(*args, **kwargs, collapse_dim="peptide")
[docs]def collapse_groups(
ds, by, collapse_dim="sample", agg_func=np.mean, compute_pw_cc=False, **kwargs
):
"""Collapse an xarray dataset along one of the annotation axis
by applying the agg_function to annotation groups of 'by'.
Parameters
----------
ds : xarray.DataSet
The phippery dataset to append to.
by : list
The name of the annotation feature you would like to collapse.
collapse_dim : str
The dimension you's like to collapse. "sample" or "peptide"
compute_pw_cc : bool
Whether or not to compute the mean pairwise correlation
of all values within any feature group that is being
collapsed.
agg_func : *function*
This function must take a one-dimensional array and aggregate
all values to a single number, agg_func(list[float | int]) -> float | int
Returns
-------
xarray.DataSet :
The collapsed phippery dataset.
Example
-------
>>> get_annotation_table(ds, dim="sample")
sample_metadata fastq_filename reference seq_dir sample_type
sample_id
0 sample_0.fastq refa expa beads_only
1 sample_1.fastq refa expa beads_only
2 sample_2.fastq refa expa library
3 sample_3.fastq refa expa library
4 sample_4.fastq refa expa IP
5 sample_5.fastq refa expa IP
>>> ds["counts"].to_pandas()
sample_id 0 1 2 3 4 5
peptide_id
0 7 0 3 2 3 2
1 6 3 1 0 7 5
2 9 1 7 8 4 7
>>> mean_sample_type_ds = collapse_groups(ds, by=["sample_type"])
>>> get_annotation_table(mean_sample_type_ds, dim="sample")
sample_metadata reference seq_dir sample_type
sample_id
0 refa expa IP
1 refa expa beads_only
2 refa expa library
>>> mean_sample_type_ds["counts"].to_pandas()
sample_id 0 1 2
peptide_id
0 2.5 3.5 2.5
1 6.0 4.5 0.5
2 5.5 5.0 7.5
"""
# Check to see if the group(s) is/are available
groups_avail = ds[f"{collapse_dim}_metadata"].values
for group in by:
if group not in groups_avail:
raise KeyError(
f"{group} is not included as a column in the {collapse_dim} table. The available groups are {groups_avail}"
)
# define collapse and fixed df
dims = set(["sample", "peptide"])
fixed_dim = list(dims - set([collapse_dim]))[0]
# grab relavent annotation tables
collapse_df = ds[f"{collapse_dim}_table"].to_pandas()
fixed_df = ds[f"{fixed_dim}_table"].to_pandas()
# Create group-able dataset by assigning table columns to a coordinate
if len(by) == 1:
coord = collapse_df[by[0]]
coord_ds = ds.assign_coords({f"{by[0]}": (f"{collapse_dim}_id", coord)})
else:
print(f"WARNING: Nothing available, here")
return None
# if were grouping by multiple things, we need to zip 'em into a tuple coord
# psuedo-code
# else:
# common_dim = f"{collapse_dim}_id"
# coor_arr = np.empty(len(ds[common_dim]), dtype=object)
# coor_arr[:] = list(zip(*(collapse_df[f].values for f in by)))
# coord_ds = ds.assign_coords(
# coord=xr.DataArray(coor_arr, collapse_dims=common_dim)
# )
del coord_ds["sample_table"]
del coord_ds["peptide_table"]
del coord_ds["sample_metadata"]
del coord_ds["peptide_metadata"]
# Perform the reduction on all data tables.
collapsed_enrichments = coord_ds.groupby(f"{by[0]}", squeeze=False).reduce(agg_func)
if collapse_dim == "sample":
collapsed_enrichments = collapsed_enrichments.transpose()
# Once the data tables are grouped we have no use for first copy.
del coord_ds
# Compile each of the collapsed xarray variables.
collapsed_xr_dfs = {
f"{dt}": (
["peptide_id", "sample_id"],
collapsed_enrichments[f"{dt}"].to_pandas(),
)
for dt in set(list(collapsed_enrichments.data_vars))
}
cat = _throw_mismatched_features(collapse_df, by)
# Compute mean pairwise correlation for all groups,
# for all enrichment layers - and add it to the
# resulting collapsed sample table
if compute_pw_cc:
mean_pw_cc = _mean_pw_cc_by(ds, by, **kwargs)
cat = cat.merge(mean_pw_cc, left_index=True, right_index=True)
# Insert the correct annotation tables to out dictionary of variables
collapsed_xr_dfs[f"{collapse_dim}_table"] = (
[f"{collapse_dim}_id", f"{collapse_dim}_metadata"],
cat,
)
collapsed_xr_dfs[f"{fixed_dim}_table"] = (
[f"{fixed_dim}_id", f"{fixed_dim}_metadata"],
fixed_df,
)
pds = xr.Dataset(
collapsed_xr_dfs,
coords={
f"{collapse_dim}_id": collapsed_xr_dfs[f"{collapse_dim}_table"][
1
].index.values,
f"{fixed_dim}_id": collapsed_xr_dfs[f"{fixed_dim}_table"][1].index.values,
f"{collapse_dim}_metadata": collapsed_xr_dfs[f"{collapse_dim}_table"][
1
].columns.values,
f"{fixed_dim}_metadata": collapsed_xr_dfs[f"{fixed_dim}_table"][
1
].columns.values,
},
)
return pds