IO (io.R)¶
Generally, this module contains the GCT class and relevant method definitions. These are:
GCT Object (S4 class)¶
#An S4 class to represent a GCT object
@slot mat a numeric matrix
@slot rid a character vector of row ids
@slot cid a character vector of column ids
@slot rdesc a \code{data.frame} of row descriptors
@slot rdesc a \code{data.frame} of column descriptors
@slot src a character indicating the source (usually file path) of the data
@description The GCT class serves to represent annotated
matrices. The \code{mat} slot contains said data and the
\code{rdesc} and \code{cdesc} slots contain data frames with
annotations about the rows and columns, respectively
@seealso \code{\link{parse.gctx}}, \code{\link{write.gctx}}, \code{\link{read.gctx.meta}}, \code{\link{read.gctx.ids}}
@seealso \link{http://clue.io/help} for more information on the GCT format
setClass("GCT",
representation(
mat = "matrix",
rid = "character",
cid = "character",
rdesc = "data.frame",
cdesc = "data.frame",
version = "character",
src = "character"
)
)
GCTX parsing functions¶
Parse a .gct or .gctx file to GCT object
parse.gctx <- function(fname, rid=NULL, cid=NULL, set_annot_rownames=F, matrix_only=F)
@param fname path to the GCTX file on disk
@param rid either a vector of character or integer
row indices or a path to a grp file containing character
row indices. Only these indicies will be parsed from the
file.
@param cid either a vector of character or integer
column indices or a path to a grp file containing character
column indices. Only these indicies will be parsed from the
file.
@param set_annot_rownames boolean indicating whether to set the
rownames on the row/column metadata data.frames. Set this to
false if the GCTX file has duplicate row/column ids.
@param matrix_only boolean indicating whether to parse only
the matrix (ignoring row and column annotations)
@details \code{parse.gctx} also supports parsing of plain text
GCT files, so this function can be used as a general GCT parser.
@examples
gct_file <- system.file("extdata", "modzs_n272x978.gctx", package="roller")
(ds <- parse.gctx(gct_file))
# matrix only
(ds <- parse.gctx(gct_file, matrix_only=T))
# only the first 10 rows and columns
(ds <- parse.gctx(gct_file, rid=1:10, cid=1:10))
@family GCTX parsing functions
Parse row/column metadata only
read.gctx.meta <- function(gctx_path, dimension="row", ids=NULL, set_annot_rownames=T)
@param gctx_path the path to the GCTX file
@param dimension which metadata to read (row or column)
@param ids a character vector of a subset of row/column ids
for which to read the metadata
@param set_annot_rownames a boolean indicating whether to set the
\code{rownames} addtribute of the returned \code{data.frame} to
the corresponding row/column ids.
@return a \code{data.frame} of metadata
@examples
gct_file <- system.file("extdata", "modzs_n272x978.gctx", package="roller")
# row meta
row_meta <- read.gctx.meta(gct_file)
str(row_meta)
# column meta
col_meta <- read.gctx.meta(gct_file, dimension="column")
str(col_meta)
# now for only the first 10 ids
col_meta_first10 <- read.gctx.meta(gct_file, dimension="column", ids=col_meta$id[1:10])
str(col_meta_first10)
@family GCTX parsing functions
Parse row/column ids only
read.gctx.ids <- function(gctx_path, dimension="row")
#Read GCTX row or column ids
@param gctx_path path to the GCTX file
@param dimension which ids to read (row or column)
@return a character vector of row or column ids from the provided file
@examples
gct_file <- system.file("extdata", "modzs_n272x978.gctx", package="roller")
# row ids
rid <- read.gctx.ids(gct_file)
head(rid)
# column ids
cid <- read.gctx.ids(gct_file, dimension="column")
head(cid)
@family GCTX parsing functions
GCTX writing functions¶
Write a GCT object to disk in .gct format
write.gct <- function(ds, ofile, precision=4, appenddim=T, ver=3)
@param ds the GCT object
@param ofile the desired output filename
@param precision the numeric precision at which to
save the matrix. See \code{details}.
@param appenddim boolean indicating whether to append
matrix dimensions to filename
@param ver the GCT version to write. See \code{details}.
@details Since GCT is text format, the higher \code{precision}
you choose, the larger the file size.
\code{ver} is assumed to be 3, aka GCT version 1.3, which supports
embedded row and column metadata in the GCT file. Any other value
passed to \code{ver} will result in a GCT version 1.2 file which
contains only the matrix data and no annotations.
@return NULL
@examples
\dontrun{
write.gct(ds, "dataset", precision=2)
}
@family GCTX parsing functions
Write a GCT object to disk in .gctx format
write.gctx <- function(ds, ofile, appenddim=T, compression_level=0, matrix_only=F)
@param ds a GCT object
@param ofile the desired file path for writing
@param appenddim boolean indicating whether the
resulting filename will have dimensions appended
(e.g. my_file_n384x978.gctx)
@param compression_level integer between 1-9 indicating
how much to compress data before writing. Higher values
result in smaller files but slower read times.
@param matrix_only boolean indicating whether to write
only the matrix data (and skip row, column annotations)
@examples
\dontrun{
# assume ds is a GCT object
write.gctx(ds, "my/desired/outpath/and/filename")
}
@family GCTX parsing functions
Write a ``data.frame`` of metadata only to a GCTX file
write.gctx.meta <- function(ofile, df, dimension="row")
@param ofile the desired file path for writing
@param df the \code{data.frame} of annotations
@param dimension the dimension to annotate
(row or column)
@examples
\dontrun{
# assume ds is a GCT object
write.gctx.meta("/my/file/path", cdesc_char, dimension="col")
}
@family GCTX parsing functions
@keywords internal
Parsing GRP files¶
Parse a .grp file to vector
parse.grp <- function(fname)
@param fname the file path to be parsed
@return a vector of the contents of \code{fname}
@examples
grp_path <- system.file("extdata", "lm_epsilon_n978.grp", package="roller")
values <- parse.grp(grp_path)
str(values)
@family CMap parsing functions
@seealso \link{http://clue.io/help} for details on the GRP file format
Writing to .grp files¶
Write a vector to a .grp file
write.grp <- function(vals, fname)
@param vals the vector of values to be written
@param fname the desired file name
@examples
\dontrun{
write.grp(letters, "letter.grp")
}
@family CMap parsing functions
@seealso \link{http://clue.io/help} for details on the GRP file format
Parsing GMX files¶
Parse a .gmx file to a list
parse.gmx <- function(fname)
@param fname the file path to be parsed
@return a list of the contents of \code{fname}. See details.
@details \code{parse.gmx} returns a nested list object. The top
level contains one list per column in \code{fname}. Each of
these is itself a list with the following fields:
- \code{head}: the name of the data (column in \code{fname})
- \code{desc}: description of the corresponding data
- \code{len}: the number of data items
- \code{entry}: a vector of the data items
@examples
gmx_path <- system.file("extdata", "lm_probes.gmx", package="roller")
gmx <- parse.gmx(gmx_path)
str(gmx)
@family CMap parsing functions
@seealso \link{http://clue.io/help} for details on the GMX file format
Parsing GMT files¶
Parse a .gmt file to a list
parse.gmt <- function(fname)
@param fname the file path to be parsed
@return a list of the contents of \code{fname}. See details.
@details \code{parse.gmt} returns a nested list object. The top
level contains one list per row in \code{fname}. Each of
these is itself a list with the following fields:
- \code{head}: the name of the data (row in \code{fname})
- \code{desc}: description of the corresponding data
- \code{len}: the number of data items
- \code{entry}: a vector of the data items
@examples
gmt_path <- system.file("extdata", "query_up.gmt", package="roller")
gmt <- parse.gmt(gmt_path)
str(gmt)
@family CMap parsing functions
@seealso \link{http://clue.io/help} for details on the GMT file format
Writing to GMT files¶
write.gmt <- function(lst, fname)
@param lst the nested list to write. See \code{details}.
@param fname the desired file name
@details \code{lst} needs to be a nested list where each
sub-list is itself a list with the following fields:
- \code{head}: the name of the data
- \code{desc}: description of the corresponding data
- \code{len}: the number of data items
- \code{entry}: a vector of the data items
@examples
\dontrun{
write.gmt(gene_set, "gene_set.gmt")
}
@family CMap parsing functions
@seealso \link{http://clue.io/help} for details on the GMT file format
Writing a data.frame
to a tsv file¶
write.tbl <- function(tbl, ofile, ...)
@param tbl the \code{data.frame} to be written
@param ofile the desired file name
@param ... additional arguments passed on to \code{write.table}
@details This method simply calls \code{write.table} with some
preset arguments that generate a unquoted, tab-delimited file
without row names.
@examples
\dontrun{
write.tbl(cdesc_char, "col_meta.txt")
}
@seealso \code{\link{write.table}}