:- lib(promise(exp_go_over_bioc_deps/0,call(exp_go_over_bioc_deps_load))). :- lib(stoics_lib:kv_decompose/3). :- lib(org_gid_map/3). :- lib(bio_list_sort_ne/2). exp_go_over_bioc_deps_load :- lib(suggests(bioc("GOstats"))), lib(suggests(bioc("GSEABase"))), % installed with GOstats lib(suggests(bioc("GO.db"))), % installed with GOstats lib(suggests(bioc("Category"))), % installed with GOstats assert(exp_go_over_bioc_deps). exp_go_gid_default(chicken, symb, ncbi). exp_go_gid_default( human, symb, ncbi). exp_go_gid_default( mouse, symb, mgim). exp_go_gid_default( pig, ensg, ncbi). exp_go_over_defaults( Args, Defs ) :- Defs = [ go('BP'), go_frame(bioc_ann_dbi), go_over_pv_cut(0.05), org(hs), % org_exp_id(ExpId), gid(Gid), gid_to(Gto), stem(go_over), to_file(false), universe(go_exp) ], ( (memberchk(org(InOrg),Args),ground(InOrg)) -> true; InOrg=hs ), bio_db_organism( InOrg, BdOrg ), exp_go_gid_default( BdOrg, Gid, Gto ), options_return( gid_to(Gto), Args, [pack(bio_analytics),pred(ex_go_over/3),option(gid_to(Gto))] ). /** exp_go_over( +CsvF, -GoOver, +Opts ). Perform gene ontology over-representation analysis. For experimental data in CsvF select de-regulated genes and on
those perform over representation analysis in gene ontology.
Results in GoOver are either as a values list or a csv file, the name of which is assumed to be the ground value of GoOVer. Opts * go(GoSec='BP') gene ontology section, in: =|[BP,MF,CC]|= * go_frame(GoFrame=bioc_ann_dbi) which go frame of GoTerms, Evidence and Gid to use. Default, bioc_ann_dbi, uses r_lib(org..eg.db) frames, alternative bio_db uses bio_db tables, before the intro of the option this was the way, so use this for backward compatibility * go_over_pv_cut(PvCut=0.05) p value filter for the results * org(Org=hs) one of bio_db_organism/2 first argument values * gid(Gid) the type of the experimental gene ids for the organism. The default depends on Org, most take symb, apart for pig which takes ensg (Caution, this used to be org_exp_id(OrgExpId).) * gid_to(GidTo) returns the db type for gene ids used in underlying call, mostly ncbi, apart for mouse where it is mgim * stem(Stem=false) stem for output csv file. When false, use basename of CsvF. * to_file(ToF=false) when GoOver is unbound, this controls whether the output goes to a file or a values list * universe(Univ=go_exp) Univ in : =|[genome,go,go_exp,experiment]|= * genome(Gen) all gene identifiers in relevant map predicates * go(Go) all gene identifiers appearing in any ontology * go_exp(GoExp) gene ontology genes that appear in experiment * experiment(Exp) all identifiers in the experiment * ontology(Onto) not implemented yet. like Go above, but only include this Ontology branch Options are also passed to bio_diffex/4. == ?- absolute_file_name( pack('bio_analytics/data/silac/bt.csv'), CsvF ), exp_go_over( CsvF, GoOver, [] ). CsvF = '.../swipl/pack/bio_analytics/data/silac/bt.csv', GoOver = [row('GOBPID', 'Pvalue', adj.pvalue, ...), row('GO:0061387', 0.000187, ...)|...]. ?- absolute_file_name( pack('bio_analytics/data/silac/bt.csv'), CsvF ), exp_go_over( CsvF, GoOver, [stem(here),to_file(true)] ). CsvF = '.../swipl/pack/bio_analytics/data/silac/bt.csv', GoOver = here_gontBP_p0.05_univExp.csv. ?- absolute_file_name( pack('bio_analytics/data/silac/bt.csv'), CsvF ), exp_go_over( CsvF, 'a_file.csv', [stem(here),to_file(true)] ). CsvF = '.../swipl/pack/bio_analytics/data/silac/bt.csv', ?- lib(by_unix). ?- @wc(-l,'a_file.csv'). 183 a_file.csv ?- @wc(-l,'here_gontBP_p0.05_univExp.csv'). 183 here_gontBP_p0.05_univExp.csv true. ?- absolute_file_name( pack('bio_analytics/data/silac/bt.csv'), CsvF ), exp_go_over( CsvF, OverF, [to_file(true)] ). CsvF = '.../swipl/pack/bio_analytics/data/silac/bt.csv', OverF = go_over_gontBP_p0.05_univExp.csv. ?- absolute_file_name( pack('bio_analytics/data/silac/bt.csv'), CsvF ), exp_go_over( CsvF, OverF, [to_file(true),stem(false)] ). CsvF = '.../swipl/pack/bio_analytics/data/silac/bt.csv', OverF = '.../swipl/pack/bio_analytics/data/silac/bt_gontBP_p0.05_univExp.csv'. == @author nicos angelopoulos @version 0.1 2019/5/2 @version 0.2 2022/12/20, =|Univ=go|= and =|Org=gallus|= @version 0.3 2023/6/5, option go_frame(GoFrame) @see go_over_universe/6 */ exp_go_over( CsvF, GoOver, Args ) :- Self = exp_go_over, options_append( Self, Args, Opts ), exp_go_over_bioc_deps, % make sure deps are loaded debug_call( exp_go_over, options, Self/Opts ), bio_diffex( CsvF, DEPrs, NDEPrs, Opts ), bio_db_org_in_opts( Org, Opts ), kv_decompose( DEPrs, DEGenes, _ ), debug_call( exp_go_over, length, de_pairs/DEPrs ), bio_list_sort_ne( DEGenes, DEGenesSet ), debug_call( exp_go_over, length, de_genes_set/DEGenesSet ), bio_list_sort_ne( DEGenes, DEGenesSet ), org_gid_map( DEGenesSet, Gids, Opts ), debug_call( exp_go_over, length, gids/Gids ), options( go_frame(GfOpt), Opts ), go_over_frame( GfOpt, Org, goFrameData, GofOrg ), goFrame <- 'GOFrame'(goFrameData, organism= +GofOrg), goAllFrame <- 'GOAllFrame'(goFrame), gsc <- 'GeneSetCollection'(goAllFrame, setType = 'GOCollection'() ), genes <- Gids, options( universe(UnivOpt), Opts ), go_over_universe( UnivOpt, Org, Gids, NDEPrs, Univ, Opts ), debug_call( exp_go_over, length, universe/Univ ), options( go(GoAspect), Opts ), universe <- Univ, universe <- 'as.character'(universe), genes <- 'as.character'(genes), options( go_over_pv_cut(PvCut), Opts ), ( debugging(Self) -> <- print(paste("length of universe: ", length(universe))) ; true ), % this is in r_lib('Category') gGparams <- 'GSEAGOHyperGParams'( name="bio_analytics_gont", geneSetCollection=gsc, geneIds = genes, universeGeneIds = universe, ontology = + GoAspect, pvalueCutoff = PvCut, conditional = 'FALSE', testDirection = "over" ), over <- hyperGTest(gGparams), dfOver <- 'data.frame'( summary(over) ), dfOver$'adj.pvalue' <- 'p.adjust'( dfOver$'Pvalue', method=+'BH' ), dfOver <- dfOver[*,c(1,2,8,3,4,5,6,7)], Use = use(Self,GoAspect,UnivOpt,PvCut), exp_go_over_return( GoOver, dfOver, CsvF, Use, Opts ). exp_go_over_return( GoOver, DfOveR, _CsvF, _Use, Opts ) :- var( GoOver ), options( to_file(false), Opts ), !, Rlist <- 'as.list'(DfOveR), findall( List, (member(Head=Tail,Rlist),List=[Head|Tail]), Lists), mtx_lists( GoOver, Lists ). exp_go_over_return( GoOver, DfOveR, CsvF, Use, Opts ) :- Use = use(Self,GoAspect,UnivOpt,PvCut), SubSep = '', options( stem(Stem), Opts ), atomic_list_concat( [gont,GoAspect], SubSep, GontAspTkn ), capit( UnivOpt, ShUniv ), atomic_list_concat( [univ,ShUniv], SubSep, UnivTkn ), (Stem == false -> TempF = CsvF; os_ext(csv,Stem,TempF)), atom_concat( p, PvCut, PvTkn ), Postfixes = [GontAspTkn,PvTkn,UnivTkn], (var(GoOver) -> os_postfix(Postfixes, TempF, GoOver) ; true), <- 'write.csv'(DfOveR, file=+GoOver, 'row.names'='FALSE'), % <- print( warnings() ), debuc( Self, 'Wrote: ~p', GoOver ). %% go_over_universe( +Token, +Org, +DEGenes, +NDEPrs, -Universe, +Opts ) % % Universe is the list of gene identifiers to be used as universe/background for GOstats. % % In human (=|Org=hs|=), this is a list of Entrez ids, and in =|Org=mouse|=, a list of Mgim identifiers.
% DEGenes is a list of deferentially expressed gene identifiers, and NDEPrs is a list of non-differential
% expressed Symbol-Pvalue pairs. % % ExpIdTkn identifies the type of ids coming in in NDEPrs, DEGens, are already in standard form for Org. go_over_universe( experiment, Org, DeGids, NDEPrs, Univ, Opts ) :- go_over_universe_exp( Org, DeGids, NDEPrs, Univ, Opts ). go_over_universe( genome, Org, _DEGids, _NDEPrs, Univ, _Opts ) :- go_over_universe_genome( Org, Univ ). go_over_universe( go, Org, _DEGids, _NDEPrs, Univ, _Opts ) :- go_over_universe_go( Org, Univ ). go_over_universe( go_exp, Org, DEGids, NDEPrs, Univ, Opts ) :- go_over_universe_go_exp( Org, DEGids, NDEPrs, Univ, Opts ). go_over_universe_genome( chicken, Univ ) :- findall( Ncbi, cgnc_galg_cgnc_ncbi(_Cgnc,Ncbi), Ncbis ), sort( Ncbis, Univ ). go_over_universe_genome( human, Univ ) :- findall( Ncbi, hgnc_homs_symb_ncbi(_Symb,Ncbi), Ncbis ), sort( Ncbis, Univ ). go_over_universe_genome( mouse, Univ ) :- findall( Mgim, mgim_musm_mgim_symb(Mgim,_Symb), Mgims ), sort( Mgims, Univ ). go_over_universe_genome( pig, Univ ) :- findall( Ncbi, ncbi_suss_ncbi_ensg(Ncbi,_), Ncbis ), sort( Ncbis, Univ ). go_over_universe_go( chicken, Univ ) :- findall( Ncbi, ( gont_galg_symb_gont(Symb,_Rl,_Ev,_Go), cgnc_galg_cgnc_symb(Cgnc,Symb), cgnc_galg_cgnc_ncbi(Cgnc,Ncbi) ), Ncbis ), sort( Ncbis, Univ ). go_over_universe_go( human, Univ ) :- findall( Ncbi, ( gont_homs_edge_symb(_Go,_En,Symb), hgnc_homs_symb_ncbi(Symb,Ncbi) ), Ncbis ), sort( Ncbis, Univ ). go_over_universe_go( mouse, Univ ) :- findall( Mgim, gont_musm_mgim_gont(Mgim,_E,_G), Mgims ), sort( Mgims, Univ ). go_over_universe_go( pig, Univ ) :- findall( Ncbi, (gont_suss_symb_gont(Symb,_,_,_),ense_suss_ensg_symb(EnsG,Symb),ncbi_suss_ncbi_ensg(Ncbi,EnsG)), Ncbis ), sort( Ncbis, Univ ). % fixme: give doc here, what is this for ? go_over_universe_exp( chicken, DeGids, NDEPrs, Univ ) :- findall( Ncbi, ( member(Symb-NDEPrs), cgnc_galg_cgnc_symb(Cgnc,Symb), cgnc_galg_cgnc_ncbi(Cgnc,Ncbi) ), NDENcbis ), append( DeGids, NDENcbis, Ncbis ), sort( Ncbis, Univ ). go_over_universe_exp( human, DeGids, NDEPrs, Univ ) :- findall( Ncbi, (member(Symb-_,NDEPrs),hgnc_homs_symb_ncbi(Symb,Ncbi)), NDENcbis ), % findall( Entz1, (member(Symb,DEGenes),hgnc_homs_symb_entz(Symb,Entz1)), DEEntzs ), % append( DEEntzs, NDEEntzs, Entzs ), append( DeGids, NDENcbis, Ncbis ), sort( Ncbis, Univ ). go_over_universe_exp( mouse, DeGids, NDEPrs, Univ ) :- findall( Mgim, (member(Symb-_,NDEPrs),mgim_musm_mgim_symb(Mgim,Symb)), NDEMgims ), append( DeGids, NDEMgims, Mgims ), sort( Mgims, Univ ). go_over_universe_exp( chicken, DEGids, NDEPrs, Univ, Opts ) :- go_over_universe_exp( gallus, DEGids, NDEPrs, Univ, Opts ). go_over_universe_exp( gallus, DEGids, NDEPrs, Univ, Opts ) :- findall( Id, member(Id-_,NDEPrs), Ids ), org_gid_map( Ids, NDGids, Opts ), append( DEGids, NDGids, ExpGids ), % fixme: we can add tests here, that the ids exist in some table ? bio_list_sort_ne( ExpGids, Univ ). go_over_universe_exp( human, DEGids, NDEPrs, Univ, Opts ) :- findall( Id, member(Id-_,NDEPrs), Ids ), org_gid_map( Ids, NDGids, Opts ), append( DEGids, NDGids, ExpGids ), % fixme: we can add tests here, that the ids exist in some table ? bio_list_sort_ne( ExpGids, Univ ). go_over_universe_exp( mouse, DEGids, NDEPrs, Univ, Opts ) :- findall( Id, member(Id-_,NDEPrs), Ids ), org_gid_map( Ids, NDGids, Opts ), append( DEGids, NDGids, ExpGids ), % fixme: we can add tests here, that the ids exist in some table ? bio_list_sort_ne( ExpGids, Univ ). go_over_universe_exp( pig, DEGids, NDEPrs, Univ, Opts ) :- findall( Id, member(Id-_,NDEPrs), Ids ), org_gid_map( Ids, NDGids, Opts ), append( DEGids, NDGids, ExpGids ), % fixme: we can add tests here, that the ids exist in some table ? bio_list_sort_ne( ExpGids, Univ ). go_over_universe_go_exp( chicken, DEGids, NDEPrs, Univ, Opts ) :- findall( Id, member(Id-_,NDEPrs), Ids ), org_gid_map( Ids, NDGids, Opts ), append( DEGids, NDGids, ExpGids ), findall( ExpGid,( member(ExpGid,ExpGids), % these are std form, here NCBI Gene ids cgnc_galg_cgnc_ncbi(Cgnc,ExpGid), cgnc_galg_cgnc_symb(Cgnc,Symb), once(gont_galg_symb_gont(Symb,_,_,_)) ), List ), bio_list_sort_ne( List, Univ ). go_over_universe_go_exp( human, DEGids, NDEPrs, Univ, Opts ) :- findall( Id, member(Id-_,NDEPrs), Ids ), org_gid_map( Ids, NDGids, Opts ), append( DEGids, NDGids, ExpGids ), findall( Ncbi, ( member(Ncbi,ExpGids), hgnc_homs_symb_ncbi(Symb,Ncbi), once(gont_homs_edge_symb(_,_,Symb)) ), List ), bio_list_sort_ne( List, Univ ). go_over_universe_go_exp( mouse, DEGids, NDEPrs, Univ, Opts ) :- findall( Id, member(Id-_,NDEPrs), Ids ), org_gid_map( Ids, NDGids, Opts ), append( DEGids, NDGids, ExpGids ), findall( Mgim, ( member(Mgim,ExpGids), mgim_musm_mgim_symb(Mgim,Symb), once(gont_musm_gont_symb(_,_,Symb)) ), List ), bio_list_sort_ne( List, Univ ). go_over_universe_go_exp( pig, DEGids, NDEPrs, Univ, Opts ) :- % fixme: we should be using go_exp probably, because it only comes gont only comes with symbs and unlikely to have a good coverage. findall( Id, member(Id-_,NDEPrs), Ids ), org_gid_map( Ids, NDGids, Opts ), append( DEGids, NDGids, ExpGids ), findall( Ncbi, ( member(Ncbi,ExpGids), ncbi_suss_ncbi_ensg(Ncbi,EnsG), ense_suss_ensg_symb(EnsG,Symb), gont_suss_symb_gont(Symb,_,_,_) ), Ncbis ), bio_list_sort_ne( Ncbis, Univ ). go_over_frame( bioc_ann_dbi, Org, GoFra, DbiOrg ) :- bio_conductor_annot_dbi_org( Org, DbiTkn, DbiOrg ), bio_conductor_annot_dbi_org_lib( DbiTkn, true, _DbiLib ), atom_string( DbiTknAtm, DbiTkn ), atomic_list_concat( [org,DbiTknAtm,'egGO'], '.', EgGO ), ggframe <- toTable(EgGO), GoFra <- 'data.frame'(ggframe$go_id, ggframe$'Evidence', ggframe$gene_id), !. go_over_frame( bio_db, Org, GoFra, DbiOrg ) :- go_over_frame_bio_db( Org, GoFra, DbiOrg ), !. go_over_frame( GFopt, OrgOpt, _GoFra, _DbiOrg ) :- throw( cannot_coordinate_exp_go_over_options(go_frame(GFopt),org(OrgOpt)) ). % fixme: these should go to src/bio_org.pl % go_over_frame_bio_db( chicken, GoFra, DbiOrg ) :- findall( row(Gid,E,Ncbi), ( gont_galg_symb_gont(Symb,_,E,G), go_id(Gid,G), cgnc_galg_cgnc_symb(Cgnc,Symb), cgnc_galg_cgnc_ncbi(Cgnc,Ncbi) ), Rows ), go_mtx_df( [row(go_id,evidence,gene_id)|Rows], GoFra, [] ), DbiOrg = "Gallus gallus". go_over_frame_bio_db( human, GoFra, DbiOrg ) :- findall( row(Gid,E,Ncbi), ( gont_homs_edge_symb(G,E,S), go_id(Gid,G), hgnc_homs_symb_ncbi(S,Ncbi) ), Rows ), go_mtx_df( [row(go_id,'Evidence',gene_id)|Rows], GoFra, [] ), DbiOrg = "Homo sapiens". go_over_frame_bio_db( mouse, GoFra, DbiOrg ) :- findall( row(Gid,E,M), ( gont_musm_mgim_gont(M,E,G), go_id(Gid,G) ), Rows ), go_mtx_df( [row(go_id,evidence,gene_id)|Rows], GoFra, [] ), DbiOrg = "Mus musculus". go_over_frame_bio_db( pig, GoFra, DbiOrg ) :- findall( row(Gid,Evi,Ncbi), ( gont_suss_symb_gont(Symb,_,Evi,G), go_id(Gid,G), ense_suss_ensg_symb(EnsG,Symb), ncbi_suss_ensg_ncbi(EnsG,Ncbi) ), Rows ), go_mtx_df( [row(go_id,evidence,gene_id)|Rows], GoFra, [] ), DbiOrg = "Sus scrofa". capit( Atom, Capit ) :- atom_codes( Atom, [A,B,C|_] ), atom_codes( Aat, [A] ), atom_codes( BCat, [B,C] ), upcase_atom( Aat, CapA ), downcase_atom( BCat, DwnBC ), atom_concat( CapA, DwnBC, Capit ). go_mtx_df_defaults([check_names(false)]). go_mtx_df( Csv, Df, Args ) :- options_append( go_mtx_df, Args, Opts ), mtx_lists( Csv, Lists ), findall( Head=Tail, ( member(List,Lists), List=[Head|Tail] ), Pairs ), Df <- Pairs, ( options(check_names(true),Opts) -> Check = 'T'; Check = 'F' ), Df <- 'data.frame'(Df,'check.names'=Check).