%% Bibliography database. Text outside entries (such as these %% section
%% labels) is ignored by BibTeX. Citation keys are part of the public
%% interface of this file and must not be renamed.

%% Blacklisted regions
@misc{anshulkundaje_2014,
  title   = {(2014) mod/mouse/{humanENCODE}: {Blacklisted} genomic regions for functional genomics analysis - {Anshul} {Kundaje}},
  url     = {https://sites.google.com/site/anshulkundaje/projects/blacklists},
  urldate = {2016-08-26},
  file    = {(2014) mod/mouse/humanENCODE\: Blacklisted genomic regions for functional genomics analysis - Anshul Kundaje:/Users/steph/Documents/Zotero/storage/BD4QQH8K/blacklists.html:text/html}
}

%% bedtools
@article{quinlan_bedtools:_2010,
  author     = {Quinlan, Aaron R. and Hall, Ira M.},
  title      = {{BEDTools}: a flexible suite of utilities for comparing genomic features},
  shorttitle = {{BEDTools}},
  journal    = {Bioinformatics},
  volume     = {26},
  number     = {6},
  pages      = {841--842},
  month      = mar,
  year       = {2010},
  issn       = {1367-4803},
  doi        = {10.1093/bioinformatics/btq033},
  url        = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2832824/},
  pmid       = {20110278},
  pmcid      = {PMC2832824},
  abstract   = {Motivation: Testing for correlations between different sets of genomic features is a fundamental task in genomics research. However, searching for overlaps between features with existing web-based methods is complicated by the massive datasets that are routinely produced with current sequencing technologies. Fast and flexible tools are therefore required to ask complex questions of these data in an efficient manner., Results: This article introduces a new software suite for the comparison, manipulation and annotation of genomic features in Browser Extensible Data (BED) and General Feature Format (GFF) format. BEDTools also supports the comparison of sequence alignments in BAM format to both BED and GFF features. The tools are extremely efficient and allow the user to compare large datasets (e.g. next-generation sequencing data) with both public and custom genome annotation tracks. BEDTools can be combined with one another as well as with standard UNIX commands, thus facilitating routine genomics tasks as well as pipelines that can quickly answer intricate questions of large genomic datasets., Availability and implementation: BEDTools was written in C++. Source code and a comprehensive user manual are freely available at http://code.google.com/p/bedtools, Contact: aaronquinlan@gmail.com; imh4y@virginia.edu, Supplementary information: Supplementary data are available at Bioinformatics online.},
  file       = {PubMed Central Full Text PDF:/Users/slegras/Library/Application Support/Zotero/Profiles/l2r21qzc.default/zotero/storage/P26FE2CK/Quinlan et Hall - 2010 - BEDTools a flexible suite of utilities for compar.pdf:application/pdf}
}

%% Bowtie
@article{langmead_ultrafast_2009,
  author    = {Langmead, Ben and Trapnell, Cole and Pop, Mihai and Salzberg, Steven L.},
  title     = {Ultrafast and memory-efficient alignment of short {DNA} sequences to the human genome},
  journal   = {Genome Biology},
  volume    = {10},
  number    = {3},
  pages     = {R25},
  month     = mar,
  year      = {2009},
  issn      = {1465-6906},
  doi       = {10.1186/gb-2009-10-3-r25},
  url       = {http://genomebiology.com/2009/10/3/R25/abstract},
  urldate   = {2013-07-11},
  pmid      = {19261174},
  language  = {en},
  copyright = {2009 Langmead et al.; licensee BioMed Central Ltd.},
  abstract  = {Bowtie is an ultrafast, memory-efficient alignment program for aligning short DNA sequence reads to large genomes. For the human genome, Burrows-Wheeler indexing allows Bowtie to align more than 25 million reads per CPU hour with a memory footprint of approximately 1.3 gigabytes. Bowtie extends previous Burrows-Wheeler techniques with a novel quality-aware backtracking algorithm that permits mismatches. Multiple processor cores can be used simultaneously to achieve even greater alignment speeds. Bowtie is open source http://bowtie.cbcb.umd.edu.},
  file      = {Full Text PDF:/Users/steph/Documents/Zotero/storage/UBJ7QSJI/Langmead et al. - 2009 - Ultrafast and memory-efficient alignment of short .pdf:application/pdf;Snapshot:/Users/steph/Documents/Zotero/storage/Q3UVVUK6/R25.html:text/html}
}

%% Homer
%% PMID/PMCID are stored in dedicated fields (as in the other Zotero
%% exports in this file) rather than in a free-text note.
@article{heinz_simple_2010,
  author   = {Heinz, Sven and Benner, Christopher and Spann, Nathanael and Bertolino, Eric and Lin, Yin C. and Laslo, Peter and Cheng, Jason X. and Murre, Cornelis and Singh, Harinder and Glass, Christopher K.},
  title    = {Simple combinations of lineage-determining transcription factors prime cis-regulatory elements required for macrophage and {B} cell identities},
  journal  = {Molecular Cell},
  volume   = {38},
  number   = {4},
  pages    = {576--589},
  month    = may,
  year     = {2010},
  issn     = {1097-2765},
  doi      = {10.1016/j.molcel.2010.05.004},
  url      = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2898526/},
  urldate  = {2013-07-11},
  pmid     = {20513432},
  pmcid    = {PMC2898526},
  abstract = {Genome-scale studies have revealed extensive, cell type-specific co-localization of transcription factors, but the mechanisms underlying this phenomenon remain poorly understood. Here we demonstrate in macrophages and B cells that collaborative interactions of the common factor {PU.1} with small sets of macrophage- or B celllineage-determining transcription factors establish cell-specific binding sites that are associated with the majority of promoter-distal {H3K4me1-marked} genomic regions. {PU.1} binding initiates nucleosome remodeling followed by {H3K4} monomethylation at large numbers of genomic regions associated with both broadly and specifically expressed genes. These locations serve as beacons for additional factors, exemplified by liver X receptors, which drive both cell-specific gene expression and signal-dependent responses. Together with analyses of transcription factor binding and {H3K4me1} patterns in other cell types, these studies suggest that simple combinations of lineage-determining transcription factors can specify the genomic sites ultimately responsible for both cell identity and cell type-specific responses to diverse signaling inputs.},
  file     = {PubMed Central Full Text PDF:/Users/steph/Documents/Zotero/storage/DUM3FHZC/Heinz et al. - 2010 - Simple combinations of lineage-determining transcr.pdf:application/pdf}
}

%% Normalization
%% Given names follow the HTSeq entry below; issue, article number and DOI
%% were added from the published record (please verify against the journal).
@article{AND2010,
  author  = {Anders, Simon and Huber, Wolfgang},
  title   = {Differential expression analysis for sequence count data},
  journal = {Genome Biology},
  volume  = {11},
  number  = {10},
  pages   = {R106},
  year    = {2010},
  doi     = {10.1186/gb-2010-11-10-r106}
}

%% htseq
@article{anders_htseqpython_2015,
  author   = {Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang},
  title    = {{HTSeq}—a {Python} framework to work with high-throughput sequencing data},
  journal  = {Bioinformatics},
  volume   = {31},
  number   = {2},
  pages    = {166--169},
  month    = jan,
  year     = {2015},
  issn     = {1367-4803},
  doi      = {10.1093/bioinformatics/btu638},
  url      = {https://academic.oup.com/bioinformatics/article/31/2/166/2366196/HTSeq-a-Python-framework-to-work-with-high},
  urldate  = {2017-08-18},
  abstract = {Motivation: A large choice of tools exists for many standard tasks in the analysis of high-throughput sequencing (HTS) data. However, once a project deviates from standard workflows, custom scripts are needed. Results: We present HTSeq, a Python library to facilitate the rapid development of such scripts. HTSeq offers parsers for many common data formats in HTS projects, as well as classes to represent data, such as genomic coordinates, sequences, sequencing reads, alignments, gene model information and variant calls, and provides data structures that allow for querying via genomic coordinates. We also present htseq-count, a tool developed with HTSeq that preprocesses RNA-Seq data for differential expression analysis by counting the overlap of reads with genes. Availability and implementation: HTSeq is released as an open-source software under the GNU General Public Licence and available from http://www-huber.embl.de/HTSeq or from the Python Package Index at https://pypi.python.org/pypi/HTSeq . Contact:sanders@fs.tum.de},
  file     = {Full Text PDF:/Users/slegras/Zotero/storage/435PCSI8/Anders et al. - 2015 - HTSeq—a Python framework to work with high-through.pdf:application/pdf;Snapshot:/Users/slegras/Zotero/storage/U23EHITU/btu638.html:text/html}
}

%% cutadapt
%% The page range is stored without a "pp." prefix; the style adds it.
@article{martin_cutadapt_2011,
  author    = {Martin, Marcel},
  title     = {Cutadapt removes adapter sequences from high-throughput sequencing reads},
  journal   = {EMBnet.journal},
  volume    = {17},
  number    = {1},
  pages     = {10--12},
  month     = may,
  year      = {2011},
  issn      = {2226-6089},
  doi       = {10.14806/ej.17.1.200},
  url       = {http://journal.embnet.org/index.php/embnetjournal/article/view/200},
  urldate   = {2017-10-06},
  language  = {en},
  keywords  = {adapter removal, microRNA, next generation sequencing, small RNA},
  copyright = {Authors who publish with this journal agree to the following terms: Authors retain copyright and grant the journal right of first publication with the work simultaneously licensed under a Creative Commons Attribution License that allows others to share the work with an acknowledgement of the work's authorship and initial publication in this journal. Authors are able to enter into separate, additional contractual arrangements for the non-exclusive distribution of the journal's published version of the work (e.g., post it to an institutional repository or publish it in a book), with an acknowledgement of its initial publication in this journal. Authors are permitted and encouraged to post their work online (e.g., in institutional repositories or on their website) prior to and during the submission process, as it can lead to productive exchanges, as well as earlier and greater citation of published work (See The Effect of Open Access ).},
  file      = {Full Text PDF:/Users/slegras/Library/Application Support/Zotero/Profiles/l2r21qzc.default/zotero/storage/TP2MNUNP/Martin - 2011 - Cutadapt removes adapter sequences from high-throu.pdf:application/pdf;Snapshot:/Users/slegras/Library/Application Support/Zotero/Profiles/l2r21qzc.default/zotero/storage/687E34MS/479.html:text/html}
}

%% STAR
@article{dobin_star:_2013,
  author     = {Dobin, Alexander and Davis, Carrie A. and Schlesinger, Felix and Drenkow, Jorg and Zaleski, Chris and Jha, Sonali and Batut, Philippe and Chaisson, Mark and Gingeras, Thomas R.},
  title      = {{STAR}: ultrafast universal {RNA}-seq aligner},
  shorttitle = {{STAR}},
  journal    = {Bioinformatics},
  volume     = {29},
  number     = {1},
  pages      = {15--21},
  month      = jan,
  year       = {2013},
  issn       = {1367-4803},
  doi        = {10.1093/bioinformatics/bts635},
  url        = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3530905/},
  pmid       = {23104886},
  pmcid      = {PMC3530905},
  abstract   = {Motivation: Accurate alignment of high-throughput RNA-seq data is a challenging and yet unsolved problem because of the non-contiguous transcript structure, relatively short read lengths and constantly increasing throughput of the sequencing technologies. Currently available RNA-seq aligners suffer from high mapping error rates, low mapping speed, read length limitation and mapping biases., Results: To align our large ({\textgreater}80 billon reads) ENCODE Transcriptome RNA-seq dataset, we developed the Spliced Transcripts Alignment to a Reference (STAR) software based on a previously undescribed RNA-seq alignment algorithm that uses sequential maximum mappable seed search in uncompressed suffix arrays followed by seed clustering and stitching procedure. STAR outperforms other aligners by a factor of {\textgreater}50 in mapping speed, aligning to the human genome 550 million 2 × 76 bp paired-end reads per hour on a modest 12-core server, while at the same time improving alignment sensitivity and precision. In addition to unbiased de novo detection of canonical junctions, STAR can discover non-canonical splices and chimeric (fusion) transcripts, and is also capable of mapping full-length RNA sequences. Using Roche 454 sequencing of reverse transcription polymerase chain reaction amplicons, we experimentally validated 1960 novel intergenic splice junctions with an 80–90\% success rate, corroborating the high precision of the STAR mapping strategy., Availability and implementation: STAR is implemented as a standalone C++ code. STAR is free open source software distributed under GPLv3 license and can be downloaded from http://code.google.com/p/rna-star/., Contact: dobin@cshl.edu.},
  file       = {PubMed Central Full Text PDF:/Users/slegras/Zotero/storage/J4M7GM4B/Dobin et al. - 2013 - STAR ultrafast universal RNA-seq aligner.pdf:application/pdf}
}

@article{laurette_transcription_2015,
  author   = {Laurette, Patrick and Strub, Thomas and Koludrovic, Dana and Keime, Céline and Le Gras, Stéphanie and Seberg, Hannah and Van Otterloo, Eric and Imrichova, Hana and Siddaway, Robert and Aerts, Stein and Cornell, Robert A and Mengus, Gabrielle and Davidson, Irwin},
  editor   = {Green, Michael R},
  title    = {Transcription factor {MITF} and remodeller {BRG1} define chromatin organisation at regulatory elements in melanoma cells},
  journal  = {eLife},
  volume   = {4},
  pages    = {e06857},
  month    = mar,
  year     = {2015},
  issn     = {2050-084X},
  doi      = {10.7554/eLife.06857},
  url      = {https://doi.org/10.7554/eLife.06857},
  urldate  = {2021-05-10},
  note     = {Publisher: eLife Sciences Publications, Ltd},
  keywords = {CHD7, chromatin remodelling, enhancer, SOX10, TFAP2A, YY1},
  abstract = {Microphthalmia-associated transcription factor (MITF) is the master regulator of the melanocyte lineage. To understand how MITF regulates transcription, we used tandem affinity purification and mass spectrometry to define a comprehensive MITF interactome identifying novel cofactors involved in transcription, DNA replication and repair, and chromatin organisation. We show that MITF interacts with a PBAF chromatin remodelling complex comprising BRG1 and CHD7. BRG1 is essential for melanoma cell proliferation in vitro and for normal melanocyte development in vivo. MITF and SOX10 actively recruit BRG1 to a set of MITF-associated regulatory elements (MAREs) at active enhancers. Combinations of MITF, SOX10, TFAP2A, and YY1 bind between two BRG1-occupied nucleosomes thus defining both a signature of transcription factors essential for the melanocyte lineage and a specific chromatin organisation of the regulatory elements they occupy. BRG1 also regulates the dynamics of MITF genomic occupancy. MITF-BRG1 interplay thus plays an essential role in transcription regulation in melanoma.},
  file     = {Full Text PDF:/Users/slegras/Zotero/storage/D5MUK6XM/Laurette et al. - 2015 - Transcription factor MITF and remodeller BRG1 defi.pdf:application/pdf}
}

%% DEG
%% Package names are brace-protected so sentence-casing styles keep them.
@article{Robinson2010,
  author  = {Robinson, Mark D. and McCarthy, Davis J. and Smyth, Gordon K.},
  title   = {{edgeR}: a {Bioconductor} package for differential expression analysis of digital gene expression data},
  journal = {Bioinformatics},
  volume  = {26},
  number  = {1},
  pages   = {139--140},
  year    = {2010},
  doi     = {10.1093/bioinformatics/btp616}
}

%% GRanges
%% Identifiers are stored as bare doi/pmid/pmcid fields instead of
%% hyperref markup in a note; the bibliography style decides how to print them.
@article{pmid23950696,
  author  = {Lawrence, M. and Huber, W. and Pages, H. and Aboyoun, P. and Carlson, M. and Gentleman, R. and Morgan, M. T. and Carey, V. J.},
  title   = {Software for computing and annotating genomic ranges},
  journal = {PLoS Comput. Biol.},
  volume  = {9},
  number  = {8},
  pages   = {e1003118},
  year    = {2013},
  doi     = {10.1371/journal.pcbi.1003118},
  pmid    = {23950696},
  pmcid   = {PMC3738458}
}

%% TFBSTools
@article{pmid26794315,
  author  = {Tan, G. and Lenhard, B.},
  title   = {{TFBSTools}: an {R}/bioconductor package for transcription factor binding site analysis},
  journal = {Bioinformatics},
  volume  = {32},
  number  = {10},
  pages   = {1555--1556},
  month   = may,
  year    = {2016}
}

%% TFFM
@article{pmid24039567,
  author    = {Mathelier, Anthony and Wasserman, Wyeth W},
  title     = {The next generation of transcription factor binding site prediction},
  journal   = {PLoS Comput. Biol.},
  publisher = {Public Library of Science (PLoS)},
  volume    = {9},
  number    = {9},
  pages     = {e1003214},
  month     = sep,
  year      = {2013},
  language  = {en},
  abstract  = {Finding where transcription factors (TFs) bind to the DNA is of key importance to decipher gene regulation at a transcriptional level. Classically, computational prediction of TF binding sites (TFBSs) is based on basic position weight matrices (PWMs) which quantitatively score binding motifs based on the observed nucleotide patterns in a set of TFBSs for the corresponding TF. Such models make the strong assumption that each nucleotide participates independently in the corresponding DNA-protein interaction and do not account for flexible length motifs. We introduce transcription factor flexible models (TFFMs) to represent TF binding properties. Based on hidden Markov models, TFFMs are flexible, and can model both position interdependence within TFBSs and variable length motifs within a single dedicated framework. The availability of thousands of experimentally validated DNA-TF interaction sequences from ChIP-seq allows for the generation of models that perform as well as PWMs for stereotypical TFs and can improve performance for TFs with flexible binding characteristics. We present a new graphical representation of the motifs that convey properties of position interdependence. TFFMs have been assessed on ChIP-seq data sets coming from the ENCODE project, revealing that they can perform better than both PWMs and the dinucleotide weight matrix extension in discriminating ChIP-seq from background sequences. Under the assumption that ChIP-seq signal values are correlated with the affinity of the TF-DNA binding, we find that TFFM scores correlate with ChIP-seq peak signals. Moreover, using available TF-DNA affinity measurements for the Max TF, we demonstrate that TFFMs constructed from ChIP-seq data correlate with published experimentally measured DNA-binding affinities. Finally, TFFMs allow for the straightforward computation of an integrated TF occupancy score across a sequence. These results demonstrate the capacity of TFFMs to accurately model DNA-protein interactions, while providing a single unified framework suitable for the next generation of TFBS prediction.}
}

%% SummarizedExperiment
@article{pmid25633503,
  author  = {Huber, W. and Carey, V. J. and Gentleman, R. and Anders, S. and Carlson, M. and Carvalho, B. S. and Bravo, H. C. and Davis, S. and Gatto, L. and Girke, T. and Gottardo, R. and Hahne, F. and Hansen, K. D. and Irizarry, R. A. and Lawrence, M. and Love, M. I. and MacDonald, J. and Obenchain, V. and Ole, A. K. and Pages, H. and Reyes, A. and Shannon, P. and Smyth, G. K. and Tenenbaum, D. and Waldron, L. and Morgan, M.},
  title   = {Orchestrating high-throughput genomic analysis with {Bioconductor}},
  journal = {Nat. Methods},
  volume  = {12},
  number  = {2},
  pages   = {115--121},
  month   = jan,
  year    = {2015},
  doi     = {10.1038/nmeth.3252},
  pmid    = {25633503}
}

%% rtracklayer
@article{pmid19468054,
  author  = {Lawrence, M. and Gentleman, R. and Carey, V.},
  title   = {{rtracklayer}: an {R} package for interfacing with genome browsers},
  journal = {Bioinformatics},
  volume  = {25},
  number  = {14},
  pages   = {1841--1842},
  month   = jul,
  year    = {2009},
  doi     = {10.1093/bioinformatics/btp328},
  pmid    = {19468054},
  pmcid   = {PMC2705236}
}

%% ChIPseeker
@article{pmid36286622,
  author    = {Wang, Qianwen and Li, Ming and Wu, Tianzhi and Zhan, Li and Li, Lin and Chen, Meijun and Xie, Wenqin and Xie, Zijing and Hu, Erqiang and Xu, Shuangbin and Yu, Guangchuang},
  title     = {Exploring epigenomic datasets by {ChIPseeker}},
  journal   = {Curr. Protoc.},
  publisher = {Wiley},
  volume    = {2},
  number    = {10},
  pages     = {e585},
  month     = oct,
  year      = {2022},
  language  = {en}
}