2018
Oti, Martin; Sammeth, Michael
Comparative Genomics in Homo sapiens Journal Article
In: Methods Mol Biol, vol. 1704, pp. 451–472, 2018, ISSN: 1940-6029.
@article{pmid29277878,
  author    = {Oti, Martin and Sammeth, Michael},
  title     = {Comparative Genomics in {Homo} sapiens},
  journal   = {Methods Mol Biol},
  year      = {2018},
  date      = {2018-01-01},
  volume    = {1704},
  pages     = {451--472},
  issn      = {1940-6029},
  doi       = {10.1007/978-1-4939-7463-4_18},
  abstract  = {Genomes can be compared at different levels of divergence, either between species or within species. Within species genomes can be compared between different subpopulations, such as human subpopulations from different continents. Investigating the genomic differences between different human subpopulations is important when studying complex diseases that are affected by many genetic variants, as the variants involved can differ between populations. The 1000 Genomes Project collected genome-scale variation data for 2504 human individuals from 26 different populations, enabling a systematic comparison of variation between human subpopulations. In this chapter, we present step-by-step a basic protocol for the identification of population-specific variants employing the 1000 Genomes data. These variants are subsequently further investigated for those that affect the proteome or RNA splice sites, to investigate potentially biologically relevant differences between the populations.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Oti, Martin; Pane, Attilio; Sammeth, Michael
Comparative Genomics in Drosophila Journal Article
In: Methods Mol Biol, vol. 1704, pp. 433–450, 2018, ISSN: 1940-6029.
@article{pmid29277877,
  author    = {Oti, Martin and Pane, Attilio and Sammeth, Michael},
  title     = {Comparative Genomics in {Drosophila}},
  journal   = {Methods Mol Biol},
  year      = {2018},
  date      = {2018-01-01},
  volume    = {1704},
  pages     = {433--450},
  issn      = {1940-6029},
  doi       = {10.1007/978-1-4939-7463-4_17},
  abstract  = {Since the pioneering studies of Thomas Hunt Morgan and coworkers at the dawn of the twentieth century, Drosophila melanogaster and its sister species have tremendously contributed to unveil the rules underlying animal genetics, development, behavior, evolution, and human disease. Recent advances in DNA sequencing technologies launched Drosophila into the post-genomic era and paved the way for unprecedented comparative genomics investigations. The complete sequencing and systematic comparison of the genomes from 12 Drosophila species represents a milestone achievement in modern biology, which allowed a plethora of different studies ranging from the annotation of known and novel genomic features to the evolution of chromosomes and, ultimately, of entire genomes. Despite the efforts of countless laboratories worldwide, the vast amount of data that were produced over the past 15 years is far from being fully explored. In this chapter, we will review some of the bioinformatic approaches that were developed to interrogate the genomes of the 12 Drosophila species. Setting off from alignments of the entire genomic sequences, the degree of conservation can be separately evaluated for every region of the genome, providing already first hints about elements that are under purifying selection and therefore likely functional. Furthermore, the careful analysis of repeated sequences sheds light on the evolutionary dynamics of transposons, an enigmatic and fascinating class of mobile elements housed in the genomes of animals and plants. Comparative genomics also aids in the computational identification of the transcriptionally active part of the genome, first and foremost of protein-coding loci, but also of transcribed nevertheless apparently noncoding regions, which were once considered "junk" DNA. Eventually, the synergy between functional and comparative genomics also facilitates in silico and in vivo studies on cis-acting regulatory elements, like transcription factor binding sites, that due to the high degree of sequence variability usually impose increased challenges for bioinformatics approaches.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2017
Saha, Ashis; Kim, Yungil; Gewirtz, Ariel D H; Jo, Brian; Gao, Chuan; McDowell, Ian C; GTEx Consortium, incl. Michael Sammeth; Engelhardt, Barbara E; Battle, Alexis
Co-expression networks reveal the tissue-specific regulation of transcription and splicing Journal Article
In: Genome Res, vol. 27, no. 11, pp. 1843–1858, 2017, ISSN: 1549-5469.
@article{pmid29021288,
  author    = {Saha, Ashis and Kim, Yungil and Gewirtz, Ariel D H and Jo, Brian and Gao, Chuan and McDowell, Ian C and {GTEx Consortium, incl. Michael Sammeth} and Engelhardt, Barbara E and Battle, Alexis},
  title     = {Co-expression networks reveal the tissue-specific regulation of transcription and splicing},
  journal   = {Genome Res},
  year      = {2017},
  date      = {2017-11-01},
  urldate   = {2017-11-01},
  volume    = {27},
  number    = {11},
  pages     = {1843--1858},
  issn      = {1549-5469},
  doi       = {10.1101/gr.216721.116},
  abstract  = {Gene co-expression networks capture biologically important patterns in gene expression data, enabling functional analyses of genes, discovery of biomarkers, and interpretation of genetic variants. Most network analyses to date have been limited to assessing correlation between total gene expression levels in a single tissue or small sets of tissues. Here, we built networks that additionally capture the regulation of relative isoform abundance and splicing, along with tissue-specific connections unique to each of a diverse set of tissues. We used the Genotype-Tissue Expression (GTEx) project v6 RNA sequencing data across 50 tissues and 449 individuals. First, we developed a framework called Transcriptome-Wide Networks (TWNs) for combining total expression and relative isoform levels into a single sparse network, capturing the interplay between the regulation of splicing and transcription. We built TWNs for 16 tissues and found that hubs in these networks were strongly enriched for splicing and RNA binding genes, demonstrating their utility in unraveling regulation of splicing in the human transcriptome. Next, we used a Bayesian biclustering model that identifies network edges unique to a single tissue to reconstruct Tissue-Specific Networks (TSNs) for 26 distinct tissues and 10 groups of related tissues. Finally, we found genetic variants associated with pairs of adjacent nodes in our networks, supporting the estimated network structures and identifying 20 genetic variants with distant regulatory impact on transcription and splicing. Our networks provide an improved understanding of the complex relationships of the human transcriptome across tissues.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Yang, Fan; Wang, Jiebiao; GTEx Consortium, incl. Michael Sammeth; Pierce, Brandon L; Chen, Lin S
Identifying cis-mediators for trans-eQTLs across many human tissues using genomic mediation analysis Journal Article
In: Genome Res, vol. 27, no. 11, pp. 1859–1871, 2017, ISSN: 1549-5469.
@article{pmid29021290,
  author    = {Yang, Fan and Wang, Jiebiao and {GTEx Consortium, incl. Michael Sammeth} and Pierce, Brandon L and Chen, Lin S},
  title     = {Identifying cis-mediators for trans-{eQTLs} across many human tissues using genomic mediation analysis},
  journal   = {Genome Res},
  year      = {2017},
  date      = {2017-11-01},
  urldate   = {2017-11-01},
  volume    = {27},
  number    = {11},
  pages     = {1859--1871},
  issn      = {1549-5469},
  doi       = {10.1101/gr.216754.116},
  abstract  = {The impact of inherited genetic variation on gene expression in humans is well-established. The majority of known expression quantitative trait loci (eQTLs) impact expression of local genes (cis-eQTLs). More research is needed to identify effects of genetic variation on distant genes (trans-eQTLs) and understand their biological mechanisms. One common trans-eQTLs mechanism is "mediation" by a local (cis) transcript. Thus, mediation analysis can be applied to genome-wide SNP and expression data in order to identify transcripts that are "cis-mediators" of trans-eQTLs, including those "cis-hubs" involved in regulation of many trans-genes. Identifying such mediators helps us understand regulatory networks and suggests biological mechanisms underlying trans-eQTLs, both of which are relevant for understanding susceptibility to complex diseases. The multitissue expression data from the Genotype-Tissue Expression (GTEx) program provides a unique opportunity to study cis-mediation across human tissue types. However, the presence of complex hidden confounding effects in biological systems can make mediation analyses challenging and prone to confounding bias, particularly when conducted among diverse samples. To address this problem, we propose a new method: Genomic Mediation analysis with Adaptive Confounding adjustment (GMAC). It enables the search of a very large pool of variables, and adaptively selects potential confounding variables for each mediation test. Analyses of simulated data and GTEx data demonstrate that the adaptive selection of confounders by GMAC improves the power and precision of mediation analysis. Application of GMAC to GTEx data provides new insights into the observed patterns of cis-hubs and trans-eQTL regulation across tissue types.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Tukiainen, Taru; Villani, Alexandra-Chloé; Yen, Angela; Rivas, Manuel A; Marshall, Jamie L; Satija, Rahul; Aguirre, Matt; Gauthier, Laura; Fleharty, Mark; Kirby, Andrew; Cummings, Beryl B; Castel, Stephane E; Karczewski, Konrad J; Aguet, François; Byrnes, Andrea; GTEx Consortium, incl. Michael Sammeth
Landscape of X chromosome inactivation across human tissues Journal Article
In: Nature, vol. 550, no. 7675, pp. 244–248, 2017, ISSN: 1476-4687.
@article{pmid29022598,
  author    = {Tukiainen, Taru and Villani, Alexandra-Chloé and Yen, Angela and Rivas, Manuel A and Marshall, Jamie L and Satija, Rahul and Aguirre, Matt and Gauthier, Laura and Fleharty, Mark and Kirby, Andrew and Cummings, Beryl B and Castel, Stephane E and Karczewski, Konrad J and Aguet, François and Byrnes, Andrea and {GTEx Consortium, incl. Michael Sammeth}},
  title     = {Landscape of {X} chromosome inactivation across human tissues},
  journal   = {Nature},
  year      = {2017},
  date      = {2017-10-01},
  urldate   = {2017-10-01},
  volume    = {550},
  number    = {7675},
  pages     = {244--248},
  issn      = {1476-4687},
  doi       = {10.1038/nature24265},
  abstract  = {X chromosome inactivation (XCI) silences transcription from one of the two X chromosomes in female mammalian cells to balance expression dosage between XX females and XY males. XCI is, however, incomplete in humans: up to one-third of X-chromosomal genes are expressed from both the active and inactive X chromosomes (Xa and Xi, respectively) in female cells, with the degree of 'escape' from inactivation varying between genes and individuals. The extent to which XCI is shared between cells and tissues remains poorly characterized, as does the degree to which incomplete XCI manifests as detectable sex differences in gene expression and phenotypic traits. Here we describe a systematic survey of XCI, integrating over 5,500 transcriptomes from 449 individuals spanning 29 tissues from GTEx (v6p release) and 940 single-cell transcriptomes, combined with genomic sequence data. We show that XCI at 683 X-chromosomal genes is generally uniform across human tissues, but identify examples of heterogeneity between tissues, individuals and cells. We show that incomplete XCI affects at least 23% of X-chromosomal genes, identify seven genes that escape XCI with support from multiple lines of evidence and demonstrate that escape from XCI results in sex biases in gene expression, establishing incomplete XCI as a mechanism that is likely to introduce phenotypic diversity. Overall, this updated catalogue of XCI across human tissues helps to increase our understanding of the extent and impact of the incompleteness in the maintenance of XCI.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
GTEx Consortium, incl. Michael Sammeth
Genetic effects on gene expression across human tissues Journal Article
In: Nature, vol. 550, no. 7675, pp. 204–213, 2017, ISSN: 1476-4687.
@article{pmid29022597,
  author    = {{GTEx Consortium, incl. Michael Sammeth}},
  title     = {Genetic effects on gene expression across human tissues},
  journal   = {Nature},
  year      = {2017},
  date      = {2017-10-01},
  urldate   = {2017-10-01},
  volume    = {550},
  number    = {7675},
  pages     = {204--213},
  issn      = {1476-4687},
  doi       = {10.1038/nature24277},
  abstract  = {Characterization of the molecular function of the human genome and its variation across individuals is essential for identifying the cellular mechanisms that underlie human genetic traits and diseases. The Genotype-Tissue Expression (GTEx) project aims to characterize variation in gene expression levels across individuals and diverse tissues of the human body, many of which are not easily accessible. Here we describe genetic effects on gene expression levels across 44 human tissues. We find that local genetic variation affects gene expression levels for the majority of genes, and we further identify inter-chromosomal genetic effects for 93 genes and 112 loci. On the basis of the identified genetic effects, we characterize patterns of tissue specificity, compare local and distal effects, and evaluate the functional properties of the genetic effects. We also demonstrate that multi-tissue, multi-individual data can be used to identify genes and pathways affected by human disease-associated variation, enabling a mechanistic interpretation of gene regulation and the genetic basis of disease.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Tan, Meng How; Li, Qin; Shanmugam, Raghuvaran; Piskol, Robert; Kohler, Jennefer; Young, Amy N; Liu, Kaiwen Ivy; Zhang, Rui; Ramaswami, Gokul; Ariyoshi, Kentaro; Gupte, Ankita; Keegan, Liam P; George, Cyril X; Ramu, Avinash; Huang, Ni; Pollina, Elizabeth A; Leeman, Dena S; Rustighi, Alessandra; Goh, Y P Sharon; GTEx Consortium, incl. Michael Sammeth
Dynamic landscape and regulation of RNA editing in mammals Journal Article
In: Nature, vol. 550, no. 7675, pp. 249–254, 2017, ISSN: 1476-4687.
@article{pmid29022589,
  author    = {Tan, Meng How and Li, Qin and Shanmugam, Raghuvaran and Piskol, Robert and Kohler, Jennefer and Young, Amy N and Liu, Kaiwen Ivy and Zhang, Rui and Ramaswami, Gokul and Ariyoshi, Kentaro and Gupte, Ankita and Keegan, Liam P and George, Cyril X and Ramu, Avinash and Huang, Ni and Pollina, Elizabeth A and Leeman, Dena S and Rustighi, Alessandra and Goh, Y P Sharon and {GTEx Consortium, incl. Michael Sammeth}},
  title     = {Dynamic landscape and regulation of {RNA} editing in mammals},
  journal   = {Nature},
  year      = {2017},
  date      = {2017-10-01},
  urldate   = {2017-10-01},
  volume    = {550},
  number    = {7675},
  pages     = {249--254},
  issn      = {1476-4687},
  doi       = {10.1038/nature24041},
  abstract  = {Adenosine-to-inosine (A-to-I) RNA editing is a conserved post-transcriptional mechanism mediated by ADAR enzymes that diversifies the transcriptome by altering selected nucleotides in RNA molecules. Although many editing sites have recently been discovered, the extent to which most sites are edited and how the editing is regulated in different biological contexts are not fully understood. Here we report dynamic spatiotemporal patterns and new regulators of RNA editing, discovered through an extensive profiling of A-to-I RNA editing in 8,551 human samples (representing 53 body sites from 552 individuals) from the Genotype-Tissue Expression (GTEx) project and in hundreds of other primate and mouse samples. We show that editing levels in non-repetitive coding regions vary more between tissues than editing levels in repetitive regions. Globally, ADAR1 is the primary editor of repetitive sites and ADAR2 is the primary editor of non-repetitive coding sites, whereas the catalytically inactive ADAR3 predominantly acts as an inhibitor of editing. Cross-species analysis of RNA editing in several tissues revealed that species, rather than tissue type, is the primary determinant of editing levels, suggesting stronger cis-directed regulation of RNA editing for most sites, although the small set of conserved coding sites is under stronger trans-regulation. In addition, we curated an extensive set of ADAR1 and ADAR2 targets and showed that many editing sites display distinct tissue-specific regulation by the ADAR enzymes in vivo. Further analysis of the GTEx data revealed several potential regulators of editing, such as AIMP2, which reduces editing in muscles by enhancing the degradation of the ADAR proteins. Collectively, our work provides insights into the complex cis- and trans-regulation of A-to-I editing.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Li, Xin; Kim, Yungil; Tsang, Emily K; Davis, Joe R; Damani, Farhan N; Chiang, Colby; Hess, Gaelen T; Zappala, Zachary; Strober, Benjamin J; Scott, Alexandra J; Li, Amy; Ganna, Andrea; Bassik, Michael C; Merker, Jason D; GTEx Consortium, incl. Michael Sammeth
The impact of rare variation on gene expression across tissues Journal Article
In: Nature, vol. 550, no. 7675, pp. 239–243, 2017, ISSN: 1476-4687.
@article{pmid29022581,
  author    = {Li, Xin and Kim, Yungil and Tsang, Emily K and Davis, Joe R and Damani, Farhan N and Chiang, Colby and Hess, Gaelen T and Zappala, Zachary and Strober, Benjamin J and Scott, Alexandra J and Li, Amy and Ganna, Andrea and Bassik, Michael C and Merker, Jason D and {GTEx Consortium, incl. Michael Sammeth}},
  title     = {The impact of rare variation on gene expression across tissues},
  journal   = {Nature},
  year      = {2017},
  date      = {2017-10-01},
  urldate   = {2017-10-01},
  volume    = {550},
  number    = {7675},
  pages     = {239--243},
  issn      = {1476-4687},
  doi       = {10.1038/nature24267},
  abstract  = {Rare genetic variants are abundant in humans and are expected to contribute to individual disease risk. While genetic association studies have successfully identified common genetic variants associated with susceptibility, these studies are not practical for identifying rare variants. Efforts to distinguish pathogenic variants from benign rare variants have leveraged the genetic code to identify deleterious protein-coding alleles, but no analogous code exists for non-coding variants. Therefore, ascertaining which rare variants have phenotypic effects remains a major challenge. Rare non-coding variants have been associated with extreme gene expression in studies using single tissues, but their effects across tissues are unknown. Here we identify gene expression outliers, or individuals showing extreme expression levels for a particular gene, across 44 human tissues by using combined analyses of whole genomes and multi-tissue RNA-sequencing data from the Genotype-Tissue Expression (GTEx) project v6p release. We find that 58% of underexpression and 28% of overexpression outliers have nearby conserved rare variants compared to 8% of non-outliers. Additionally, we developed RIVER (RNA-informed variant effect on regulation), a Bayesian statistical model that incorporates expression data to predict a regulatory effect for rare variants with higher accuracy than models using genomic annotations alone. Overall, we demonstrate that rare variants contribute to large gene expression changes across tissues and provide an integrative method for interpretation of rare variants in individual genomes.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2016
Schwartze, Volker U; Winter, Sascha; Shelest, Ekaterina; Marcet-Houben, Marina; Horn, Fabian; Wehner, Stefanie; Linde, Jörg; Valiante, Vito; Sammeth, Michael; Riege, Konstantin; Nowrousian, Minou; Kaerger, Kerstin; Jacobsen, Ilse D; Marz, Manja; Brakhage, Axel A; Gabaldón, Toni; Böcker, Sebastian; Voigt, Kerstin
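Correction: Gene Expansion Shapes Genome Architecture in the Human Pathogen Lichtheimia corymbifera: An Evolutionary Genomics Analysis in the Ancient Terrestrial Mucorales (Mucoromycotina)
In: PLoS Genet, vol. 12, no. 12, pp. e1006491, 2016, ISSN: 1553-7404.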
@article{pmid27918580,
  author    = {Schwartze, Volker U and Winter, Sascha and Shelest, Ekaterina and Marcet-Houben, Marina and Horn, Fabian and Wehner, Stefanie and Linde, Jörg and Valiante, Vito and Sammeth, Michael and Riege, Konstantin and Nowrousian, Minou and Kaerger, Kerstin and Jacobsen, Ilse D and Marz, Manja and Brakhage, Axel A and Gabaldón, Toni and Böcker, Sebastian and Voigt, Kerstin},
  title     = {Correction: Gene Expansion Shapes Genome Architecture in the Human Pathogen {Lichtheimia} corymbifera: An Evolutionary Genomics Analysis in the Ancient Terrestrial {Mucorales} ({Mucoromycotina})},
  journal   = {PLoS Genet},
  year      = {2016},
  date      = {2016-12-01},
  volume    = {12},
  number    = {12},
  pages     = {e1006491},
  issn      = {1553-7404},
  doi       = {10.1371/journal.pgen.1006491},
  abstract  = {[This corrects the article DOI: 10.1371/journal.pgen.1004496.].},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Ferreira, Pedro G; Oti, Martin; Barann, Matthias; Wieland, Thomas; Ezquina, Suzana; Friedländer, Marc R; Rivas, Manuel A; Esteve-Codina, Anna; GEUVADIS Consortium; Rosenstiel, Philip; Strom, Tim M; Lappalainen, Tuuli; Guigó, Roderic; Sammeth, Michael
Sequence variation between 462 human individuals fine-tunes functional sites of RNA processing Journal Article
In: Sci Rep, vol. 6, pp. 32406, 2016, ISSN: 2045-2322.
@article{pmid27617755,
  author    = {Ferreira, Pedro G and Oti, Martin and Barann, Matthias and Wieland, Thomas and Ezquina, Suzana and Friedländer, Marc R and Rivas, Manuel A and Esteve-Codina, Anna and {GEUVADIS Consortium} and Rosenstiel, Philip and Strom, Tim M and Lappalainen, Tuuli and Guigó, Roderic and Sammeth, Michael},
  title     = {Sequence variation between 462 human individuals fine-tunes functional sites of {RNA} processing},
  journal   = {Sci Rep},
  year      = {2016},
  date      = {2016-09-01},
  volume    = {6},
  pages     = {32406},
  issn      = {2045-2322},
  doi       = {10.1038/srep32406},
  abstract  = {Recent advances in the cost-efficiency of sequencing technologies enabled the combined DNA- and RNA-sequencing of human individuals at the population-scale, making genome-wide investigations of the inter-individual genetic impact on gene expression viable. Employing mRNA-sequencing data from the Geuvadis Project and genome sequencing data from the 1000 Genomes Project we show that the computational analysis of DNA sequences around splice sites and poly-A signals is able to explain several observations in the phenotype data. In contrast to widespread assessments of statistically significant associations between DNA polymorphisms and quantitative traits, we developed a computational tool to pinpoint the molecular mechanisms by which genetic markers drive variation in RNA-processing, cataloguing and classifying alleles that change the affinity of core RNA elements to their recognizing factors. The in silico models we employ further suggest RNA editing can moonlight as a splicing-modulator, albeit less frequently than genomic sequence diversity. Beyond existing annotations, we demonstrate that the ultra-high resolution of RNA-Seq combined from 462 individuals also provides evidence for thousands of bona fide novel elements of RNA processing-alternative splice sites, introns, and cleavage sites-which are often rare and lowly expressed but in other characteristics similar to their annotated counterparts.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}