2012
Gonzàlez-Porta, Mar; Calvo, Miquel; Sammeth, Michael; Guigó, Roderic
Estimation of alternative splicing variability in human populations Journal Article
In: Genome Res, vol. 22, no. 3, pp. 528–538, 2012, ISSN: 1549-5469.
% Journal article record. LaTeX specials in the abstract (percent sign, tilde) are escaped
% so the field stays intact if a style typesets it; names use the `Last, First` form.
@article{pmid22113879,
  author     = {Gonzàlez-Porta, Mar and Calvo, Miquel and Sammeth, Michael and Guigó, Roderic},
  title      = {Estimation of alternative splicing variability in human populations},
  journal    = {Genome Res},
  volume     = {22},
  number     = {3},
  pages      = {528--538},
  year       = {2012},
  date       = {2012-03-01},
  issn       = {1549-5469},
  doi        = {10.1101/gr.121947.111},
  abstract   = {DNA arrays have been widely used to perform transcriptome-wide analysis of gene expression, and many methods have been developed to measure gene expression variability and to compare gene expression between conditions. Because RNA-seq is also becoming increasingly popular for transcriptome characterization, the possibility exists for further quantification of individual alternative transcript isoforms, and therefore for estimating the relative ratios of alternative splice forms within a given gene. Changes in splicing ratios, even without changes in overall gene expression, may have important phenotypic effects. Here we have developed statistical methodology to measure variability in splicing ratios within conditions, to compare it between conditions, and to identify genes with condition-specific splicing ratios. Furthermore, we have developed methodology to deconvolute the relative contribution of variability in gene expression versus variability in splicing ratios to the overall variability of transcript abundances. As a proof of concept, we have applied this methodology to estimates of transcript abundances obtained from RNA-seq experiments in lymphoblastoid cells from Caucasian and Yoruban individuals. We have found that protein-coding genes exhibit low splicing variability within populations, with many genes exhibiting constant ratios across individuals. When comparing these two populations, we have found that up to 10\% of the studied protein-coding genes exhibit population-specific splicing ratios. We estimate that {\textasciitilde}60\% of the total variability observed in the abundance of transcript isoforms can be explained by variability in transcription. A large fraction of the remaining variability can likely result from variability in splicing. Finally, we also detected that variability in splicing is uncommon without variability in transcription.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
Eizirik, Décio L; Sammeth, Michael; Bouckenooghe, Thomas; Bottu, Guy; Sisino, Giorgia; Igoillo-Esteve, Mariana; Ortis, Fernanda; Santin, Izortze; Colli, Maikel L; Barthson, Jenny; Bouwens, Luc; Hughes, Linda; Gregory, Lorna; Lunter, Gerton; Marselli, Lorella; Marchetti, Piero; McCarthy, Mark I; Cnop, Miriam
The human pancreatic islet transcriptome: expression of candidate genes for type 1 diabetes and the impact of pro-inflammatory cytokines Journal Article
In: PLoS Genet, vol. 8, no. 3, pp. e1002552, 2012, ISSN: 1553-7404.
% Journal article record. Percent signs in the abstract are escaped so they cannot start
% a LaTeX comment when the field is typeset; names use the `Last, First` form.
@article{pmid22412385,
  author     = {Eizirik, Décio L and Sammeth, Michael and Bouckenooghe, Thomas and Bottu, Guy and Sisino, Giorgia and Igoillo-Esteve, Mariana and Ortis, Fernanda and Santin, Izortze and Colli, Maikel L and Barthson, Jenny and Bouwens, Luc and Hughes, Linda and Gregory, Lorna and Lunter, Gerton and Marselli, Lorella and Marchetti, Piero and McCarthy, Mark I and Cnop, Miriam},
  title      = {The human pancreatic islet transcriptome: expression of candidate genes for type 1 diabetes and the impact of pro-inflammatory cytokines},
  journal    = {PLoS Genet},
  volume     = {8},
  number     = {3},
  pages      = {e1002552},
  year       = {2012},
  date       = {2012-01-01},
  issn       = {1553-7404},
  doi        = {10.1371/journal.pgen.1002552},
  abstract   = {Type 1 diabetes (T1D) is an autoimmune disease in which pancreatic beta cells are killed by infiltrating immune cells and by cytokines released by these cells. Signaling events occurring in the pancreatic beta cells are decisive for their survival or death in diabetes. We have used RNA sequencing (RNA-seq) to identify transcripts, including splice variants, expressed in human islets of Langerhans under control conditions or following exposure to the pro-inflammatory cytokines interleukin-1β (IL-1β) and interferon-γ (IFN-γ). Based on this unique dataset, we examined whether putative candidate genes for T1D, previously identified by GWAS, are expressed in human islets. A total of 29,776 transcripts were identified as expressed in human islets. Expression of around 20\% of these transcripts was modified by pro-inflammatory cytokines, including apoptosis- and inflammation-related genes. Chemokines were among the transcripts most modified by cytokines, a finding confirmed at the protein level by ELISA. Interestingly, 35\% of the genes expressed in human islets undergo alternative splicing as annotated in RefSeq, and cytokines caused substantial changes in spliced transcripts. Nova1, previously considered a brain-specific regulator of mRNA splicing, is expressed in islets and its knockdown modified splicing. 25/41 of the candidate genes for T1D are expressed in islets, and cytokines modified expression of several of these transcripts. The present study doubles the number of known genes expressed in human islets and shows that cytokines modify alternative splicing in human islet cells. Importantly, it indicates that more than half of the known T1D candidate genes are expressed in human islets. This, and the production of a large number of chemokines and cytokines by cytokine-exposed islets, reinforces the concept of a dialog between pancreatic islets and the immune system in T1D. 
This dialog is modulated by candidate genes for the disease at both the immune system and beta cell level.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
Mundry, Marvin; Bornberg-Bauer, Erich; Sammeth, Michael; Feulner, Philine G D
Evaluating characteristics of de novo assembly software on 454 transcriptome data: a simulation approach Journal Article
In: PLoS One, vol. 7, no. 2, pp. e31410, 2012, ISSN: 1932-6203.
% Journal article record. The structured abstract (BACKGROUND / FINDINGS / CONCLUSION) had its
% section breaks mangled into a literal "nn"; they are restored as line breaks here.
@article{pmid22384018,
  author     = {Mundry, Marvin and Bornberg-Bauer, Erich and Sammeth, Michael and Feulner, Philine G D},
  title      = {Evaluating characteristics of de novo assembly software on 454 transcriptome data: a simulation approach},
  journal    = {PLoS One},
  volume     = {7},
  number     = {2},
  pages      = {e31410},
  year       = {2012},
  date       = {2012-01-01},
  issn       = {1932-6203},
  doi        = {10.1371/journal.pone.0031410},
  abstract   = {BACKGROUND: The quantity of transcriptome data is rapidly increasing for non-model organisms. As sequencing technology advances, focus shifts towards solving bioinformatic challenges, of which sequence read assembly is the first task. Recent studies have compared the performance of different software to establish a best practice for transcriptome assembly. Here, we adapted a simulation approach to evaluate specific features of assembly programs on 454 data. The novelty of our study is that the simulation allows us to calculate a model assembly as reference point for comparison.
FINDINGS: The simulation approach allows us to compare basic metrics of assemblies computed by different software applications (CAP3, MIRA, Newbler, and Oases) to a known optimal solution. We found MIRA and CAP3 are conservative in merging reads. This resulted in comparably high number of short contigs. In contrast, Newbler more readily merged reads into longer contigs, while Oases produced the overall shortest assembly. Due to the simulation approach, reads could be traced back to their correct placement within the transcriptome. Together with mapping reads onto the assembled contigs, we were able to evaluate ambiguity in the assemblies. This analysis further supported the conservative nature of MIRA and CAP3, which resulted in low proportions of chimeric contigs, but high redundancy. Newbler produced less redundancy, but the proportion of chimeric contigs was higher.
CONCLUSION: Our evaluation of four assemblers suggested that MIRA and Newbler slightly outperformed the other programs, while showing contrasting characteristics. Oases did not perform very well on the 454 reads. Our evaluation indicated that the software was either conservative (MIRA) or liberal (Newbler) about merging reads into contigs. This suggested that in choosing an assembly program researchers should carefully consider their follow up analysis and consequences of the chosen approach to gain an assembly.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
2011
ENCODE Project Consortium, incl. Michael Sammeth
A user's guide to the encyclopedia of DNA elements (ENCODE) Journal Article
In: PLoS Biol, vol. 9, no. 4, pp. e1001046, 2011, ISSN: 1545-7885.
% Journal article with a corporate author. The consortium name is double-braced so it is kept
% as one indivisible name; the former comma-separated "incl." remark would have been parsed as
% the first-name part of a `Last, First` name, so it now lives in `note`.
% Acronyms in the title are brace-protected against style recasing.
@article{pmid21526222,
  author     = {{ENCODE Project Consortium}},
  title      = {A user's guide to the encyclopedia of {DNA} elements ({ENCODE})},
  journal    = {PLoS Biol},
  volume     = {9},
  number     = {4},
  pages      = {e1001046},
  year       = {2011},
  date       = {2011-04-01},
  urldate    = {2011-04-01},
  issn       = {1545-7885},
  doi        = {10.1371/journal.pbio.1001046},
  note       = {Consortium members incl. Michael Sammeth},
  abstract   = {The mission of the Encyclopedia of DNA Elements (ENCODE) Project is to enable the scientific and medical communities to interpret the human genome sequence and apply it to understand human biology and improve health. The ENCODE Consortium is integrating multiple technologies and approaches in a collective effort to discover and define the functional elements encoded in the human genome, including genes, transcripts, and transcriptional regulatory regions, together with their attendant chromatin states and DNA methylation patterns. In the process, standards to ensure high-quality data have been implemented, and novel algorithms have been developed to facilitate analysis. Data and derived results are made available through a freely accessible database. Here we provide an overview of the project and the resources it is generating and illustrate the application of ENCODE data to interpret the human genome.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
2010
Montgomery, Stephen B; Sammeth, Michael; Gutierrez-Arcelus, Maria; Lach, Radoslaw P; Ingle, Catherine; Nisbett, James; Guigó, Roderic; Dermitzakis, Emmanouil T
Transcriptome genetics using second generation sequencing in a Caucasian population Journal Article
In: Nature, vol. 464, no. 7289, pp. 773–777, 2010, ISSN: 1476-4687.
% Journal article record. Author spellings are normalised to the forms used by the other
% entries in this file (Michael Sammeth, Roderic Guigó) so the same person is not split into
% two identities; NOTE(review): confirm this is preferred over the as-printed spellings.
% The proper noun in the title is brace-protected against style recasing.
@article{pmid20220756,
  author     = {Montgomery, Stephen B and Sammeth, Michael and Gutierrez-Arcelus, Maria and Lach, Radoslaw P and Ingle, Catherine and Nisbett, James and Guigó, Roderic and Dermitzakis, Emmanouil T},
  title      = {Transcriptome genetics using second generation sequencing in a {Caucasian} population},
  journal    = {Nature},
  volume     = {464},
  number     = {7289},
  pages      = {773--777},
  year       = {2010},
  date       = {2010-04-01},
  issn       = {1476-4687},
  doi        = {10.1038/nature08903},
  abstract   = {Gene expression is an important phenotype that informs about genetic and environmental effects on cellular state. Many studies have previously identified genetic variants for gene expression phenotypes using custom and commercially available microarrays. Second generation sequencing technologies are now providing unprecedented access to the fine structure of the transcriptome. We have sequenced the mRNA fraction of the transcriptome in 60 extended HapMap individuals of European descent and have combined these data with genetic variants from the HapMap3 project. We have quantified exon abundance based on read depth and have also developed methods to quantify whole transcript abundance. We have found that approximately 10 million reads of sequencing can provide access to the same dynamic range as arrays with better quantification of alternative and highly abundant transcripts. Correlation with SNPs (small nucleotide polymorphisms) leads to a larger discovery of eQTLs (expression quantitative trait loci) than with arrays. We also detect a substantial number of variants that influence the structure of mature transcripts indicating variants responsible for alternative splicing. Finally, measures of allele-specific expression allowed the identification of rare eQTLs and allelic differences in transcript structure. This analysis shows that high throughput sequencing technologies reveal new properties of genetic effects on the transcriptome and allow the exploration of genetic effects in cellular processes.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
Horner, David Stephen; Pavesi, Giulio; Castrignanò, Tiziana; D'Onorio De Meo, Paolo; Liuni, Sabino; Sammeth, Michael; Picardi, Ernesto; Pesole, Graziano
Bioinformatics approaches for genomics and post genomics applications of next-generation sequencing Journal Article
In: Brief Bioinform, vol. 11, no. 2, pp. 181–197, 2010, ISSN: 1477-4054.
% Journal article record. The compound surname "D'Onorio De Meo" is braced and written in
% `Last, First` form; in `First Last` form BibTeX takes only the final word as the surname.
@article{pmid19864250,
  author     = {Horner, David Stephen and Pavesi, Giulio and Castrignanò, Tiziana and {D'Onorio De Meo}, Paolo and Liuni, Sabino and Sammeth, Michael and Picardi, Ernesto and Pesole, Graziano},
  title      = {Bioinformatics approaches for genomics and post genomics applications of next-generation sequencing},
  journal    = {Brief Bioinform},
  volume     = {11},
  number     = {2},
  pages      = {181--197},
  year       = {2010},
  date       = {2010-03-01},
  issn       = {1477-4054},
  doi        = {10.1093/bib/bbp046},
  abstract   = {Technical advances such as the development of molecular cloning, Sanger sequencing, PCR and oligonucleotide microarrays are key to our current capacity to sequence, annotate and study complete organismal genomes. Recent years have seen the development of a variety of so-called 'next-generation' sequencing platforms, with several others anticipated to become available shortly. The previously unimaginable scale and economy of these methods, coupled with their enthusiastic uptake by the scientific community and the potential for further improvements in accuracy and read length, suggest that these technologies are destined to make a huge and ongoing impact upon genomic and post-genomic biology. However, like the analysis of microarray data and the assembly and annotation of complete genome sequences from conventional sequencing data, the management and analysis of next-generation sequencing data requires (and indeed has already driven) the development of informatics tools able to assemble, map, and interpret huge quantities of relatively or extremely short nucleotide sequence data. Here we provide a broad overview of bioinformatics approaches that have been introduced for several genomics and functional genomics applications of next-generation sequencing.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
2009
Tilgner, Hagen; Nikolaou, Christoforos; Althammer, Sonja; Sammeth, Michael; Beato, Miguel; Valcárcel, Juan; Guigó, Roderic
Nucleosome positioning as a determinant of exon recognition Journal Article
In: Nat Struct Mol Biol, vol. 16, no. 9, pp. 996–1001, 2009, ISSN: 1545-9985.
% Journal article record; author names are stored in the unambiguous `Last, First` form.
@article{pmid19684599,
  author     = {Tilgner, Hagen and Nikolaou, Christoforos and Althammer, Sonja and Sammeth, Michael and Beato, Miguel and Valcárcel, Juan and Guigó, Roderic},
  title      = {Nucleosome positioning as a determinant of exon recognition},
  journal    = {Nat Struct Mol Biol},
  volume     = {16},
  number     = {9},
  pages      = {996--1001},
  year       = {2009},
  date       = {2009-09-01},
  issn       = {1545-9985},
  doi        = {10.1038/nsmb.1658},
  abstract   = {Chromatin structure influences transcription, but its role in subsequent RNA processing is unclear. Here we present analyses of high-throughput data that imply a relationship between nucleosome positioning and exon definition. First, we have found stable nucleosome occupancy within human and Caenorhabditis elegans exons that is stronger in exons with weak splice sites. Conversely, we have found that pseudoexons--intronic sequences that are not included in mRNAs but are flanked by strong splice sites--show nucleosome depletion. Second, the ratio between nucleosome occupancy within and upstream from the exons correlates with exon-inclusion levels. Third, nucleosomes are positioned central to exons rather than proximal to splice sites. These exonic nucleosomal patterns are also observed in non-expressed genes, suggesting that nucleosome marking of exons exists in the absence of transcription. Our analysis provides a framework that contributes to the understanding of splicing on the basis of chromatin architecture.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
Sammeth, Michael
Complete alternative splicing events are bubbles in splicing graphs Journal Article
In: J Comput Biol, vol. 16, no. 8, pp. 1117–1140, 2009, ISSN: 1557-8666.
% Single-author journal article record; the author name is stored in `Last, First` form.
@article{pmid19689216,
  author     = {Sammeth, Michael},
  title      = {Complete alternative splicing events are bubbles in splicing graphs},
  journal    = {J Comput Biol},
  volume     = {16},
  number     = {8},
  pages      = {1117--1140},
  year       = {2009},
  date       = {2009-08-01},
  issn       = {1557-8666},
  doi        = {10.1089/cmb.2009.0108},
  abstract   = {Eukaryotic splicing structures are known to involve a high degree of alternative forms derived from a premature transcript by alternative splicing (AS). With the advent of new sequencing technologies, evidence for new splice forms becomes increasingly available-bit by bit revealing that the true splicing diversity of "AS events" often comprises more than two alternatives and therefore cannot be sufficiently described by pairwise comparisons as conducted in analyzes hitherto. Especially, I emphasize on "complete" AS events which include all hitherto known variants of a splicing variation. Challenges emerge from the richness of data (millions of transcripts) and artifacts introduced during the technical process of obtaining transcript sequences ("noise")-especially when dealing with single-read sequences known as expressed sequence tags (ESTs). Herein, I describe a novel method to efficiently predict AS events in different resolutions ("dimensions") from transcript annotations that allows for combination of fragmented EST data with full-length cDNAs and can cope with large datasets containing noise. At the doorstep of many new splice forms becoming available by novel high-throughput sequencing technologies, the presented method helps to dynamically update AS databases. Applying this method to estimate the real complexity of alternative splicing, I found in human and murine annotations thousands of novel AS events that either have been disregarded or mischaracterized in earlier works. The growth of evidence for such events suggests that the number still keeps climbing. When considering complete events, the majority of exons that are observed as "mutually exclusive" in pairwise comparisons in fact involves at least one other alternative splice form that disagrees with their mutual exclusion. Similar observations also hold for the alternative skipping of two subsequent exons. 
Results suggest that the systematical analysis of complete AS events on large scale provides subtle insights in the mechanisms that drive (alternative) splicing.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
2008
Sammeth, Michael; Foissac, Sylvain; Guigó, Roderic
A general definition and nomenclature for alternative splicing events Journal Article
In: PLoS Comput Biol, vol. 4, no. 8, pp. e1000147, 2008, ISSN: 1553-7358.
% Journal article record; author names are stored in the unambiguous `Last, First` form.
@article{pmid18688268,
  author     = {Sammeth, Michael and Foissac, Sylvain and Guigó, Roderic},
  title      = {A general definition and nomenclature for alternative splicing events},
  journal    = {PLoS Comput Biol},
  volume     = {4},
  number     = {8},
  pages      = {e1000147},
  year       = {2008},
  date       = {2008-08-01},
  issn       = {1553-7358},
  doi        = {10.1371/journal.pcbi.1000147},
  abstract   = {Understanding the molecular mechanisms responsible for the regulation of the transcriptome present in eukaryotic cells is one of the most challenging tasks in the postgenomic era. In this regard, alternative splicing (AS) is a key phenomenon contributing to the production of different mature transcripts from the same primary RNA sequence. As a plethora of different transcript forms is available in databases, a first step to uncover the biology that drives AS is to identify the different types of reflected splicing variation. In this work, we present a general definition of the AS event along with a notation system that involves the relative positions of the splice sites. This nomenclature univocally and dynamically assigns a specific "AS code" to every possible pattern of splicing variation. On the basis of this definition and the corresponding codes, we have developed a computational tool (AStalavista) that automatically characterizes the complete landscape of AS events in a given transcript annotation of a genome, thus providing a platform to investigate the transcriptome diversity across genes, chromosomes, and species. Our analysis reveals that a substantial part--in human more than a quarter-of the observed splicing variations are ignored in common classification pipelines. We have used AStalavista to investigate and to compare the AS landscape of different reference annotation sets in human and in other metazoan species and found that proportions of AS events change substantially depending on the annotation protocol, species-specific attributes, and coding constraints acting on the transcripts. The AStalavista system therefore provides a general framework to conduct specific studies investigating the occurrence, impact, and regulation of AS.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
Sammeth, Michael; Valiente, Gabriel; Guigó, Roderic
Bubbles: Alternative Splicing Events of Arbitrary Dimension in Splicing Graphs Proceedings Article
In: Vingron, Martin; Wong, Limsoon (Ed.): Research in Computational Molecular Biology, pp. 372–395, Springer Berlin Heidelberg, Berlin, Heidelberg, 2008, ISBN: 978-3-540-78839-3.
% Conference paper record. The page range uses the ASCII double hyphen like every other entry
% in this file (it previously held a Unicode en dash). The `doi` field is copied from the
% DOI-shaped citation key; NOTE(review): confirm it resolves to this paper.
@inproceedings{10.1007/978-3-540-78839-3_32,
  author     = {Sammeth, Michael and Valiente, Gabriel and Guigó, Roderic},
  editor     = {Vingron, Martin and Wong, Limsoon},
  title      = {Bubbles: Alternative Splicing Events of Arbitrary Dimension in Splicing Graphs},
  booktitle  = {Research in Computational Molecular Biology},
  pages      = {372--395},
  publisher  = {Springer Berlin Heidelberg},
  address    = {Berlin, Heidelberg},
  year       = {2008},
  date       = {2008-01-01},
  isbn       = {978-3-540-78839-3},
  doi        = {10.1007/978-3-540-78839-3_32},
  abstract   = {Eukaryotic splicing structures are known to involve a high degree of alternative forms derived from a premature transcript by alternative splicing (AS). With the advent of new sequencing technologies, evidence for new splice forms becomes more and more easily available—bit by bit revealing that the true splicing diversity of ``AS events'' often comprises more than two alternatives and therefore cannot be sufficiently described by pairwise comparisons as conducted in analyzes hitherto. Further challenges emerge from the richness of data (millions of transcripts) and artifacts introduced during the technical process of obtaining transcript sequences (noise)—especially when dealing with single-read sequences known as expressed sequence tags (ESTs). We describe a novel method to efficiently predict AS events in different resolutions (i.e., dimensions) from transcript annotations that allows for combination of fragmented EST data with full-length cDNAs and can cope with large datasets containing noise. Applying this method to estimate the real complexity of alternative splicing, we found in human thousands of novel AS events that either have been disregarded or mischaracterized in earlier works. In fact, the majority of exons that are observed as ``mutually exclusive'' in pairwise comparisons truly involve at least one other alternative splice form that disagrees with their mutual exclusion. We identified four major classes that contain such ``optional'' neighboring exons and show that they clearly differ from each other in characteristics, especially in the length distribution of the middle intron.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}