2009
Sammeth, Michael
Complete alternative splicing events are bubbles in splicing graphs Journal Article
In: J Comput Biol, vol. 16, no. 8, pp. 1117–1140, 2009, ISSN: 1557-8666.
@article{pmid19689216,
  author    = {Sammeth, Michael},
  title     = {Complete alternative splicing events are bubbles in splicing graphs},
  journal   = {J Comput Biol},
  year      = {2009},
  date      = {2009-08-01},
  volume    = {16},
  number    = {8},
  pages     = {1117--1140},
  issn      = {1557-8666},
  doi       = {10.1089/cmb.2009.0108},
  abstract  = {Eukaryotic splicing structures are known to involve a high degree of alternative forms derived from a premature transcript by alternative splicing (AS). With the advent of new sequencing technologies, evidence for new splice forms becomes increasingly available---bit by bit revealing that the true splicing diversity of ``AS events'' often comprises more than two alternatives and therefore cannot be sufficiently described by pairwise comparisons as conducted in analyzes hitherto. Especially, I emphasize on ``complete'' AS events which include all hitherto known variants of a splicing variation. Challenges emerge from the richness of data (millions of transcripts) and artifacts introduced during the technical process of obtaining transcript sequences (``noise'')---especially when dealing with single-read sequences known as expressed sequence tags (ESTs). Herein, I describe a novel method to efficiently predict AS events in different resolutions (``dimensions'') from transcript annotations that allows for combination of fragmented EST data with full-length cDNAs and can cope with large datasets containing noise. At the doorstep of many new splice forms becoming available by novel high-throughput sequencing technologies, the presented method helps to dynamically update AS databases. Applying this method to estimate the real complexity of alternative splicing, I found in human and murine annotations thousands of novel AS events that either have been disregarded or mischaracterized in earlier works. The growth of evidence for such events suggests that the number still keeps climbing. When considering complete events, the majority of exons that are observed as ``mutually exclusive'' in pairwise comparisons in fact involves at least one other alternative splice form that disagrees with their mutual exclusion. Similar observations also hold for the alternative skipping of two subsequent exons. Results suggest that the systematical analysis of complete AS events on large scale provides subtle insights in the mechanisms that drive (alternative) splicing.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2008
Sammeth, Michael; Foissac, Sylvain; Guigó, Roderic
A general definition and nomenclature for alternative splicing events Journal Article
In: PLoS Comput Biol, vol. 4, no. 8, pp. e1000147, 2008, ISSN: 1553-7358.
@article{pmid18688268,
  author    = {Sammeth, Michael and Foissac, Sylvain and Guigó, Roderic},
  title     = {A general definition and nomenclature for alternative splicing events},
  journal   = {PLoS Comput Biol},
  year      = {2008},
  date      = {2008-08-01},
  volume    = {4},
  number    = {8},
  pages     = {e1000147},
  issn      = {1553-7358},
  doi       = {10.1371/journal.pcbi.1000147},
  abstract  = {Understanding the molecular mechanisms responsible for the regulation of the transcriptome present in eukaryotic cells is one of the most challenging tasks in the postgenomic era. In this regard, alternative splicing (AS) is a key phenomenon contributing to the production of different mature transcripts from the same primary RNA sequence. As a plethora of different transcript forms is available in databases, a first step to uncover the biology that drives AS is to identify the different types of reflected splicing variation. In this work, we present a general definition of the AS event along with a notation system that involves the relative positions of the splice sites. This nomenclature univocally and dynamically assigns a specific ``AS code'' to every possible pattern of splicing variation. On the basis of this definition and the corresponding codes, we have developed a computational tool (AStalavista) that automatically characterizes the complete landscape of AS events in a given transcript annotation of a genome, thus providing a platform to investigate the transcriptome diversity across genes, chromosomes, and species. Our analysis reveals that a substantial part---in human more than a quarter---of the observed splicing variations are ignored in common classification pipelines. We have used AStalavista to investigate and to compare the AS landscape of different reference annotation sets in human and in other metazoan species and found that proportions of AS events change substantially depending on the annotation protocol, species-specific attributes, and coding constraints acting on the transcripts. The AStalavista system therefore provides a general framework to conduct specific studies investigating the occurrence, impact, and regulation of AS.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Sammeth, Michael; Valiente, Gabriel; Guigó, Roderic
Bubbles: Alternative Splicing Events of Arbitrary Dimension in Splicing Graphs Proceedings Article
In: Vingron, Martin; Wong, Limsoon (Ed.): Research in Computational Molecular Biology, pp. 372–395, Springer Berlin Heidelberg, Berlin, Heidelberg, 2008, ISBN: 978-3-540-78839-3.
@inproceedings{10.1007/978-3-540-78839-3_32,
  author    = {Sammeth, Michael and Valiente, Gabriel and Guigó, Roderic},
  editor    = {Vingron, Martin and Wong, Limsoon},
  title     = {Bubbles: Alternative Splicing Events of Arbitrary Dimension in Splicing Graphs},
  booktitle = {Research in Computational Molecular Biology},
  year      = {2008},
  date      = {2008-01-01},
  pages     = {372--395},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  isbn      = {978-3-540-78839-3},
  doi       = {10.1007/978-3-540-78839-3_32},
  abstract  = {Eukaryotic splicing structures are known to involve a high degree of alternative forms derived from a premature transcript by alternative splicing (AS). With the advent of new sequencing technologies, evidence for new splice forms becomes more and more easily available---bit by bit revealing that the true splicing diversity of ``AS events'' often comprises more than two alternatives and therefore cannot be sufficiently described by pairwise comparisons as conducted in analyzes hitherto. Further challenges emerge from the richness of data (millions of transcripts) and artifacts introduced during the technical process of obtaining transcript sequences (noise)---especially when dealing with single-read sequences known as expressed sequence tags (ESTs). We describe a novel method to efficiently predict AS events in different resolutions (i.e., dimensions) from transcript annotations that allows for combination of fragmented EST data with full-length cDNAs and can cope with large datasets containing noise. Applying this method to estimate the real complexity of alternative splicing, we found in human thousands of novel AS events that either have been disregarded or mischaracterized in earlier works. In fact, the majority of exons that are observed as ``mutually exclusive'' in pairwise comparisons truly involve at least one other alternative splice form that disagrees with their mutual exclusion. We identified four major classes that contain such ``optional'' neighboring exons and show that they clearly differ from each other in characteristics, especially in the length distribution of the middle intron.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Lacroix, Vincent; Sammeth, Michael; Guigó, Roderic; Bergeron, Anne
Exact Transcriptome Reconstruction from Short Sequence Reads Proceedings Article
In: Crandall, Keith A.; Lagergren, Jens (Ed.): Algorithms in Bioinformatics, pp. 50–63, Springer Berlin Heidelberg, Berlin, Heidelberg, 2008, ISBN: 978-3-540-87361-7.
@inproceedings{10.1007/978-3-540-87361-7_5,
  author    = {Lacroix, Vincent and Sammeth, Michael and Guigó, Roderic and Bergeron, Anne},
  editor    = {Crandall, Keith A. and Lagergren, Jens},
  title     = {Exact Transcriptome Reconstruction from Short Sequence Reads},
  booktitle = {Algorithms in Bioinformatics},
  year      = {2008},
  date      = {2008-01-01},
  pages     = {50--63},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  isbn      = {978-3-540-87361-7},
  doi       = {10.1007/978-3-540-87361-7_5},
  abstract  = {In this paper we address the problem of characterizing the RNA complement of a given cell type, that is, the set of RNA species and their relative copy number, from a large set of short sequence reads which have been randomly sampled from the cell's RNA sequences through a sequencing experiment. We refer to this problem as the transcriptome reconstruction problem, and we specifically investigate, both theoretically and practically, the conditions under which the problem can be solved. We demonstrate that, even under the assumption of exact information, neither single read nor paired-end read sequences guarantee theoretically that the reconstruction problem has a unique solution. However, by investigating the behavior of the best annotated human gene set, we also show that, in practice, paired-end reads -- but not single reads -- may be sufficient to solve the vast majority of the transcript variants species and abundances. We finally show that, when we assume that the RNA species existing in the cell are known, single read sequences can effectively be used to infer transcript variant abundances.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2007
Mellmann, Alexander; Weniger, Thomas; Berssenbrügge, Christoph; Rothgänger, Jörg; Sammeth, Michael; Stoye, Jens; Harmsen, Dag
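Based Upon Repeat Pattern (BURP): an algorithm to characterize the long-term evolution of Staphylococcus aureus populations based on spa polymorphisms Journal Article
In: BMC Microbiol, vol. 7, pp. 98, 2007, ISSN: 1471-2180.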
@article{pmid17967176,
  author    = {Mellmann, Alexander and Weniger, Thomas and Berssenbrügge, Christoph and Rothgänger, Jörg and Sammeth, Michael and Stoye, Jens and Harmsen, Dag},
  title     = {{Based Upon Repeat Pattern} ({BURP}): an algorithm to characterize the long-term evolution of {Staphylococcus} aureus populations based on spa polymorphisms},
  journal   = {BMC Microbiol},
  year      = {2007},
  date      = {2007-10-01},
  volume    = {7},
  pages     = {98},
  issn      = {1471-2180},
  doi       = {10.1186/1471-2180-7-98},
  abstract  = {BACKGROUND: For typing of Staphylococcus aureus, DNA sequencing of the repeat region of the protein A (spa) gene is a well established discriminatory method for outbreak investigations. Recently, it was hypothesized that this region also reflects long-term epidemiology. However, no automated and objective algorithm existed to cluster different repeat regions. In this study, the Based Upon Repeat Pattern (BURP) implementation that is a heuristic variant of the newly described EDSI algorithm was investigated to infer the clonal relatedness of different spa types. For calibration of BURP parameters, 400 representative S. aureus strains with different spa types were characterized by MLST and clustered using eBURST as ``gold standard'' for their phylogeny. Typing concordance analysis between eBURST and BURP clustering (spa-CC) were performed using all possible BURP parameters to determine their optimal combination. BURP was subsequently evaluated with a strain collection reflecting the breadth of diversity of S. aureus (JCM 2002; 40:4544). RESULTS: In total, the 400 strains exhibited 122 different MLST types. eBURST grouped them into 23 clonal complexes (CC; 354 isolates) and 33 singletons (46 isolates). BURP clustering of spa types using all possible parameter combinations and subsequent comparison with eBURST CCs resulted in concordances ranging from 8.2 to 96.2\%. However, 96.2\% concordance was reached only if spa types shorter than 8 repeats were excluded, which resulted in 37\% excluded spa types. Therefore, the optimal combination of the BURP parameters was ``exclude spa types shorter than 5 repeats'' and ``cluster spa types into spa-CC if cost distances are less than 4'' exhibiting 95.3\% concordance to eBURST. This algorithm identified 24 spa-CCs, 40 singletons, and excluded only 7.8\% spa types. Analyzing the natural population with these parameters, the comparison of whole-genome micro-array groupings (at the level of 0.31 Pearson correlation index) and spa-CCs gave a concordance of 87.1\%; BURP spa-CCs vs. manually grouped spa types resulted in 95.7\% concordance. CONCLUSION: BURP is the first automated and objective tool to infer clonal relatedness from spa repeat regions. It is able to extract an evolutionary signal rather congruent to MLST and micro-array data.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Foissac, Sylvain; Sammeth, Michael
ASTALAVISTA: dynamic and flexible analysis of alternative splicing events in custom gene datasets Journal Article
In: Nucleic Acids Res, vol. 35, no. Web Server issue, pp. W297–W299, 2007, ISSN: 1362-4962.
@article{pmid17485470,
  author    = {Foissac, Sylvain and Sammeth, Michael},
  title     = {{ASTALAVISTA}: dynamic and flexible analysis of alternative splicing events in custom gene datasets},
  journal   = {Nucleic Acids Res},
  year      = {2007},
  date      = {2007-07-01},
  volume    = {35},
  number    = {Web Server issue},
  pages     = {W297--W299},
  issn      = {1362-4962},
  doi       = {10.1093/nar/gkm311},
  abstract  = {In the process of establishing more and more complete annotations of eukaryotic genomes, a constantly growing number of alternative splicing (AS) events has been reported over the last decade. Consequently, the increasing transcript coverage also revealed the real complexity of some variations in the exon-intron structure between transcript variants and the need for computational tools to address `complex' AS events. ASTALAVISTA (alternative splicing transcriptional landscape visualization tool) employs an intuitive and complete notation system to univocally identify such events. The method extracts AS events dynamically from custom gene annotations, classifies them into groups of common types and visualizes a comprehensive picture of the resulting AS landscape. Thus, ASTALAVISTA can characterize AS for whole transcriptome data from reference annotations (GENCODE, REFSEQ, ENSEMBL) as well as for genes selected by the user according to common functional/structural attributes of interest: http://genome.imim.es/astalavista.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2006
Sammeth, Michael; Heringa, Jaap
Global multiple-sequence alignment with repeats Journal Article
In: Proteins, vol. 64, no. 1, pp. 263–274, 2006, ISSN: 1097-0134.
@article{pmid16609972,
  author    = {Sammeth, Michael and Heringa, Jaap},
  title     = {Global multiple-sequence alignment with repeats},
  journal   = {Proteins},
  year      = {2006},
  date      = {2006-07-01},
  volume    = {64},
  number    = {1},
  pages     = {263--274},
  issn      = {1097-0134},
  doi       = {10.1002/prot.20957},
  abstract  = {Repeating fragments in biological sequences are often essential for structure and function. Over the years, many methods have been developed to recognize repeats or to multiply align protein sequences. However, the integration of these two methodologies has been largely unexplored to date. Here, we present a new method capable of globally aligning multiple input sequences under the constraints of a given repeat analysis. The method supports different stringency modes to adapt to various levels of detail and reliability of the repeat information available.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Sammeth, Michael; Griebel, Thasso; Tille, Felix; Stoye, Jens
Panta rhei (QAlign2): an open graphical environment for sequence analysis Journal Article
In: Bioinformatics, vol. 22, no. 7, pp. 889–890, 2006, ISSN: 1367-4803.
@article{pmid16418234,
  author    = {Sammeth, Michael and Griebel, Thasso and Tille, Felix and Stoye, Jens},
  title     = {Panta rhei ({QAlign2}): an open graphical environment for sequence analysis},
  journal   = {Bioinformatics},
  year      = {2006},
  date      = {2006-04-01},
  volume    = {22},
  number    = {7},
  pages     = {889--890},
  issn      = {1367-4803},
  doi       = {10.1093/bioinformatics/btl007},
  abstract  = {MOTIVATION: The first version of the graphical multiple sequence alignment environment QAlign was published in 2003. Heavy response from the molecular-biological user community clearly demonstrated the need for such a platform. RESULTS: Panta rhei extends QAlign by several features. Major redesigns on the user interface, for instance, allow users to flexibily compose views for multiple projects. The new sequence viewer handles datasets with arbitrarily many and arbitrarily large sequences that may still be edited by guided block moving. More distance-based algorithms are available to interactively reconstruct phylogenetic trees which can now also be zoomed and navigated graphicaly. AVAILABILITY: Executables and the JAVA source code are available under the Apache license at http://gi.cebitec.uni-bielefeld.de/qalign CONTACT: qalign@cebitec.uni-bielefeld.de.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Sammeth, Michael; Stoye, Jens
Comparing tandem repeats with duplications and excisions of variable degree Journal Article
In: IEEE/ACM Trans Comput Biol Bioinform, vol. 3, no. 4, pp. 395–407, 2006, ISSN: 1545-5963.
@article{pmid17085848,
  author    = {Sammeth, Michael and Stoye, Jens},
  title     = {Comparing tandem repeats with duplications and excisions of variable degree},
  journal   = {IEEE/ACM Trans Comput Biol Bioinform},
  year      = {2006},
  date      = {2006-01-01},
  volume    = {3},
  number    = {4},
  pages     = {395--407},
  issn      = {1545-5963},
  doi       = {10.1109/TCBB.2006.46},
  abstract  = {Traditional sequence comparison by alignment employs a mutation model comprised of two events, substitutions and indels (insertions or deletions) of single positions. However, modern genetic analysis knows a variety of more complex mutation events (e.g., duplications, excisions, and rearrangements), especially regarding DNA. With ever more DNA sequence data becoming available, the need to accurately compare sequences which have clearly undergone more complicated types of mutational processes is becoming critical. Herein we introduce a new method for pairwise alignment and comparison of sequences with respect to the special evolution of tandem repeats: substitutions and indels of single positions and, additionally, duplications and excisions of variable degree (i.e., of one or more repeat copies simultaneously) are taken into account. To evaluate our method, we apply it to the spa VNTR (variable number of tandem repeats) cluster of Staphylococcus aureus, a bacterium of high medical importance.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2005
Sammeth, Michael; Weniger, Thomas; Harmsen, Dag; Stoye, Jens
Alignment of Tandem Repeats with Excision, Duplication, Substitution and Indels (EDSI) Proceedings Article
In: Casadio, Rita; Myers, Gene (Ed.): Algorithms in Bioinformatics, pp. 276–290, Springer Berlin Heidelberg, Berlin, Heidelberg, 2005, ISBN: 978-3-540-31812-5.
@inproceedings{10.1007/11557067_23,
  author    = {Sammeth, Michael and Weniger, Thomas and Harmsen, Dag and Stoye, Jens},
  editor    = {Casadio, Rita and Myers, Gene},
  title     = {Alignment of Tandem Repeats with Excision, Duplication, Substitution and Indels ({EDSI})},
  booktitle = {Algorithms in Bioinformatics},
  year      = {2005},
  date      = {2005-01-01},
  pages     = {276--290},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  isbn      = {978-3-540-31812-5},
  doi       = {10.1007/11557067_23},
  abstract  = {Traditional sequence comparison by alignment applies a mutation model comprising two events, substitutions and indels (insertions or deletions) of single positions (SI). However, modern genetic analysis knows a variety of more complex mutation events (e.g., duplications, excisions and rearrangements), especially regarding DNA. With the ever more DNA sequence data becoming available, the need to accurately compare sequences which have clearly undergone more complicated types of mutational processes is becoming critical.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}