2008
Lacroix, Vincent; Sammeth, Michael; Guigo, Roderic; Bergeron, Anne
Exact Transcriptome Reconstruction from Short Sequence Reads Proceedings Article
In: Crandall, Keith A.; Lagergren, Jens (Eds.): Algorithms in Bioinformatics, pp. 50–63, Springer Berlin Heidelberg, Berlin, Heidelberg, 2008, ISBN: 978-3-540-87361-7.
@inproceedings{10.1007/978-3-540-87361-7_5,
  title = {Exact Transcriptome Reconstruction from Short Sequence Reads},
  author = {Vincent Lacroix and Michael Sammeth and Roderic Guigo and Anne Bergeron},
  editor = {Keith A. Crandall and Jens Lagergren},
  isbn = {978-3-540-87361-7},
  year = {2008},
  date = {2008-01-01},
  booktitle = {Algorithms in Bioinformatics},
  pages = {50--63},
  publisher = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = {In this paper we address the problem of characterizing the RNA complement of a given cell type, that is, the set of RNA species and their relative copy number, from a large set of short sequence reads which have been randomly sampled from the cell's RNA sequences through a sequencing experiment. We refer to this problem as the transcriptome reconstruction problem, and we specifically investigate, both theoretically and practically, the conditions under which the problem can be solved. We demonstrate that, even under the assumption of exact information, neither single read nor paired-end read sequences guarantee theoretically that the reconstruction problem has a unique solution. However, by investigating the behavior of the best annotated human gene set, we also show that, in practice, paired-end reads – but not single reads – may be sufficient to solve the vast majority of the transcript variants species and abundances. We finally show that, when we assume that the RNA species existing in the cell are known, single read sequences can effectively be used to infer transcript variant abundances.},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
2007
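Mellmann, Alexander; Weniger, Thomas; Berssenbrügge, Christoph; Rothgänger, Jörg; Sammeth, Michael; Stoye, Jens; Harmsen, Dag
Based Upon Repeat Pattern (BURP): an algorithm to characterize the long-term evolution of Staphylococcus aureus populations based on spa polymorphisms Journal Article
In: BMC Microbiol, vol. 7, pp. 98, 2007, ISSN: 1471-2180.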
@article{pmid17967176,
  title = {{Based Upon Repeat Pattern} ({BURP}): an algorithm to characterize the long-term evolution of {Staphylococcus} aureus populations based on spa polymorphisms},
  author = {Alexander Mellmann and Thomas Weniger and Christoph Berssenbrügge and Jörg Rothgänger and Michael Sammeth and Jens Stoye and Dag Harmsen},
  doi = {10.1186/1471-2180-7-98},
  issn = {1471-2180},
  year = {2007},
  date = {2007-10-01},
  journal = {BMC Microbiol},
  volume = {7},
  pages = {98},
  abstract = {BACKGROUND: For typing of Staphylococcus aureus, DNA sequencing of the repeat region of the protein A (spa) gene is a well established discriminatory method for outbreak investigations. Recently, it was hypothesized that this region also reflects long-term epidemiology. However, no automated and objective algorithm existed to cluster different repeat regions. In this study, the Based Upon Repeat Pattern (BURP) implementation that is a heuristic variant of the newly described EDSI algorithm was investigated to infer the clonal relatedness of different spa types. For calibration of BURP parameters, 400 representative S. aureus strains with different spa types were characterized by MLST and clustered using eBURST as "gold standard" for their phylogeny. Typing concordance analysis between eBURST and BURP clustering (spa-CC) were performed using all possible BURP parameters to determine their optimal combination. BURP was subsequently evaluated with a strain collection reflecting the breadth of diversity of S. aureus (JCM 2002; 40:4544). RESULTS: In total, the 400 strains exhibited 122 different MLST types. eBURST grouped them into 23 clonal complexes (CC; 354 isolates) and 33 singletons (46 isolates). BURP clustering of spa types using all possible parameter combinations and subsequent comparison with eBURST CCs resulted in concordances ranging from 8.2 to 96.2%. However, 96.2% concordance was reached only if spa types shorter than 8 repeats were excluded, which resulted in 37% excluded spa types. Therefore, the optimal combination of the BURP parameters was "exclude spa types shorter than 5 repeats" and "cluster spa types into spa-CC if cost distances are less than 4" exhibiting 95.3% concordance to eBURST. This algorithm identified 24 spa-CCs, 40 singletons, and excluded only 7.8% spa types. Analyzing the natural population with these parameters, the comparison of whole-genome micro-array groupings (at the level of 0.31 Pearson correlation index) and spa-CCs gave a concordance of 87.1%; BURP spa-CCs vs. manually grouped spa types resulted in 95.7% concordance. CONCLUSION: BURP is the first automated and objective tool to infer clonal relatedness from spa repeat regions. It is able to extract an evolutionary signal rather congruent to MLST and micro-array data.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Foissac, Sylvain; Sammeth, Michael
ASTALAVISTA: dynamic and flexible analysis of alternative splicing events in custom gene datasets Journal Article
In: Nucleic Acids Res, vol. 35, no. Web Server issue, pp. W297–W299, 2007, ISSN: 1362-4962.
@article{pmid17485470,
  title = {{ASTALAVISTA}: dynamic and flexible analysis of alternative splicing events in custom gene datasets},
  author = {Sylvain Foissac and Michael Sammeth},
  doi = {10.1093/nar/gkm311},
  issn = {1362-4962},
  year = {2007},
  date = {2007-07-01},
  journal = {Nucleic Acids Res},
  volume = {35},
  number = {Web Server issue},
  pages = {W297--W299},
  abstract = {In the process of establishing more and more complete annotations of eukaryotic genomes, a constantly growing number of alternative splicing (AS) events has been reported over the last decade. Consequently, the increasing transcript coverage also revealed the real complexity of some variations in the exon-intron structure between transcript variants and the need for computational tools to address 'complex' AS events. ASTALAVISTA (alternative splicing transcriptional landscape visualization tool) employs an intuitive and complete notation system to univocally identify such events. The method extracts AS events dynamically from custom gene annotations, classifies them into groups of common types and visualizes a comprehensive picture of the resulting AS landscape. Thus, ASTALAVISTA can characterize AS for whole transcriptome data from reference annotations (GENCODE, REFSEQ, ENSEMBL) as well as for genes selected by the user according to common functional/structural attributes of interest: http://genome.imim.es/astalavista.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
2006
Sammeth, Michael; Heringa, Jaap
Global multiple-sequence alignment with repeats Journal Article
In: Proteins, vol. 64, no. 1, pp. 263–274, 2006, ISSN: 1097-0134.
@article{pmid16609972,
  author    = {Michael Sammeth and Jaap Heringa},
  title     = {Global multiple-sequence alignment with repeats},
  journal   = {Proteins},
  volume    = {64},
  number    = {1},
  pages     = {263--274},
  year      = {2006},
  date      = {2006-07-01},
  doi       = {10.1002/prot.20957},
  issn      = {1097-0134},
  abstract  = {Repeating fragments in biological sequences are often essential for structure and function. Over the years, many methods have been developed to recognize repeats or to multiply align protein sequences. However, the integration of these two methodologies has been largely unexplored to date. Here, we present a new method capable of globally aligning multiple input sequences under the constraints of a given repeat analysis. The method supports different stringency modes to adapt to various levels of detail and reliability of the repeat information available.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Sammeth, Michael; Griebel, Thasso; Tille, Felix; Stoye, Jens
Panta rhei (QAlign2): an open graphical environment for sequence analysis Journal Article
In: Bioinformatics, vol. 22, no. 7, pp. 889–890, 2006, ISSN: 1367-4803.
@article{pmid16418234,
  title = {Panta rhei ({QAlign2}): an open graphical environment for sequence analysis},
  author = {Michael Sammeth and Thasso Griebel and Felix Tille and Jens Stoye},
  doi = {10.1093/bioinformatics/btl007},
  issn = {1367-4803},
  year = {2006},
  date = {2006-04-01},
  journal = {Bioinformatics},
  volume = {22},
  number = {7},
  pages = {889--890},
  abstract = {MOTIVATION: The first version of the graphical multiple sequence alignment environment QAlign was published in 2003. Heavy response from the molecular-biological user community clearly demonstrated the need for such a platform. RESULTS: Panta rhei extends QAlign by several features. Major redesigns on the user interface, for instance, allow users to flexibily compose views for multiple projects. The new sequence viewer handles datasets with arbitrarily many and arbitrarily large sequences that may still be edited by guided block moving. More distance-based algorithms are available to interactively reconstruct phylogenetic trees which can now also be zoomed and navigated graphicaly. AVAILABILITY: Executables and the JAVA source code are available under the Apache license at http://gi.cebitec.uni-bielefeld.de/qalign CONTACT: qalign@cebitec.uni-bielefeld.de.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Sammeth, Michael; Stoye, Jens
Comparing tandem repeats with duplications and excisions of variable degree Journal Article
In: IEEE/ACM Trans Comput Biol Bioinform, vol. 3, no. 4, pp. 395–407, 2006, ISSN: 1545-5963.
@article{pmid17085848,
  author    = {Michael Sammeth and Jens Stoye},
  title     = {Comparing tandem repeats with duplications and excisions of variable degree},
  journal   = {IEEE/ACM Trans Comput Biol Bioinform},
  volume    = {3},
  number    = {4},
  pages     = {395--407},
  year      = {2006},
  date      = {2006-01-01},
  doi       = {10.1109/TCBB.2006.46},
  issn      = {1545-5963},
  abstract  = {Traditional sequence comparison by alignment employs a mutation model comprised of two events, substitutions and indels (insertions or deletions) of single positions. However, modern genetic analysis knows a variety of more complex mutation events (e.g., duplications, excisions, and rearrangements), especially regarding DNA. With ever more DNA sequence data becoming available, the need to accurately compare sequences which have clearly undergone more complicated types of mutational processes is becoming critical. Herein we introduce a new method for pairwise alignment and comparison of sequences with respect to the special evolution of tandem repeats: substitutions and indels of single positions and, additionally, duplications and excisions of variable degree (i.e., of one or more repeat copies simultaneously) are taken into account. To evaluate our method, we apply it to the spa VNTR (variable number of tandem repeats) cluster of Staphylococcus aureus, a bacterium of high medical importance.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
2005
Sammeth, Michael; Weniger, Thomas; Harmsen, Dag; Stoye, Jens
Alignment of Tandem Repeats with Excision, Duplication, Substitution and Indels (EDSI) Proceedings Article
In: Casadio, Rita; Myers, Gene (Eds.): Algorithms in Bioinformatics, pp. 276–290, Springer Berlin Heidelberg, Berlin, Heidelberg, 2005, ISBN: 978-3-540-31812-5.
@inproceedings{10.1007/11557067_23,
  title = {Alignment of Tandem Repeats with Excision, Duplication, Substitution and Indels ({EDSI})},
  author = {Michael Sammeth and Thomas Weniger and Dag Harmsen and Jens Stoye},
  editor = {Rita Casadio and Gene Myers},
  isbn = {978-3-540-31812-5},
  year = {2005},
  date = {2005-01-01},
  booktitle = {Algorithms in Bioinformatics},
  pages = {276--290},
  publisher = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = {Traditional sequence comparison by alignment applies a mutation model comprising two events, substitutions and indels (insertions or deletions) of single positions (SI). However, modern genetic analysis knows a variety of more complex mutation events (e.g., duplications, excisions and rearrangements), especially regarding DNA. With the ever more DNA sequence data becoming available, the need to accurately compare sequences which have clearly undergone more complicated types of mutational processes is becoming critical.},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
2003
Harmsen, Dag; Dostal, Stefan; Roth, Andreas; Niemann, Stefan; Rothgänger, Jörg; Sammeth, Michael; Albert, Jürgen; Frosch, Matthias; Richter, Elvira
RIDOM: comprehensive and public sequence database for identification of Mycobacterium species Journal Article
In: BMC Infect Dis, vol. 3, pp. 26, 2003, ISSN: 1471-2334.
@article{pmid14611664,
  title = {{RIDOM}: comprehensive and public sequence database for identification of {Mycobacterium} species},
  author = {Dag Harmsen and Stefan Dostal and Andreas Roth and Stefan Niemann and Jörg Rothgänger and Michael Sammeth and Jürgen Albert and Matthias Frosch and Elvira Richter},
  doi = {10.1186/1471-2334-3-26},
  issn = {1471-2334},
  year = {2003},
  date = {2003-11-01},
  journal = {BMC Infect Dis},
  volume = {3},
  pages = {26},
  abstract = {BACKGROUND: Molecular identification of Mycobacterium species has two primary advantages when compared to phenotypic identification: rapid turn-around time and improved accuracy. The information content of the 5' end of the 16S ribosomal RNA gene (16S rDNA) is sufficient for identification of most bacterial species. However, reliable sequence-based identification is hampered by many faulty and some missing sequence entries in publicly accessible databases. METHODS: In order to establish an improved 16S rDNA sequence database for the identification of clinical and environmental isolates, we sequenced both strands of the 5' end of 16S rDNA (Escherichia coli positions 54 to 510) from 199 mycobacterial culture collection isolates. All validly described species (n = 89; up to March 21, 2000) and nearly all published sequevar variants were included. If the 16S rDNA sequences were not discriminatory, the internal transcribed spacer (ITS) region sequences (n = 84) were also determined. RESULTS: Using 5'-16S rDNA sequencing a total of 64 different mycobacterial species (71.9%) could be identified. With the additional input of the ITS sequence, a further 16 species or subspecies could be differentiated. Only Mycobacterium tuberculosis complex species, M. marinum/M. ulcerans and the M. avium subspecies could not be differentiated using 5'-16S rDNA or ITS sequencing. A total of 77 culture collection strain sequences, exhibiting an overlap of at least 80% and identical by strain number to the isolates used in this study, were found in the GenBank. Comparing these with our sequences revealed that an average of 4.31 nucleotide differences (SD +/- 0.57) were present. CONCLUSIONS: The data from this analysis show that it is possible to differentiate most mycobacterial species by sequence analysis of partial 16S rDNA. The high-quality sequences reported here, together with ancillary information (e.g., taxonomic, medical), are available in a public database, which is currently being expanded in the RIDOM project (http://www.ridom-rdna.de), for similarity searches.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Sammeth, Michael; Morgenstern, Burkhard; Stoye, Jens
Divide-and-conquer multiple alignment with segment-based constraints Journal Article
In: Bioinformatics, vol. 19 Suppl 2, pp. ii189–ii195, 2003, ISSN: 1367-4811.
@article{pmid14534189,
  title = {Divide-and-conquer multiple alignment with segment-based constraints},
  author = {Michael Sammeth and Burkhard Morgenstern and Jens Stoye},
  doi = {10.1093/bioinformatics/btg1077},
  issn = {1367-4811},
  year = {2003},
  date = {2003-10-01},
  journal = {Bioinformatics},
  volume = {19},
  number = {Suppl 2},
  pages = {ii189--ii195},
  abstract = {A large number of methods for multiple sequence alignment are currently available. Recent benchmarking tests demonstrated that strengths and drawbacks of these methods differ substantially. Global strategies can be outperformed by approaches based on local similarities and vice versa, depending on the characteristics of the input sequences. In recent years, mixed approaches that include both global and local features have shown promising results. Herein, we introduce a new algorithm for multiple sequence alignment that integrates the global divide-and-conquer approach with the local segment-based approach, thereby combining the strengths of those two strategies.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Sammeth, M; Rothgänger, J; Esser, W; Albert, J; Stoye, J; Harmsen, D
QAlign: quality-based multiple alignments with dynamic phylogenetic analysis Journal Article
In: Bioinformatics, vol. 19, no. 12, pp. 1592–1593, 2003, ISSN: 1367-4803.
@article{pmid12912847,
  title = {{QAlign}: quality-based multiple alignments with dynamic phylogenetic analysis},
  author = {M Sammeth and J Rothgänger and W Esser and J Albert and J Stoye and D Harmsen},
  doi = {10.1093/bioinformatics/btg197},
  issn = {1367-4803},
  year = {2003},
  date = {2003-08-01},
  journal = {Bioinformatics},
  volume = {19},
  number = {12},
  pages = {1592--1593},
  abstract = {Integrating different alignment strategies, a layout editor and tools deriving phylogenetic trees in a 'multiple alignment environment' helps to investigate and enhance results of multiple sequence alignment by hand. QAlign combines algorithms for fast progressive and accurate simultaneous multiple alignment with a versatile editor and a dynamic phylogenetic analysis in a convenient graphical user interface.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}