publications.bib
@inproceedings{GambinEtAlBIOSTEC11,
  author = {Gambin, Anna and Lasota, S{\l}awomir and Startek, Micha{\l} and Sykulski, Maciej and No{\'e}, Laurent and Kucherov, Gregory},
  title = {Subset seed extension to {Protein} {BLAST}},
  booktitle = {Proceedings of the International Conference on Bioinformatics Models, Methods and Algorithms (BIOINFORMATICS 2011), January 26-29, 2011, Rome (Italy)},
  pages = {149--158},
  year = {2011},
  month = jan,
  publisher = {{SciTePress} Digital Library},
  doi = {10.5220/0003147601490158},
  pdf = {http://www.lifl.fr/~noe/files/pp_BIOSTEC11.pdf},
  hal-url = {http://hal.inria.fr/inria-00609791/en/},
  abstract = {The seeding technique became central in the theory of
    sequence alignment and there are several efficient tools applying seeds to DNA
    homology search. Recently, a concept of subset seeds has been proposed for
    similarity search in protein sequences. We experimentally evaluate the
    applicability of subset seeds to protein homology search. We advocate the use
    of multiple subset seeds derived from a hierarchical tree of amino acid
    residues. Our method computes, by an evolutionary algorithm, seeds that are
    specifically designed for a given protein family. The representation of seeds
    by deterministic finite automata (DFAs) is developed and built into the
    NCBI-BLAST software. This extended tool, named SeedBLAST, is compared to the
    original NCBI-BLAST on the GPCR protein family. Our results demonstrate a clear
    superiority of SeedBLAST in terms of efficiency, especially in the case of
    twilight zone hits. SeedBLAST is an open source software freely available at
    http://bioputer.mimuw.edu.pl/papers/sblast. Supplementary material and user
    manual are also provided.}
}
@inproceedings{NoeGirdeaKucherovRECOMB10,
  author = {No{\'e}, Laurent and G{\^i}rdea, Marta and Kucherov, Gregory},
  title = {Seed design framework for mapping {SOLiD} reads},
  booktitle = {Proceedings of the 14th Annual International Conference on Research in Computational Molecular Biology ({RECOMB}), April 25-28, 2010, Lisbon (Portugal)},
  editor = {Berger, B.},
  series = {Lecture Notes in Computer Science},
  volume = {6044},
  pages = {384--396},
  publisher = {Springer},
  year = {2010},
  month = apr,
  doi = {10.1007/978-3-642-12683-3_25},
  url = {http://www.springerlink.com/content/41535x341gu34131/},
  pdf = {http://www.lifl.fr/~noe/files/pp_RECOMB10.pdf},
  postscript = {http://www.lifl.fr/~noe/files/pp_RECOMB10.ps.gz},
  hal-url = {http://hal.inria.fr/inria-00484642/en/},
  eprint = {1006.2625},
  abstract = {The advent of high-throughput sequencing technologies
    constituted a major advance in genomic studies, offering new prospects in a
    wide range of applications. We propose a rigorous and flexible algorithmic
    solution to mapping SOLiD color-space reads to a reference genome. The solution
    relies on an advanced method of seed design that uses a faithful probabilistic
    model of read matches and, on the other hand, a novel seeding principle
    especially adapted to read mapping. Our method can handle both lossy and
    lossless frameworks and is able to distinguish, at the level of seed design,
    between SNPs and reading errors. We illustrate our approach by several seed
    designs and demonstrate their efficiency.},
  inria = {Sequoia},
  labo = {dans},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  x-pays = {RU},
  aeres = {ACT},
  selectif = {oui},
  optnote = {(submitted: 176, accepted: 36, acceptance rate: 0.20)}
}
@inproceedings{GirdeaNoeKucherovWABI09,
  author = {G{\^i}rdea, Marta and Kucherov, Gregory and No{\'e}, Laurent},
  title = {Back-translation for discovering distant protein homologies},
  booktitle = {Proceedings of the 9th International Workshop on Algorithms in Bioinformatics ({WABI}), Philadelphia ({USA})},
  editor = {Salzberg, S. L. and Warnow, T.},
  series = {Lecture Notes in Computer Science},
  volume = {5724},
  pages = {108--120},
  publisher = {Springer},
  year = {2009},
  month = sep,
  doi = {10.1007/978-3-642-04241-6_10},
  url = {http://www.springerlink.com/content/3236004m84465n7j/},
  pdf = {http://www.lifl.fr/~noe/files/pp_WABI09.pdf},
  postscript = {http://www.lifl.fr/~noe/files/pp_WABI09.ps.gz},
  hal-url = {http://hal.inria.fr/inria-00448741/en/},
  eprint = {1001.4603},
  abstract = {Frameshift mutations in protein-coding DNA sequences produce
    a drastic change in the resulting protein sequence, which prevents classic
    protein alignment methods from revealing the proteins' common origin. Moreover,
    when a large number of substitutions are additionally involved in the
    divergence, the homology detection becomes difficult even at the DNA level. To
    cope with this situation, we propose a novel method to infer distant homology
    relations of two proteins, that accounts for frameshift and point mutations
    that may have affected the coding sequences. We design a dynamic programming
    alignment algorithm over memory-efficient graph representations of the complete
    set of putative DNA sequences of each protein, with the goal of determining the
    two putative DNA sequences which have the best scoring alignment under a
    powerful scoring system designed to reflect the most probable evolutionary
    process. This allows us to uncover evolutionary information that is not
    captured by traditional alignment methods, which is confirmed by biologically
    significant examples.},
  inria = {Sequoia},
  labo = {dans},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  x-proceedings = {yes},
  aeres = {ACT},
  selectif = {oui},
  optnote = {(submitted: 90, accepted: 34, acceptance rate: 0.38)}
}
@inproceedings{UricaruEtAlJOBIM09,
  author = {Uricaru, Raluca and Michotey, C{\'e}lia and No{\'e}, Laurent and Chiapello, H{\'e}l{\`e}ne and Rivals, {\'E}ric},
  title = {Improved sensitivity and reliability of anchor based genome alignment},
  booktitle = {Proceedings of the 10th Open Days in Biology, Computer Science and Mathematics (JOBIM), June 9-11, 2009, Nantes (France)},
  pages = {31--36},
  year = {2009},
  month = jun,
  pdf = {http://www.lirmm.fr/~uricaru/articles/jobim26.pdf},
  hal-url = {http://hal.inria.fr/lirmm-00407215/en/},
  abstract = {Whole genome alignment is a challenging problem in
    computational comparative genomics. It is essential for the functional
    annotation of genomes, the understanding of their evolution, and for
    phylogenomics. Many global alignment programs are heuristic variations on the
    anchor based strategy, which relies on the initial detection of similarities
    and their selection in an ordered chain. Considering that alignment tools fail
    to align some pairs of bacterial strains, we investigate whether this is
    intrinsically due to the strategy or to a lack of sensitivity of the similarity
    detection method. For this, we implement and compare 6 programs based on three
    different detection methods (from exact matches to local alignments) on a large
    benchmark set. Our results suggest that the sensitivity of well known methods,
    like {MGA} or {Mauve}, can be greatly improved in the case of divergent genomes
    if one exploits spaced seeds at the detection phase. In other cases, such
    methods yield alignments that cover nearly the whole genome. Then, we focus on
    global reliability of alignments: should an aligned pair of segments be
    included in the global genome alignment? We investigate this reliability
    according to both the segment ``alignability'' and to inclusion of orthologs.
    Again, we provide evidence that for both close and divergent genomes, one of
    our programs, {YH}, achieves alignments with sometimes a lower coverage, but a
    higher inclusion of orthologs. It opens the way to the first reliable
    alignments for some highly divergent species like Buchnera aphidicola or
    Prochlorococcus marinus.}
}
@inproceedings{RoytbergEtAlALBIO08,
  author = {Roytberg, Mikhail A. and Gambin, Anna and No{\'e}, Laurent and Lasota, S{\l}awomir and Furletova, Eugenia and Szczurek, Ewa and Kucherov, Gregory},
  title = {Efficient seeding techniques for protein similarity search},
  booktitle = {Bioinformatics Research and Development, Proceedings of the 2nd International Conference BIRD 2008, Vienna (Austria), July 7-9, 2008},
  editor = {Elloumi, M. and K{\"u}ng, J. and Linial, M. and Murphy, R. F. and Schneider, K. and Toma, C.},
  series = {Communications in Computer and Information Science},
  volume = {13},
  pages = {466--478},
  publisher = {Springer},
  year = {2008},
  month = jul,
  doi = {10.1007/978-3-540-70600-7_36},
  url = {http://www.springerlink.com/content/m3560l36r573xjr5},
  hal-url = {http://hal.inria.fr/inria-00335564/en/},
  eprint = {0810.5434},
  pdf = {http://www.lifl.fr/~noe/files/pp_ALBIO08.pdf},
  postscript = {http://www.lifl.fr/~noe/files/pp_ALBIO08.ps.gz},
  abstract = {We apply the concept of subset seeds proposed in [A unifying
    framework for seed sensitivity and its application to subset seeds] to
    similarity search in protein sequences. The main question studied is the
    design of efficient seed alphabets to construct seeds with optimal
    sensitivity/selectivity trade-offs. We propose several different design methods
    and use them to construct several alphabets. We then perform an analysis
    of seeds built over those alphabets and compare them with the standard
    Blastp seeding method [2,3], as well as with the family of vector seeds
    proposed in [4]. While the formalism of subset seed is less expressive
    (but less costly to implement) than the accumulative principle used in
    Blastp and vector seeds, our seeds show a similar or even better performance
    than Blastp on Bernoulli models of proteins compatible with
    the common BLOSUM62 matrix.},
  inria = {Sequoia},
  labo = {dans},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  x-proceedings = {yes},
  x-pays = {RU,PL},
  aeres = {ACT},
  selectif = {non},
  optnote = {(submitted: 61, accepted: 30, acceptance rate: 0.49)}
}
@inproceedings{KucherovNoeRoytbergCIAA07,
  author = {Kucherov, Gregory and No{\'e}, Laurent and Roytberg, Mikhail A.},
  title = {Subset Seed Automaton},
  booktitle = {Proceedings of the 12th International Conference on Implementation and Application of Automata ({CIAA}), July 16-18, 2007, Prague (Czech Republic)},
  editor = {Holub, J. and Zdarek, J.},
  series = {Lecture Notes in Computer Science},
  volume = {4783},
  pages = {180--191},
  publisher = {Springer},
  year = {2007},
  month = jul,
  doi = {10.1007/978-3-540-76336-9_18},
  url = {http://www.springerlink.com/content/y824l20554002756},
  pdf = {http://www.lifl.fr/~noe/files/pp_CIAA07.pdf},
  postscript = {http://www.lifl.fr/~noe/files/pp_CIAA07.ps.gz},
  hal-url = {http://hal.inria.fr/inria-00170414/en/},
  abstract = {We study the pattern matching automaton introduced in
    [Kucherov-Noe-Roytberg-JBCB-06] for the purpose of seed-based similarity
    search. We show that our definition provides a compact automaton, much smaller
    than the one obtained by applying the Aho-Corasick construction. We study
    properties of this automaton and present an efficient implementation of the
    automaton construction. We also present some experimental results and show that
    this automaton can be successfully applied to more general situations.},
  inria = {Sequoia},
  labo = {dans},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  x-proceedings = {yes},
  x-pays = {RU},
  aeres = {ACT},
  selectif = {oui},
  optnote = {(submitted: 79, accepted: 23, acceptance rate: 0.29)}
}
@comment{NOTE(review): the doi of the next entry has no chapter suffix and looks like the DOI of the whole LNCS volume; confirm the chapter-level DOI before relying on it.}
@inproceedings{PeterlongoEtAlPBC07,
  author = {Peterlongo, Pierre and No{\'e}, Laurent and Lavenier, Dominique and Georges, Gilles and Jacques, Julien and Kucherov, Gregory and Giraud, Mathieu},
  title = {Protein similarity search with subset seeds on a dedicated reconfigurable hardware},
  booktitle = {Proceedings of the 2nd Workshop on Parallel Bio-Computing ({PBC}), September 9-12, 2007, Gdansk (Poland)},
  editor = {Wyrzykowski, R. and Dongarra, J. and Karczewski, K. and Wasniewski, J.},
  series = {Lecture Notes in Computer Science},
  volume = {4967},
  pages = {1240--1248},
  publisher = {Springer},
  year = {2008},
  month = sep,
  doi = {10.1007/978-3-540-68111-3},
  pdf = {http://www.lifl.fr/~giraud/publis/peterlongo-pbc-07.pdf},
  url = {http://www.springerlink.com/content/2280v0131631528r},
  hal-url = {http://hal.inria.fr/inria-00178325/en/},
  abstract = {With a sharp increase of available DNA and protein sequence
    data, new precise and fast similarity search methods are needed for large-scale
    genome and proteome comparisons. Modern seed-based techniques
    of similarity search (spaced seeds, multiple seeds, subset seeds) provide
    a better sensitivity/specificity ratio. We present an implementation of
    such a seed-based technique on a parallel specialized hardware embedding
    reconfigurable architecture (FPGA), where the FPGA is tightly
    connected to large capacity Flash memories. This parallel system allows
    large databases to be fully indexed and rapidly accessed. Compared to
    traditional approaches presented by the Blastp software, we obtain both
    a significant speed-up and better results. To the best of our knowledge,
    this is the first attempt to exploit efficient seed-based algorithms for
    parallelizing the sequence similarity search.},
  inria = {Sequoia},
  labo = {dans},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  x-proceedings = {yes},
  aeres = {ACT},
  selectif = {oui}
}
@inproceedings{KucherovNoeRoytbergWABI05,
  author = {Kucherov, Gregory and No{\'e}, Laurent and Roytberg, Mikhail A.},
  title = {A unifying framework for seed sensitivity and its application to subset seeds},
  booktitle = {Proceedings of the 5th International Workshop on Algorithms in Bioinformatics ({WABI}), October 3-6, 2005, Mallorca (Spain)},
  editor = {Casadio, R. and Myers, G.},
  series = {Lecture Notes in Computer Science},
  volume = {3692},
  pages = {251--263},
  publisher = {Springer},
  year = {2005},
  month = oct,
  doi = {10.1007/11557067_21},
  url = {http://springerlink.metapress.com/content/72384x40t0v2j254/},
  hal-url = {http://hal.inria.fr/inria-00001164/en/},
  eprint = {cs.OH/0603106},
  pdf = {http://www.lifl.fr/~noe/files/pp_WABI05.pdf},
  postscript = {http://www.lifl.fr/~noe/files/pp_WABI05.ps.gz},
  abstract = {We propose a general approach to compute the seed sensitivity,
    that can be applied to different definitions of seeds. It treats separately
    three components of the seed sensitivity problem -- a set of target alignments,
    an associated probability distribution, and a seed model -- that are specified
    by distinct finite automata. The approach is then applied to a new concept of
    {\em subset seeds} for which we propose an efficient automaton construction.
    Experimental results confirm that sensitive subset seeds can be efficiently
    designed using our approach, and can then be used in similarity search producing
    better results than ordinary spaced seeds.},
  inria = {ADAGE},
  labo = {hors},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  x-proceedings = {yes},
  x-pays = {RU},
  aeres = {ACT},
  selectif = {oui},
  optnote = {(submitted: 94, accepted: 35, acceptance rate: 0.37)}
}
@inproceedings{KucherovNoeRoytbergCPM04,
  author = {Kucherov, Gregory and No{\'e}, Laurent and Roytberg, Mikhail A.},
  title = {Multi-seed lossless filtration},
  booktitle = {Proceedings of the 15th Annual Combinatorial Pattern Matching Symposium (CPM), July 5-7, 2004, Istanbul (Turkey)},
  editor = {Sahinalp, S. C. and Muthukrishnan, S. and Dogrusoz, U.},
  series = {Lecture Notes in Computer Science},
  volume = {3109},
  pages = {297--310},
  publisher = {Springer},
  year = {2004},
  month = jul,
  isbn = {3-540-22341-X},
  doi = {10.1007/978-3-540-27801-6_22},
  url = {http://www.springerlink.com/content/hrw7dvbn8ua9q6ly},
  pdf = {http://www.lifl.fr/~noe/files/pp_CPM04.pdf},
  postscript = {http://www.lifl.fr/~noe/files/pp_CPM04.ps.gz},
  hal-url = {http://hal.inria.fr/inria-00001162/en/},
  abstract = {We study a method of seed-based lossless filtration for
    approximate string matching and related bioinformatics applications. The method
    is based on a simultaneous use of several spaced seeds rather than a single
    seed as studied by Burkhardt and Karkkainen. We present algorithms to compute
    several important parameters of seed families, study their combinatorial
    properties, and describe several techniques to construct efficient families. We
    also report a large-scale application of the proposed technique to the problem
    of oligonucleotide selection for an {EST} sequence database.},
  inria = {ADAGE},
  labo = {hors},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  x-proceedings = {yes},
  aeres = {ACT},
  selectif = {oui},
  optnote = {(submitted: 79, accepted: 36, acceptance rate: 0.46)}
}
@comment{NOTE(review): the eprint of the next entry (cs.OH/0603106) is identical to the eprint of KucherovNoeRoytbergWABI05; one of the two is presumably a copy-paste error; confirm against the archive.}
@inproceedings{KucherovNoePontyBIBE04,
  author = {Kucherov, Gregory and No{\'e}, Laurent and Ponty, Yann},
  title = {Estimating seed sensitivity on homogeneous alignments},
  booktitle = {Proceedings of the IEEE 4th Symposium on Bioinformatics and Bioengineering (BIBE), May 19-21, 2004, Taichung (Taiwan)},
  pages = {387--394},
  publisher = {IEEE Computer Society Press},
  year = {2004},
  month = may,
  optseries = {the IEEE 4th Symposium on Bioinformatics and Bioengineering - BIBE'2004},
  doi = {10.1109/BIBE.2004.1317369},
  url = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=1317369},
  alt-url = {http://doi.ieeecomputersociety.org/10.1109/BIBE.2004.1317369},
  pdf = {http://www.lifl.fr/~noe/files/pp_BIBE04.pdf},
  postscript = {http://www.lifl.fr/~noe/files/pp_BIBE04.ps.gz},
  hal-url = {http://hal.inria.fr/inria-00001163/en/},
  eprint = {cs.OH/0603106},
  abstract = {We address the problem of estimating the sensitivity of
    seed-based similarity search algorithms. In contrast to approaches based on
    Markov models [Faster and more sensitive homology search, Designing seeds for
    similarity search in genomic DNA, Optimal spaced seeds for Hidden Markov Models,
    with application to homologous coding regions, Vector seeds: an extension to
    spaced seeds allows substantial improvements in sensitivity and specificity,
    Sensitivity analysis and efficient method for identifying optimal spaced seeds],
    we study the estimation based on homogeneous alignments. We describe an
    algorithm for counting and random generation of those alignments and an
    algorithm for exact computation of the sensitivity for a broad class of seed
    strategies. We provide experimental results demonstrating a bias introduced by
    ignoring the homogeneousness condition.},
  inria = {ADAGE},
  labo = {hors},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  x-proceedings = {yes},
  aeres = {ACT},
  selectif = {oui},
  optnote = {(submitted: 145, accepted: 71, acceptance rate: 0.49)}
}
@inproceedings{NoeKucherovJOBIM04,
  author = {No{\'e}, Laurent and Kucherov, Gregory},
  title = {Improved hit criteria for {DNA} local alignment},
  booktitle = {Proceedings of the 5th Open Days in Biology, Computer Science and Mathematics (JOBIM), June 28-30, 2004, Montr{\'e}al (Canada)},
  year = {2004},
  month = jun,
  pdf = {http://www.lifl.fr/~noe/files/pp_JOBIM04.pdf},
  postscript = {http://www.lifl.fr/~noe/files/pp_JOBIM04.ps.gz},
  hal-url = {http://hal.inria.fr/inria-00099999/en/},
  abstract = {The hit criterion is a key component of heuristic local
    alignment algorithms. It specifies a class of patterns assumed to witness a
    potential similarity, and this choice is decisive for the selectivity and
    sensitivity of the whole method. In this paper, we propose two ways to improve
    the hit criterion. First, we define the group criterion combining the advantages
    of the single-seed and double-seed approaches used in existing algorithms.
    Second, we introduce transition-constrained seeds that extend spaced seeds by
    the possibility of distinguishing transition and transversion mismatches. We
    provide analytical data as well as experimental results, obtained with our YASS
    software, supporting both improvements.}
}
This file was generated by bibtex2html 1.96.