publications.bib

@article{KopylovaNoeTouzetBIOINFORMATICS12,
  author = {Kopylova, Evguenia and No{\'e}, Laurent and Touzet, H{\'e}l{\`e}ne},
  title = {{SortMeRNA}: Fast and accurate filtering of ribosomal {RNAs} in metatranscriptomic data},
  journal = {Bioinformatics},
  year = {2012},
  volume = {28},
  number = {24},
  pages = {3211--3217},
  pubmed-url = {http://www.ncbi.nlm.nih.gov/pubmed/23071270},
  optpmc-url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC?????},
  url = {http://bioinformatics.oxfordjournals.org/content/28/24/3211},
  pdf = {http://bioinformatics.oxfordjournals.org/content/28/24/3211.full.pdf},
  doi = {10.1093/bioinformatics/bts611}
}
@article{StartekEtAl12,
  author = {Startek, Micha{\l} and Lasota, S{\l}awomir and Sykulski, Maciej and Bu{\l}ak, Adam and No{\'e}, Laurent and Kucherov, Gregory and Gambin, Anna},
  title = {Efficient alternatives to {PSI-BLAST}},
  journal = {Bulletin of the Polish Academy of Sciences: Technical Sciences},
  year = {2012},
  abstract = {In this paper we present two algorithms that may serve as efficient alternatives to the well-known PSI BLAST tool: SeedBLAST and CTX-PSI Blast. Both may benefit from the knowledge about amino acid composition specific to a given protein family: SeedBLAST uses a advisedly designed seed, while CTX-PSI BLAST extends PSI BLAST with the context-specific substitution model.

The seeding technique became central in the theory of sequence alignment. There are several efficient tools applying seeds to DNA homology search, but not to protein homology search. In this paper we fill this gap. We advocate the use of multiple subset seeds derived from a hierarchical tree of amino acid residues. Our method computes, by an evolutionary algorithm, seeds that are specifically designed for a given protein family. The seeds are represented by deterministic finite automata (DFAs) and built into the NCBI-BLAST software. This extended tool, named SeedBLAST, is compared to the original BLAST and PSI-BLAST on several protein families. Our results demonstrate a superiority of SeedBLAST in terms of efficiency, especially in the case of twilight zone hits.

The contextual substitution model has been proven to increase sensitivity of protein alignment. In this paper we perform a next step in the contextual alignment program. We announce a contextual version of the PSI-BLAST algorithm, an iterative version of the NCBI-BLAST tool. The experimental evaluation has been performed demonstrating a significantly higher sensitivity compared to the ordinary PSI-BLAST algorithm.},
  volume = {60},
  number = {3},
  pages = {495--505},
  month = {December},
  url = {http://www.degruyter.com/view/j/bpasts.2012.60.issue-3/v10175-012-0063-0/v10175-012-0063-0.xml},
  pdf = {http://www.degruyter.com/dg/viewarticle.fullcontentlink:pdfeventlink/$002fj$002fbpasts.2012.60.issue-3$002fv10175-012-0063-0$002fv10175-012-0063-0.xml?t:ac=j$002fbpasts.2012.60.issue-3$002fv10175-012-0063-0$002fv10175-012-0063-0.xml#$.pdf},
  doi = {10.2478/v10175-012-0063-0}
}
@article{NoeGirdeaKucherovABI10,
  author = {No{\'e}, Laurent and G{\^i}rdea, Marta and Kucherov, Gregory},
  title = {Designing efficient spaced seeds for {SOLiD} read mapping},
  journal = {Advances in Bioinformatics},
  year = {2010},
  month = {July},
  volume = {2010},
  pages = {ID 708501},
  doi = {10.1155/2010/708501},
  url = {http://www.hindawi.com/journals/abi/2010/708501/},
  pdf = {http://downloads.hindawi.com/journals/abi/2010/708501.pdf},
  hal-url = {http://hal.inria.fr/inria-00527029/en/},
  pubmed-url = {http://www.ncbi.nlm.nih.gov/pubmed/20936175},
  pmc-url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2945724},
  opteprint = {},
  abstract = {The advent of high-throughput sequencing technologies
constituted a major advance in genomic studies, offering new prospects in a
wide range of applications. We propose a rigorous and flexible algorithmic
solution to mapping SOLiD color-space reads to a reference genome. The solution
relies on an advanced method of seed design that uses a faithful probabilistic
model of read matches and, on the other hand, a novel seeding principle
especially adapted to read mapping. Our method can handle both lossy and
lossless frameworks and is able to distinguish, at the level of seed design,
between SNPs and reading errors. We illustrate our approach by several seed
designs and demonstrate their efficiency.},
  inria = {Sequoia},
  labo = {dans},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  x-pays = {RU},
  aeres = {ACL},
  selectif = {oui}
}
@article{GirdeaNoeKucherovAMB10,
  author = {G{\^i}rdea, Marta and No{\'e}, Laurent and Kucherov, Gregory},
  title = {Back-translation for discovering distant protein homologies in the presence of frameshift mutations},
  journal = {Algorithms for Molecular Biology},
  year = {2010},
  month = {January},
  volume = {5},
  number = {1},
  pages = {6},
  doi = {10.1186/1748-7188-5-6},
  url = {http://www.almob.org/content/5/1/6},
  pdf = {http://www.almob.org/content/pdf/1748-7188-5-6.pdf},
  hal-url = {http://hal.inria.fr/inria-00456458/en/},
  pubmed-url = {http://www.ncbi.nlm.nih.gov/pubmed/20047662},
  pmc-url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2821327},
  opteprint = {},
  abstract = {Frameshift mutations in protein-coding DNA sequences produce
a drastic change in the resulting protein sequence, which prevents classic
protein alignment methods from revealing the proteins' common origin. Moreover,
when a large number of substitutions are additionally involved in the
divergence, the homology detection becomes difficult even at the DNA level. We
developed a novel method to infer distant homology relations of two proteins,
that accounts for frameshift and point mutations that may have affected the
coding sequences. We design a dynamic programming alignment algorithm over
memory-efficient graph representations of the complete set of putative DNA
sequences of each protein, with the goal of determining the two putative DNA
sequences which have the best scoring alignment under a powerful scoring system
designed to reflect the most probable evolutionary process. Our implementation
is freely available at [http://bioinfo.lifl.fr/path/]. Our approach allows to
uncover evolutionary information that is not captured by traditional alignment
methods, which is confirmed by biologically significant examples.},
  inria = {Sequoia},
  labo = {dans},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  x-pays = {RU},
  aeres = {ACL},
  selectif = {oui}
}
@article{RoytbergEtAlTCBB09,
  author = {Roytberg, Mikhail A. and Gambin, Anna and No{\'e}, Laurent and
               Lasota, S{\l}awomir and Furletova, Eugenia and Szczurek, Ewa and
               Kucherov, Gregory},
  title = {On subset seeds for protein alignment},
  journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics (TCBB)},
  year = {2009},
  month = {July},
  pages = {483--494},
  volume = {6},
  number = {3},
  doi = {10.1109/TCBB.2009.4},
  url = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=4752807},
  ieeecs-url = {http://doi.ieeecomputersociety.org/10.1109/tcbb.2009.4},
  pdf = {http://www.lifl.fr/~noe/files/pp_TCBB09_preprint.pdf},
  postscript = {http://www.lifl.fr/~noe/files/pp_TCBB09_preprint.ps.gz},
  hal-url = {http://hal.inria.fr/inria-00354773/en/},
  pubmed-url = {http://www.ncbi.nlm.nih.gov/pubmed/19644175},
  eprint = {0901.3198},
  abstract = {We apply the concept of subset seeds proposed in [1] to
similarity search in protein sequences. The main question studied is the design
of efficient seed alphabets to construct seeds with optimal
sensitivity/selectivity trade-offs. We propose several different design methods
and use them to construct several alphabets. We then perform a comparative
analysis of seeds built over those alphabets and compare them with the standard
BLASTP seeding method [2], [3], as well as with the family of vector seeds
proposed in [4]. While the formalism of subset seeds is less expressive (but
less costly to implement) than the cumulative principle used in BLASTP and
vector seeds, our seeds show a similar or even better performance than BLASTP
on Bernoulli models of proteins compatible with the common BLOSUM62 matrix.
Finally, we perform a large-scale benchmarking of our seeds against several
main databases of protein alignments. Here again, the results show a comparable
or better performance of our seeds vs. BLASTP.},
  inria = {Sequoia},
  labo = {dans},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  x-pays = {RU,PL},
  aeres = {ACL},
  selectif = {oui}
}
@article{PeterlongoEtAlBMCBioinformatics08,
  author                   = {Peterlongo, Pierre and No{\'e}, Laurent and Lavenier, Dominique and Nguyen, Van-Hoa and Kucherov, Gregory and Giraud, Mathieu},
  title                    = {Optimal neighborhood indexing for protein similarity search},
  journal                  = {BMC Bioinformatics},
  year                     = {2008},
  month                    = {December},
  volume                   = {9},
  pages                    = {534},
  doi                      = {10.1186/1471-2105-9-534},
  url                      = {http://www.biomedcentral.com/1471-2105/9/534},
  pdf                      = {http://www.biomedcentral.com/content/pdf/1471-2105-9-534.pdf},
  pubmed-url               = {http://www.ncbi.nlm.nih.gov/pubmed/19087280},
  pmc-url                  = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2640386},
  hal-url                  = {http://hal.inria.fr/inria-00340510/en/},
  opteprint                = {},
  inria                    = {Sequoia},
  labo                     = {dans},
  aeres                    = {ACL},
  selectif                 = {oui},
  x-editorial-board        = {yes},
  x-international-audience = {yes},
  abstract                 = {Similarity inference, one of the main bioinformatics tasks, has
to face an exponential growth of the biological data. A classical approach used
to cope with this data flow involves heuristics with large seed indexes. In
order to speed up this technique, the index can be enhanced by storing
additional information to limit the number of random memory accesses. However,
this improvement leads to a larger index that may become a bottleneck. In the
case of protein similarity search, we propose to decrease the index size by
reducing the amino acid alphabet.}
}
@article{CsurosNoeKucherovTRENDSINGENETICS07,
  author                   = {Cs{\H{u}}r{\"o}s, Mikl{\'o}s and No{\'e}, Laurent and Kucherov, Gregory},
  title                    = {Reconsidering the significance of genomic word frequencies},
  journal                  = {Trends in Genetics},
  volume                   = {23},
  number                   = {11},
  pages                    = {543--546},
  month                    = {November},
  year                     = {2007},
  doi                      = {10.1016/j.tig.2007.07.008},
  url                      = {http://linkinghub.elsevier.com/retrieve/pii/S0168952507002983},
  pdf                      = {http://www.iro.umontreal.ca/~csuros/papers/spectrum-DPL-full.pdf},
  eprint                   = {q-bio/0609022},
  pubmed-url               = {http://www.ncbi.nlm.nih.gov/pubmed/17964682},
  hal-url                  = {http://hal.inria.fr/inria-00448737/en/},
  inria                    = {Sequoia},
  labo                     = {dans},
  aeres                    = {ACL},
  selectif                 = {oui},
  impactfactor             = {9.95,Journal Citation Report},
  x-editorial-board        = {yes},
  x-international-audience = {yes},
  x-pays                   = {CA},
  abstract                 = {By conventional wisdom, a feature that occurs too often or too
rarely in a genome can indicate a functional element. To infer functionality
from frequency, it is crucial to precisely characterize occurrences in neutrally
evolving DNA. We find that the frequency of oligonucleotides in a genomic
sequence follows primarily a Pareto-lognormal distribution, which encapsulates
lognormal and power-law features found across all known genomes. Such a
distribution may be the result of completely random evolution by a copying
process. Our characterization of the entire frequency distribution of genomic
words opens a way to a more accurate reasoning about their over- and 
under-representation in genomic sequences.}
}
@article{KucherovNoeRoytbergJBCB06,
  author = {Kucherov, Gregory and No{\'e}, Laurent and Roytberg, Mikhail A.},
  title = {A unifying framework for seed sensitivity and its application to subset seeds},
  journal = {Journal of Bioinformatics and Computational Biology},
  year = {2006},
  month = {April},
  volume = {4},
  number = {2},
  pages = {553--569},
  doi = {10.1142/S0219720006001977},
  url = {http://www.worldscinet.com/jbcb/04/0402/S0219720006001977.html},
  hal-url = {http://hal.archives-ouvertes.fr/hal-00018114},
  pubmed-url = {http://www.ncbi.nlm.nih.gov/pubmed/16819802},
  pmc-url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2824148},
  eprint = {cs.DS/0601116},
  postscript = {http://www.lifl.fr/~noe/files/pp_JBCB06_preprint.ps.gz},
  pdf = {http://www.lifl.fr/~noe/files/pp_JBCB06_preprint.pdf},
  abstract = {We propose a general approach to compute the seed sensitivity,
that can be applied to different definitions of seeds. It treats separately
three components of the seed sensitivity problem -- a set of target alignments,
an associated probability distribution, and a seed model -- that are specified
by distinct finite automata. The approach is then applied to a new concept of
{\em subset seeds} for which we propose an efficient automaton construction.
Experimental results confirm that sensitive subset seeds can be efficiently
designed using our approach, and can then be used in similarity search producing
better results than ordinary spaced seeds.},
  inria = {Sequoia},
  labo = {dans},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  x-pays = {RU},
  aeres = {ACL},
  selectif = {oui}
}
@article{NoeKucherovNAR05,
  author = {No{\'e}, Laurent and Kucherov, Gregory},
  title = {{YASS}: enhancing the sensitivity of {DNA} similarity search},
  journal = {Nucleic Acids Research},
  year = {2005},
  month = {July},
  volume = {33},
  number = {Web Server issue},
  x-supplement = {suppl\_2},
  pages = {W540--W543},
  doi = {10.1093/nar/gki478},
  url = {http://nar.oxfordjournals.org/cgi/content/abstract/33/suppl_2/W540},
  pdf = {http://nar.oxfordjournals.org/cgi/reprint/33/suppl_2/W540.pdf},
  hal-url = {http://hal.inria.fr/inria-00448742/en/},
  pubmed-url = {http://www.ncbi.nlm.nih.gov/pubmed/15980530},
  pmc-url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC1160238},
  opteprint = {},
  abstract = {YASS is a DNA local alignment tool based on an efficient and
sensitive filtering algorithm. It applies transition-constrained seeds to
specify the most probable conserved motifs between homologous sequences,
combined with a flexible hit criterion used to identify groups of seeds that are
likely to exhibit significant alignments. A web interface
(http://www.loria.fr/projects/YASS/) is available to upload input sequences in
fasta format, query the program and visualize the results obtained in several
forms (dot-plot, tabular output and others). A standalone version is available
for download from the web page.},
  inria = {ADAGE},
  labo = {hors},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  aeres = {ACL},
  selectif = {oui}
}
@article{KucherovNoeRoytbergTCBB05,
  author = {Kucherov, Gregory and No{\'e}, Laurent and Roytberg, Mikhail A.},
  title = {Multiseed lossless filtration},
  journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics (TCBB)},
  pages = {51--61},
  year = {2005},
  month = {January},
  issn = {1545-5963},
  volume = {2},
  number = {1},
  doi = {10.1109/TCBB.2005.12},
  url = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=1416851},
  ieeecs-url = {http://doi.ieeecomputersociety.org/10.1109/tcbb.2005.12},
  hal-url = {http://hal.inria.fr/inria-00354810/en/},
  pubmed-url = {http://www.ncbi.nlm.nih.gov/pubmed/17044164},
  eprint = {0901.3215},
  pdf = {http://www.lifl.fr/~noe/files/pp_TCBB05_preprint.pdf},
  postscript = {http://www.lifl.fr/~noe/files/pp_TCBB05_preprint.ps.gz},
  abstract = {We study a method of seed-based lossless filtration for
approximate string matching and related bioinformatics applications. The method
is based on a simultaneous use of several spaced seeds rather than a single seed
as studied by Burkhardt and K{\"a}rkk{\"a}inen. We present algorithms to compute several
important parameters of seed families, study their combinatorial properties, and
describe several techniques to construct efficient families. We also report a
large-scale application of the proposed technique to the problem of
oligonucleotide selection for an {EST} sequence database.},
  inria = {ADAGE},
  labo = {hors},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  aeres = {ACL},
  selectif = {oui}
}
@article{NoeKucherovBMCBioinformatics04,
  author = {No{\'e}, Laurent and Kucherov, Gregory},
  title = {Improved hit criteria for {DNA} local alignment},
  journal = {BMC Bioinformatics},
  year = {2004},
  volume = {5},
  pages = {149},
  month = {October},
  doi = {10.1186/1471-2105-5-149},
  url = {http://www.biomedcentral.com/1471-2105/5/149},
  pdf = {http://www.biomedcentral.com/content/pdf/1471-2105-5-149.pdf},
  hal-url = {http://hal.inria.fr/inria-00448743/en/},
  pubmed-url = {http://www.ncbi.nlm.nih.gov/pubmed/15485572},
  pmc-url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC526756},
  opteprint = {},
  abstract = {Background: the hit criterion is a key component of heuristic
local alignment algorithms. It specifies a class of patterns assumed to witness
a potential similarity, and this choice is decisive for the selectivity and
sensitivity of the whole method.

Results: in this paper, we propose two ways to improve the hit criterion.
First, we define the group criterion combining the advantages of the
single-seed and double-seed approaches used in existing algorithms. Second, we
introduce transition-constrained seeds that extend spaced seeds by the
possibility of distinguishing transition and transversion mismatches. We provide
analytical data as well as experimental results, obtained with the YASS
software, supporting both improvements.

Conclusions: proposed algorithmic ideas allow to obtain a significant gain in
sensitivity of similarity search without increase in execution time. The method
has been implemented in YASS software available at
http://www.loria.fr/projects/YASS/},
  inria = {ADAGE},
  labo = {hors},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  aeres = {ACL},
  selectif = {oui}
}

This file was generated by bibtex2html 1.96.