RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SubstructMatch.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2025 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_SUBSTRUCTMATCH_H
12#define RD_SUBSTRUCTMATCH_H
13
14// std bits
15#include <vector>
16
17#include <unordered_set>
18#include <functional>
19#include <unordered_map>
20#include <cstdint>
21#include <string>
22#include <span>
23
24#include <boost/dynamic_bitset.hpp>
25#if BOOST_VERSION >= 107100
26#define RDK_INTERNAL_BITSET_HAS_HASH
27#endif
28
30
31namespace RDKit {
32class ROMol;
33class Atom;
34class Bond;
36class MolBundle;
37
38//! \brief used to return matches from substructure searching.
39//! The format is (queryAtomIdx, molAtomIdx).
40typedef std::vector<std::pair<int, int>> MatchVectType;
41
43 bool useChirality = false; //!< Use chirality in determining whether or not
44 //!< atoms/bonds match
45 bool useEnhancedStereo = false; //!< Use enhanced stereochemistry in
46 //!< determining whether atoms/bonds match
47 bool aromaticMatchesConjugated = false; //!< Aromatic and conjugated bonds
48 //!< match each other
49 bool useQueryQueryMatches = false; //!< Consider query-query matches, not
50 //!< just simple matches
51 bool useGenericMatchers = false; //!< Looks for generic atoms in the query
52 //!< and uses them as part of the matching
53 bool recursionPossible = true; //!< Allow recursive queries
54 bool uniquify = true; //!< uniquify (by atom index) match results
55 unsigned int maxMatches = 1000; //!< maximum number of matches to return
56 int numThreads = 1; //!< number of threads to use when multi-threading
57 //!< is possible. 0 selects the number of
58 //!< concurrent threads supported by the hardware;
59 //!< negative values are added to the number of
60 //!< concurrent threads supported by the hardware.
61 std::vector<std::string> atomProperties; //!< atom properties that must be
62 //!< equivalent in order to match
63 std::vector<std::string> bondProperties; //!< bond properties that must be
64 //!< equivalent in order to match
65 std::function<bool(const ROMol &mol,
66 std::span<const unsigned int> match)>
67 extraFinalCheck; //!< a function to be called at the end to validate a
68 //!< match
69 unsigned int maxRecursiveMatches =
70 1000; //!< maximum number of matches that the recursive substructure
71 //!< matching should return
73 false; //!< If set, query atoms and bonds with specified stereochemistry
74 //!< will match atoms and bonds with unspecified stereochemistry
75 bool aromaticMatchesSingleOrDouble = false; //!< Aromatic bonds match single
76 //!< or double bonds
77 std::function<bool(const Atom &queryAtom, const Atom &molAtom)>
78 extraAtomCheck; //!< a function to be called after other atom comparisons
79 //!< have passed
81 false; //!< if set, only the extraAtomCheck will be used to determine
82 //!< whether or not atoms match
83 std::function<bool(const Bond &queryBond, const Bond &molBond)>
84 extraBondCheck; //!< a function to be called after other bond comparisons
85 //!< have passed
87 false; //!< if set, only the extraBondCheck will be used to determine
88 //!< whether or not bonds match
90};
91
93 SubstructMatchParameters &params, const std::string &json);
95 const SubstructMatchParameters &params);
96
97//! Find a substructure match for a query in a molecule
98/*!
99 \param mol The ROMol to be searched
100 \param query The query ROMol
101 \param params Parameters controlling the matching
102
103 \return The matches, if any
104
105*/
107 const ROMol &mol, const ROMol &query,
109
110//! Count substructure matches for a query in a molecule without materializing
111//! the full match vectors.
112/*!
113
114
115
116 \param mol The ROMol to be searched
117 \param query The query ROMol
118 \param params Parameters controlling the matching
119
120 \return The number of matches found (capped by params.maxMatches)
121
122*/
124 const ROMol &mol, const ROMol &query,
126
127//! Find all substructure matches for a query in a ResonanceMolSupplier object
128/*!
129 \param resMolSuppl The ResonanceMolSupplier object to be searched
130 \param query The query ROMol
131 \param matchParams Parameters controlling the matching
132
133 \return The matches, if any
134
135*/
137 ResonanceMolSupplier &resMolSuppl, const ROMol &query,
139
141 const MolBundle &bundle, const ROMol &query,
144 const ROMol &mol, const MolBundle &query,
147 const MolBundle &bundle, const MolBundle &query,
149
150//! Find a single substructure match for a query
151/*!
152 \param mol The object to be searched
153 \param query The query
154 \param matchVect Used to return the match
155 (pre-existing contents are replaced; cleared if nothing matches)
156 \param recursionPossible flags whether or not recursive matches are allowed
157 \param useChirality use atomic CIP codes as part of the comparison
158 \param useQueryQueryMatches if set, the contents of atom and bond queries
159 will be used as part of the matching
160
161 \return whether or not a match was found (i.e. matchVect is non-empty)
162
163*/
164template <typename T1, typename T2>
165bool SubstructMatch(T1 &mol, const T2 &query, MatchVectType &matchVect,
166 bool recursionPossible = true, bool useChirality = false,
167 bool useQueryQueryMatches = false) {
169 params.recursionPossible = recursionPossible;
170 params.useChirality = useChirality;
171 params.useQueryQueryMatches = useQueryQueryMatches;
172 params.maxMatches = 1; // only one match is requested from the matcher
173 std::vector<MatchVectType> matchVects = SubstructMatch(mol, query, params);
174 if (matchVects.size()) {
175 matchVect = matchVects.front(); // hand back the first match
176 } else {
177 matchVect.clear(); // no match: do not leave stale caller data behind
178 }
179 return matchVect.size() != 0;
180};
181
182//! Find all substructure matches for a query
183/*!
184 \param mol The object to be searched
185 \param query The query
186 \param matchVect Used to return the matches
187 (pre-existing contents will be replaced)
188 \param uniquify Toggles uniquification (by atom index) of the results
189 \param recursionPossible flags whether or not recursive matches are allowed
190 \param useChirality use atomic CIP codes as part of the comparison
191 \param useQueryQueryMatches if set, the contents of atom and bond queries
192 will be used as part of the matching
193 \param maxMatches The maximum number of matches that will be returned.
194 In high-symmetry cases with medium-sized molecules, it is
195 very easy to end up with a combinatorial explosion in the
196 number of possible matches. This argument prevents that
197 from having unintended consequences.
198 \param numThreads The number of threads to use when multi-threading is
199 possible (forwarded to the matcher as params.numThreads)
200
201 \return the number of matches found (the size of matchVect)
202
203*/
204template <typename T1, typename T2>
205unsigned int SubstructMatch(T1 &mol, const T2 &query,
206 std::vector<MatchVectType> &matchVect,
207 bool uniquify = true, bool recursionPossible = true,
208 bool useChirality = false,
209 bool useQueryQueryMatches = false,
210 unsigned int maxMatches = 1000,
211 int numThreads = 1) {
213 params.uniquify = uniquify;
214 params.recursionPossible = recursionPossible;
215 params.useChirality = useChirality;
216 params.useQueryQueryMatches = useQueryQueryMatches;
217 params.maxMatches = maxMatches;
218 params.numThreads = numThreads;
219 matchVect = SubstructMatch(mol, query, params); // overwrites prior contents
220 return static_cast<unsigned int>(matchVect.size());
221};
222
223// ----------------------------------------------
224//
225// find one match in a ResonanceMolSupplier object
226// (matchVect is cleared and false is returned when nothing matches)
227template <>
228inline bool SubstructMatch(ResonanceMolSupplier &resMolSupplier,
229 const ROMol &query, MatchVectType &matchVect,
230 bool recursionPossible, bool useChirality,
231 bool useQueryQueryMatches) {
233 params.recursionPossible = recursionPossible;
234 params.useChirality = useChirality;
235 params.useQueryQueryMatches = useQueryQueryMatches;
236 params.maxMatches = 1; // only one match is requested from the matcher
237 std::vector<MatchVectType> matchVects =
238 SubstructMatch(resMolSupplier, query, params);
239 if (matchVects.size()) {
240 matchVect = matchVects.front(); // hand back the first match
241 } else {
242 matchVect.clear(); // no match: do not leave stale caller data behind
243 }
244 return matchVect.size() != 0;
245}
246
247template <> // find all matches in a ResonanceMolSupplier object
248inline unsigned int SubstructMatch(ResonanceMolSupplier &resMolSupplier,
249 const ROMol &query,
250 std::vector<MatchVectType> &matchVect,
251 bool uniquify, bool recursionPossible,
252 bool useChirality, bool useQueryQueryMatches,
253 unsigned int maxMatches, int numThreads) {
255 params.uniquify = uniquify;
256 params.recursionPossible = recursionPossible;
257 params.useChirality = useChirality;
258 params.useQueryQueryMatches = useQueryQueryMatches;
259 params.maxMatches = maxMatches;
260 params.numThreads = numThreads;
261 matchVect = SubstructMatch(resMolSupplier, query, params); // overwrites prior contents
262 return static_cast<unsigned int>(matchVect.size()); // number of matches found
263};
264
265//! Class used as a final step to confirm whether or not a given atom->atom
266//! mapping is a valid substructure match.
268 public:
269 MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol,
270 const SubstructMatchParameters &ps);
271
272 bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[]);
273
274 private:
275 const ROMol &d_query; // held by reference, not copied
276 const ROMol &d_mol; // held by reference, not copied
277 const SubstructMatchParameters &d_params; // held by reference, not copied
278 std::unordered_map<unsigned int, StereoGroup const *> d_molStereoGroups;
279#ifdef RDK_INTERNAL_BITSET_HAS_HASH
280 // Boost 1.71 added support for std::hash with dynamic_bitset.
281 using HashedStorageType = boost::dynamic_bitset<>;
282#else
283 // otherwise fall back to std::string as the hashable storage type
284 using HashedStorageType = std::string;
285#endif
286 std::unordered_set<HashedStorageType> matchesSeen;
287};
288
290 int d_refConfId = -1;
292 double d_tol2 = 1e-8; //!< squared distance tolerance
293 AtomCoordsMatchFunctor(int refConfId = -1, int queryConfId = -1,
294 double tol = 1e-4)
295 : d_refConfId(refConfId),
296 d_queryConfId(queryConfId),
297 d_tol2(tol * tol) {}; // tol is a plain distance; it is stored squared
298
299 bool operator()(const Atom &queryAtom, const Atom &targetAtom) const;
300};
301
302} // namespace RDKit
303
304#endif
Defines the class StereoGroup which stores relationships between the absolute configurations of atoms...
The class for representing atoms.
Definition Atom.h:74
class for representing a bond
Definition Bond.h:46
MolBundle contains a collection of related ROMols.
Definition MolBundle.h:59
MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol, const SubstructMatchParameters &ps)
bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[])
#define RDKIT_SUBSTRUCTMATCH_EXPORT
Definition export.h:577
Std stuff.
RDKIT_SUBSTRUCTMATCH_EXPORT unsigned int SubstructMatchCount(const ROMol &mol, const ROMol &query, const SubstructMatchParameters &params=SubstructMatchParameters())
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)
RDKIT_SUBSTRUCTMATCH_EXPORT std::vector< MatchVectType > SubstructMatch(const ROMol &mol, const ROMol &query, const SubstructMatchParameters &params=SubstructMatchParameters())
Find a substructure match for a query in a molecule.
RDKIT_SUBSTRUCTMATCH_EXPORT void updateSubstructMatchParamsFromJSON(SubstructMatchParameters &params, const std::string &json)
RDKIT_SUBSTRUCTMATCH_EXPORT std::string substructMatchParamsToJSON(const SubstructMatchParameters &params)
bool operator()(const Atom &queryAtom, const Atom &targetAtom) const
AtomCoordsMatchFunctor(int refConfId=-1, int queryConfId=-1, double tol=1e-4)
std::function< bool(const ROMol &mol, std::span< const unsigned int > match)> extraFinalCheck
unsigned int maxMatches
maximum number of matches to return
bool uniquify
uniquify (by atom index) match results
std::vector< std::string > atomProperties
std::vector< std::string > bondProperties
bool recursionPossible
Allow recursive queries.
unsigned int maxRecursiveMatches
matching should return
std::function< bool(const Atom &queryAtom, const Atom &molAtom)> extraAtomCheck
std::function< bool(const Bond &queryBond, const Bond &molBond)> extraBondCheck