Coverage for geometric_kernels / spaces / hamming_graph.py: 87%
94 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-15 13:41 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-15 13:41 +0000
1"""
2This module provides the :class:`HammingGraph` space and the respective
3:class:`~.eigenfunctions.Eigenfunctions` subclass :class:`VilenkinFunctions`.
4"""
6from math import comb
8import lab as B
9import numpy as np
10from beartype.typing import List, Optional
12from geometric_kernels.lab_extras import dtype_integer, float_like
13from geometric_kernels.spaces.base import DiscreteSpectrumSpace
14from geometric_kernels.spaces.eigenfunctions import (
15 Eigenfunctions,
16 EigenfunctionsWithAdditionTheorem,
17)
18from geometric_kernels.utils.special_functions import generalized_kravchuk_normalized
19from geometric_kernels.utils.utils import chain, hamming_distance, log_binomial
22class VilenkinFunctions(EigenfunctionsWithAdditionTheorem):
23 r"""
24 Eigenfunctions of the graph Laplacian on the q-ary Hamming graph $H(d,q)$, whose
25 nodes are indexed by categorical vectors in $\{0, 1, ..., q-1\}^d$.
27 These eigenfunctions are the Vilenkin functions (also called Vilenkin-Chrestenson
28 functions), which generalize the binary Walsh functions to q-ary alphabets. They
29 map vertices to complex values via products of characters on cyclic groups.
31 For the special case $q = 2$, the Vilenkin functions reduce to the Walsh functions
32 on the binary hypercube $\{0, 1\}^d$.
34 .. note::
35 The Vilenkin functions can be indexed by "character patterns" - choices of
36 coordinates and non-identity characters at those coordinates. Each eigenspace
37 (level) $j$ has dimension $\binom{d}{j}(q-1)^j$, corresponding to choosing
38 $j$ coordinates and assigning $(q-1)$ possible non-identity characters to each.
40 Levels are the whole eigenspaces, comprising all Vilenkin functions with the
41 same number of coordinates having non-identity characters. The addition theorem
42 for these is based on generalized Kravchuk polynomials, i.e. discrete orthogonal
43 polynomials on the q-ary Hamming scheme.
45 :param dim:
46 Dimension $d$ of the q-ary Hamming graph $H(d,q)$.
48 :param n_cat:
49 Number of categories $q \geq 2$ in the q-ary alphabet $\{0, 1, ..., q-1\}$.
51 :param num_levels:
52 Specifies the number of levels (eigenspaces) of the Vilenkin functions to use.
53 """
55 def __init__(self, dim: int, n_cat: int, num_levels: int) -> None:
56 if num_levels > dim + 1:
57 raise ValueError("The number of levels should be at most `dim`+1.")
58 self.dim = dim
59 self.n_cat = n_cat
60 self._num_levels = num_levels
61 self._num_eigenfunctions: Optional[int] = None # To be computed when needed.
63 if n_cat < 2:
64 raise ValueError("n_cat must be at least 2.")
66 def __call__(self, X: B.Int, **kwargs) -> B.Float:
67 raise NotImplementedError
69 def _addition_theorem(
70 self, X: B.Numeric, X2: Optional[B.Numeric] = None, **kwargs
71 ) -> B.Numeric:
73 if X2 is None:
74 X2 = X
76 hamming_distances = hamming_distance(X, X2)
78 values = []
80 kravchuk_normalized_j_minus_1, kravchuk_normalized_j_minus_2 = None, None
81 for level in range(self.num_levels):
82 cur_kravchuk_normalized = generalized_kravchuk_normalized(
83 self.dim,
84 level,
85 hamming_distances,
86 self.n_cat,
87 kravchuk_normalized_j_minus_1,
88 kravchuk_normalized_j_minus_2,
89 ) # [N, N2]
90 kravchuk_normalized_j_minus_2 = kravchuk_normalized_j_minus_1
91 kravchuk_normalized_j_minus_1 = cur_kravchuk_normalized
93 values.append(
94 comb(self.dim, level)
95 * (self.n_cat - 1) ** level
96 * cur_kravchuk_normalized[..., None]
97 ) # [N, N2, 1]
99 return B.concat(*values, axis=-1) # [N, N2, L]
101 def _addition_theorem_diag(self, X: B.Numeric, **kwargs) -> B.Numeric:
102 """
103 These are certain easy to compute constants.
104 """
105 values = [
106 comb(self.dim, level)
107 * (self.n_cat - 1) ** level
108 * B.ones(float_like(X), *X.shape[:-1], 1) # [N, 1]
109 for level in range(self.num_levels)
110 ]
111 return B.concat(*values, axis=1) # [N, L]
113 def weighted_outerproduct(
114 self,
115 weights: B.Numeric,
116 X: B.Numeric,
117 X2: Optional[B.Numeric] = None, # type: ignore
118 **kwargs,
119 ) -> B.Numeric:
120 if X2 is None:
121 X2 = X
123 hamming_distances = hamming_distance(X, X2)
125 result = B.zeros(B.dtype(weights), X.shape[0], X2.shape[0]) # [N, N2]
126 kravchuk_normalized_j_minus_1, kravchuk_normalized_j_minus_2 = None, None
127 for level in range(self.num_levels):
128 cur_kravchuk_normalized = generalized_kravchuk_normalized(
129 self.dim,
130 level,
131 hamming_distances,
132 self.n_cat,
133 kravchuk_normalized_j_minus_1,
134 kravchuk_normalized_j_minus_2,
135 )
136 kravchuk_normalized_j_minus_2 = kravchuk_normalized_j_minus_1
137 kravchuk_normalized_j_minus_1 = cur_kravchuk_normalized
139 # Instead of multiplying weights by binomial coefficients, we sum their
140 # logs and then exponentiate the result for numerical stability.
141 # Furthermore, we save the computed Kravchuk polynomials for next iterations.
142 result += (
143 B.exp(
144 B.log(weights[level])
145 + log_binomial(self.dim, level)
146 + level * B.log(self.n_cat - 1)
147 )
148 * cur_kravchuk_normalized
149 )
151 return result # [N, N2]
153 def weighted_outerproduct_diag(
154 self, weights: B.Numeric, X: B.Numeric, **kwargs
155 ) -> B.Numeric:
157 # Instead of multiplying weights by binomial coefficients, we sum their
158 # logs and then exponentiate the result for numerical stability.
159 result = sum(
160 B.exp(
161 B.log(weights[level])
162 + log_binomial(self.dim, level)
163 + level * B.log(self.n_cat - 1)
164 )
165 * B.ones(float_like(X), *X.shape[:-1], 1)
166 for level in range(self.num_levels)
167 ) # [N, 1]
169 return B.reshape(result, *result.shape[:-1]) # [N,]
171 @property
172 def num_eigenfunctions(self) -> int:
173 if self._num_eigenfunctions is None:
174 self._num_eigenfunctions = sum(self.num_eigenfunctions_per_level)
175 return self._num_eigenfunctions
177 @property
178 def num_levels(self) -> int:
179 return self._num_levels
181 @property
182 def num_eigenfunctions_per_level(self) -> List[int]:
183 return [
184 comb(self.dim, level) * (self.n_cat - 1) ** level
185 for level in range(self.num_levels)
186 ]
189class HammingGraph(DiscreteSpectrumSpace):
190 r"""
191 The GeometricKernels space representing the q-ary Hamming graph
192 $H(d,q) = \{0, 1, ..., q-1\}^d$, the combinatorial space of categorical
193 vectors (with $q$ categories) of length $d$.
195 The elements of this space are represented by d-dimensional categorical vectors
196 (with $q$ categories) taking integer values in $\{0, 1, ..., q-1\}$.
198 Levels are the whole eigenspaces.
200 .. note::
201 If you need a kernel operating on categorical vectors where $q$ varies
202 between dimensions, you can use `HammingGraph` in conjunction with
203 :class:`ProductGeometricKernel` or :class:`ProductDiscreteSpectrumSpace`.
205 .. note::
206 For the special case $q = 2$, this reduces to the binary hypercube graph,
207 also available as :class:`HypercubeGraph`.
209 .. note::
210 A tutorial on how to use this space is available in the
211 :doc:`HammingGraph.ipynb </examples/HammingGraph>` notebook.
213 .. note::
214 Since the degree matrix is a constant multiple of the identity, all
215 types of the graph Laplacian coincide on the Hamming graph up to a
216 constant, we choose the normalized Laplacian for numerical stability.
218 :param dim:
219 Dimension $d$ of the Hamming graph $H(d,q)$, a positive integer.
221 :param n_cat:
222 Number of categories $q$ of the Hamming graph $H(d,q)$, a positive
223 integer $q \geq 2$.
225 .. admonition:: Citation
227 If you use this GeometricKernels space in your research, please consider
228 citing :cite:t:`borovitskiy2023` and :cite:t:`doumont2025`.
229 """
231 def __init__(self, dim: int, n_cat: int):
232 if dim < 1:
233 raise ValueError("dim must be a positive integer.")
234 if n_cat < 1:
235 raise ValueError("n_cat must be a positive integer.")
236 self.dim = dim
237 self.n_cat = n_cat
239 def __str__(self):
240 return f"HammingGraph({self.dim},{self.n_cat})"
242 @property
243 def dimension(self) -> int:
244 """
245 Returns d, the `dim` parameter that was passed down to `__init__`.
247 .. note:
248 Although this is a graph, and graphs are generally treated as
249 0-dimensional throughout GeometricKernels, we make an exception for
250 HammingGraph. This is because it helps maintain good behavior of
251 Matérn kernels with the usual values of the smoothness parameter
252 nu, i.e. nu = 1/2, nu = 3/2, nu = 5/2.
253 """
254 return self.dim
256 def get_eigenfunctions(self, num: int) -> Eigenfunctions:
257 """
258 Returns the :class:`~.VilenkinFunctions` object with `num` levels.
260 :param num:
261 Number of levels.
262 """
263 return VilenkinFunctions(self.dim, self.n_cat, num)
265 def get_eigenvalues(self, num: int) -> B.Numeric:
266 eigenvalues = np.array(
267 [
268 (self.n_cat * level)
269 / (
270 self.dim * (self.n_cat - 1)
271 ) # we assume normalized Laplacian (for numerical stability)
272 for level in range(num)
273 ]
274 )
275 return B.reshape(eigenvalues, -1, 1) # [num, 1]
277 def get_repeated_eigenvalues(self, num: int) -> B.Numeric:
278 eigenvalues_per_level = self.get_eigenvalues(num)
280 eigenfunctions = VilenkinFunctions(self.dim, self.n_cat, num)
281 eigenvalues = chain(
282 B.squeeze(eigenvalues_per_level),
283 eigenfunctions.num_eigenfunctions_per_level,
284 ) # [J,]
285 return B.reshape(eigenvalues, -1, 1) # [J, 1]
287 def random(self, key: B.RandomState, number: int) -> B.Numeric:
288 r"""
289 Sample uniformly random points on the Hamming graph $H(d,q)$.
291 Always returns [N, D] integer array of the `key`'s backend with values
292 in $\{0, 1, ..., q-1\}$.
294 :param key:
295 Either `np.random.RandomState`, `tf.random.Generator`,
296 `torch.Generator` or `jax.tensor` (representing random state).
297 :param number:
298 Number N of samples to draw.
300 :return:
301 An array of `number` uniformly random samples on the space.
302 """
303 key, random_points = B.random.randint(
304 key, dtype_integer(key), number, self.dimension, lower=0, upper=self.n_cat
305 )
307 return key, random_points
309 @property
310 def element_shape(self):
311 """
312 :return:
313 [d].
314 """
315 return [self.dimension]
317 @property
318 def element_dtype(self):
319 """
320 :return:
321 B.Int.
322 """
323 return B.Int