Coverage for geometric_kernels/spaces/hamming

1"""

2This module provides the :class:`HammingGraph` space and the respective

3:class:`~.eigenfunctions.Eigenfunctions` subclass :class:`VilenkinFunctions`.

4"""

6from math import comb

8import lab as B

9import numpy as np

10from beartype.typing import List, Optional

12from geometric_kernels.lab_extras import dtype_integer, float_like

13from geometric_kernels.spaces.base import DiscreteSpectrumSpace

14from geometric_kernels.spaces.eigenfunctions import (

15 Eigenfunctions,

16 EigenfunctionsWithAdditionTheorem,

17)

18from geometric_kernels.utils.special_functions import generalized_kravchuk_normalized

19from geometric_kernels.utils.utils import chain, hamming_distance, log_binomial

22class VilenkinFunctions(EigenfunctionsWithAdditionTheorem):

23 r"""

24 Eigenfunctions of the graph Laplacian on the q-ary Hamming graph $H(d,q)$, whose

25 nodes are indexed by categorical vectors in $\{0, 1, ..., q-1\}^d$.

27 These eigenfunctions are the Vilenkin functions (also called Vilenkin-Chrestenson

28 functions), which generalize the binary Walsh functions to q-ary alphabets. They

29 map vertices to complex values via products of characters on cyclic groups.

31 For the special case $q = 2$, the Vilenkin functions reduce to the Walsh functions

32 on the binary hypercube $\{0, 1\}^d$.

34 .. note::

35 The Vilenkin functions can be indexed by "character patterns" - choices of

36 coordinates and non-identity characters at those coordinates. Each eigenspace

37 (level) $j$ has dimension $\binom{d}{j}(q-1)^j$, corresponding to choosing

38 $j$ coordinates and assigning $(q-1)$ possible non-identity characters to each.

40 Levels are the whole eigenspaces, comprising all Vilenkin functions with the

41 same number of coordinates having non-identity characters. The addition theorem

42 for these is based on generalized Kravchuk polynomials, i.e. discrete orthogonal

43 polynomials on the q-ary Hamming scheme.

45 :param dim:

46 Dimension $d$ of the q-ary Hamming graph $H(d,q)$.

48 :param n_cat:

49 Number of categories $q \geq 2$ in the q-ary alphabet $\{0, 1, ..., q-1\}$.

51 :param num_levels:

52 Specifies the number of levels (eigenspaces) of the Vilenkin functions to use.

53 """

55 def __init__(self, dim: int, n_cat: int, num_levels: int) -> None:

56 if num_levels > dim + 1:

57 raise ValueError("The number of levels should be at most `dim`+1.")

58 self.dim = dim

59 self.n_cat = n_cat

60 self._num_levels = num_levels

61 self._num_eigenfunctions: Optional[int] = None # To be computed when needed.

63 if n_cat < 2:

64 raise ValueError("n_cat must be at least 2.")

66 def __call__(self, X: B.Int, **kwargs) -> B.Float:

67 raise NotImplementedError

69 def _addition_theorem(

70 self, X: B.Numeric, X2: Optional[B.Numeric] = None, **kwargs

71 ) -> B.Numeric:

73 if X2 is None:

74 X2 = X

76 hamming_distances = hamming_distance(X, X2)

78 values = []

80 kravchuk_normalized_j_minus_1, kravchuk_normalized_j_minus_2 = None, None

81 for level in range(self.num_levels):

82 cur_kravchuk_normalized = generalized_kravchuk_normalized(

83 self.dim,

84 level,

85 hamming_distances,

86 self.n_cat,

87 kravchuk_normalized_j_minus_1,

88 kravchuk_normalized_j_minus_2,

89 ) # [N, N2]

90 kravchuk_normalized_j_minus_2 = kravchuk_normalized_j_minus_1

91 kravchuk_normalized_j_minus_1 = cur_kravchuk_normalized

93 values.append(

94 comb(self.dim, level)

95 * (self.n_cat - 1) ** level

96 * cur_kravchuk_normalized[..., None]

97 ) # [N, N2, 1]

99 return B.concat(*values, axis=-1) # [N, N2, L]

100

101 def _addition_theorem_diag(self, X: B.Numeric, **kwargs) -> B.Numeric:

102 """

103 These are certain easy to compute constants.

104 """

105 values = [

106 comb(self.dim, level)

107 * (self.n_cat - 1) ** level

108 * B.ones(float_like(X), *X.shape[:-1], 1) # [N, 1]

109 for level in range(self.num_levels)

110 ]

111 return B.concat(*values, axis=1) # [N, L]

112

113 def weighted_outerproduct(

114 self,

115 weights: B.Numeric,

116 X: B.Numeric,

117 X2: Optional[B.Numeric] = None, # type: ignore

118 **kwargs,

119 ) -> B.Numeric:

120 if X2 is None:

121 X2 = X

122

123 hamming_distances = hamming_distance(X, X2)

124

125 result = B.zeros(B.dtype(weights), X.shape[0], X2.shape[0]) # [N, N2]

126 kravchuk_normalized_j_minus_1, kravchuk_normalized_j_minus_2 = None, None

127 for level in range(self.num_levels):

128 cur_kravchuk_normalized = generalized_kravchuk_normalized(

129 self.dim,

130 level,

131 hamming_distances,

132 self.n_cat,

133 kravchuk_normalized_j_minus_1,

134 kravchuk_normalized_j_minus_2,

135 )

136 kravchuk_normalized_j_minus_2 = kravchuk_normalized_j_minus_1

137 kravchuk_normalized_j_minus_1 = cur_kravchuk_normalized

138

139 # Instead of multiplying weights by binomial coefficients, we sum their

140 # logs and then exponentiate the result for numerical stability.

141 # Furthermore, we save the computed Kravchuk polynomials for next iterations.

142 result += (

143 B.exp(

144 B.log(weights[level])

145 + log_binomial(self.dim, level)

146 + level * B.log(self.n_cat - 1)

147 )

148 * cur_kravchuk_normalized

149 )

150

151 return result # [N, N2]

152

153 def weighted_outerproduct_diag(

154 self, weights: B.Numeric, X: B.Numeric, **kwargs

155 ) -> B.Numeric:

156

157 # Instead of multiplying weights by binomial coefficients, we sum their

158 # logs and then exponentiate the result for numerical stability.

159 result = sum(

160 B.exp(

161 B.log(weights[level])

162 + log_binomial(self.dim, level)

163 + level * B.log(self.n_cat - 1)

164 )

165 * B.ones(float_like(X), *X.shape[:-1], 1)

166 for level in range(self.num_levels)

167 ) # [N, 1]

168

169 return B.reshape(result, *result.shape[:-1]) # [N,]

170

171 @property

172 def num_eigenfunctions(self) -> int:

173 if self._num_eigenfunctions is None:

174 self._num_eigenfunctions = sum(self.num_eigenfunctions_per_level)

175 return self._num_eigenfunctions

176

177 @property

178 def num_levels(self) -> int:

179 return self._num_levels

180

181 @property

182 def num_eigenfunctions_per_level(self) -> List[int]:

183 return [

184 comb(self.dim, level) * (self.n_cat - 1) ** level

185 for level in range(self.num_levels)

186 ]

187

188

189class HammingGraph(DiscreteSpectrumSpace):

190 r"""

191 The GeometricKernels space representing the q-ary Hamming graph

192 $H(d,q) = \{0, 1, ..., q-1\}^d$, the combinatorial space of categorical

193 vectors (with $q$ categories) of length $d$.

194

195 The elements of this space are represented by d-dimensional categorical vectors

196 (with $q$ categories) taking integer values in $\{0, 1, ..., q-1\}$.

197

198 Levels are the whole eigenspaces.

199

200 .. note::

201 If you need a kernel operating on categorical vectors where $q$ varies

202 between dimensions, you can use `HammingGraph` in conjunction with

203 :class:`ProductGeometricKernel` or :class:`ProductDiscreteSpectrumSpace`.

204

205 .. note::

206 For the special case $q = 2$, this reduces to the binary hypercube graph,

207 also available as :class:`HypercubeGraph`.

208

209 .. note::

210 A tutorial on how to use this space is available in the

211 :doc:`HammingGraph.ipynb </examples/HammingGraph>` notebook.

212

213 .. note::

214 Since the degree matrix is a constant multiple of the identity, all

215 types of the graph Laplacian coincide on the Hamming graph up to a

216 constant, we choose the normalized Laplacian for numerical stability.

217

218 :param dim:

219 Dimension $d$ of the Hamming graph $H(d,q)$, a positive integer.

220

221 :param n_cat:

222 Number of categories $q$ of the Hamming graph $H(d,q)$, a positive

223 integer $q \geq 2$.

224

225 .. admonition:: Citation

226

227 If you use this GeometricKernels space in your research, please consider

228 citing :cite:t:`borovitskiy2023` and :cite:t:`doumont2025`.

229 """

230

231 def __init__(self, dim: int, n_cat: int):

232 if dim < 1:

233 raise ValueError("dim must be a positive integer.")

234 if n_cat < 1:

235 raise ValueError("n_cat must be a positive integer.")

236 self.dim = dim

237 self.n_cat = n_cat

238

239 def __str__(self):

240 return f"HammingGraph({self.dim},{self.n_cat})"

241

242 @property

243 def dimension(self) -> int:

244 """

245 Returns d, the `dim` parameter that was passed down to `__init__`.

246

247 .. note:

248 Although this is a graph, and graphs are generally treated as

249 0-dimensional throughout GeometricKernels, we make an exception for

250 HammingGraph. This is because it helps maintain good behavior of

251 Matérn kernels with the usual values of the smoothness parameter

252 nu, i.e. nu = 1/2, nu = 3/2, nu = 5/2.

253 """

254 return self.dim

255

256 def get_eigenfunctions(self, num: int) -> Eigenfunctions:

257 """

258 Returns the :class:`~.VilenkinFunctions` object with `num` levels.

259

260 :param num:

261 Number of levels.

262 """

263 return VilenkinFunctions(self.dim, self.n_cat, num)

264

265 def get_eigenvalues(self, num: int) -> B.Numeric:

266 eigenvalues = np.array(

267 [

268 (self.n_cat * level)

269 / (

270 self.dim * (self.n_cat - 1)

271 ) # we assume normalized Laplacian (for numerical stability)

272 for level in range(num)

273 ]

274 )

275 return B.reshape(eigenvalues, -1, 1) # [num, 1]

276

277 def get_repeated_eigenvalues(self, num: int) -> B.Numeric:

278 eigenvalues_per_level = self.get_eigenvalues(num)

279

280 eigenfunctions = VilenkinFunctions(self.dim, self.n_cat, num)

281 eigenvalues = chain(

282 B.squeeze(eigenvalues_per_level),

283 eigenfunctions.num_eigenfunctions_per_level,

284 ) # [J,]

285 return B.reshape(eigenvalues, -1, 1) # [J, 1]

286

287 def random(self, key: B.RandomState, number: int) -> B.Numeric:

288 r"""

289 Sample uniformly random points on the Hamming graph $H(d,q)$.

290

291 Always returns [N, D] integer array of the `key`'s backend with values

292 in $\{0, 1, ..., q-1\}$.

293

294 :param key:

295 Either `np.random.RandomState`, `tf.random.Generator`,

296 `torch.Generator` or `jax.tensor` (representing random state).

297 :param number:

298 Number N of samples to draw.

299

300 :return:

301 An array of `number` uniformly random samples on the space.

302 """

303 key, random_points = B.random.randint(

304 key, dtype_integer(key), number, self.dimension, lower=0, upper=self.n_cat

305 )

306

307 return key, random_points

308

309 @property

310 def element_shape(self):

311 """

312 :return:

313 [d].

314 """

315 return [self.dimension]

316

317 @property

318 def element_dtype(self):

319 """

320 :return:

321 B.Int.

322 """

323 return B.Int

Coverage for geometric_kernels / spaces / hamming_graph.py: 87%

94 statements