@@ -33,6 +33,7 @@ service ModelService {
3333 option (google.api.default_host ) = "bigquery.googleapis.com" ;
3434 option (google.api.oauth_scopes ) =
3535 "https://www.googleapis.com/auth/bigquery,"
36+ "https://www.googleapis.com/auth/bigquery.readonly,"
3637 "https://www.googleapis.com/auth/cloud-platform,"
3738 "https://www.googleapis.com/auth/cloud-platform.read-only" ;
3839
@@ -55,7 +56,25 @@ service ModelService {
5556}
5657
5758message Model {
58- // Evaluation metrics for regression models.
59+ message KmeansEnums { // Groups enums related to k-means clustering.
60+ // Indicates the method used to initialize the centroids for the k-means
61+ // clustering algorithm.
62+ enum KmeansInitializationMethod {
63+ KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0 ; // No method specified.
64+
65+ // Initializes the centroids randomly.
66+ RANDOM = 1 ;
67+
68+ // Initializes the centroids using data specified in
69+ // kmeans_initialization_column.
70+ CUSTOM = 2 ;
71+ }
72+
73+
74+ }
75+
76+ // Evaluation metrics for regression and explicit feedback type matrix
77+ // factorization models.
5978 message RegressionMetrics {
6079 // Mean absolute error.
6180 google.protobuf.DoubleValue mean_absolute_error = 1 ;
@@ -199,19 +218,69 @@ message Model {
199218
200219 // Evaluation metrics for clustering models.
201220 message ClusteringMetrics {
221+ // Message containing the information about one cluster.
222+ message Cluster {
223+ // Representative value of a single feature within the cluster.
224+ message FeatureValue {
225+ // Representative value of a categorical feature.
226+ message CategoricalValue {
227+ // Represents the count of a single category within the cluster.
228+ message CategoryCount {
229+ // The name of the category.
230+ string category = 1 ;
231+
232+ // The count of training samples matching the category within the
233+ // cluster.
234+ google.protobuf.Int64Value count = 2 ;
235+ }
236+
237+ // Counts of all categories for the categorical feature. If there are
238+ // more than ten categories, the top ten (by count) are returned, plus
239+ // one more CategoryCount with category '_OTHER_' whose count is the
240+ // aggregate count of the remaining categories.
241+ repeated CategoryCount category_counts = 1 ;
242+ }
243+
244+ // The feature column name.
245+ string feature_column = 1 ;
246+
247+ oneof value { // At most one of the following representations is set.
248+ // The numerical feature value. This is the centroid value for this
249+ // feature.
250+ google.protobuf.DoubleValue numerical_value = 2 ;
251+
252+ // The categorical feature value.
253+ CategoricalValue categorical_value = 3 ;
254+ }
255+ }
256+
257+ // Centroid id.
258+ int64 centroid_id = 1 ;
259+
260+ // Values of highly variant features for this cluster.
261+ repeated FeatureValue feature_values = 2 ;
262+
263+ // Count of training data rows that were assigned to this cluster.
264+ google.protobuf.Int64Value count = 3 ;
265+ }
266+
202267 // Davies-Bouldin index.
203268 google.protobuf.DoubleValue davies_bouldin_index = 1 ;
204269
205270 // Mean of squared distances between each sample and its cluster centroid.
206271 google.protobuf.DoubleValue mean_squared_distance = 2 ;
272+
273+ // [Beta] Information for all clusters.
274+ repeated Cluster clusters = 3 ;
207275 }
208276
209277 // Evaluation metrics of a model. These are either computed on all training
210278 // data or just the eval data based on whether eval data was used during
211279 // training. These are not present for imported models.
212280 message EvaluationMetrics {
213281 oneof metrics {
214- // Populated for regression models.
282+ // Populated for regression models and explicit feedback type matrix
283+ // factorization models.
215284 RegressionMetrics regression_metrics = 1 ;
216285
217286 // Populated for binary classification/classifier models.
@@ -220,7 +289,7 @@ message Model {
220289 // Populated for multi-class classification/classifier models.
221290 MultiClassClassificationMetrics multi_class_classification_metrics = 3 ;
222291
223- // [Beta] Populated for clustering models.
292+ // Populated for clustering models.
224293 ClusteringMetrics clustering_metrics = 4 ;
225294 }
226295 }
@@ -292,10 +361,10 @@ message Model {
292361 // training data. Only applicable for classification models.
293362 map <string , double > label_class_weights = 17 ;
294363
295- // [Beta] Distance type for clustering models.
364+ // Distance type for clustering models.
296365 DistanceType distance_type = 20 ;
297366
298- // [Beta] Number of clusters for clustering models.
367+ // Number of clusters for clustering models.
299368 int64 num_clusters = 21 ;
300369
301370 // [Beta] Google Cloud Storage URI from which the model was imported. Only
@@ -304,6 +373,13 @@ message Model {
304373
305374 // Optimization strategy for training linear regression models.
306375 OptimizationStrategy optimization_strategy = 23 ;
376+
377+ // The method used to initialize the centroids for the k-means algorithm.
378+ KmeansEnums.KmeansInitializationMethod kmeans_initialization_method = 33 ;
379+
380+ // The column used to provide the initial centroids for the k-means
381+ // algorithm when kmeans_initialization_method is CUSTOM.
382+ string kmeans_initialization_column = 34 ;
307383 }
308384
309385 // Information about a single iteration of the training run.
@@ -336,7 +412,7 @@ message Model {
336412 // Learn rate used for this iteration.
337413 double learn_rate = 7 ;
338414
339- // [Beta] Information about top clusters for clustering models.
415+ // Information about top clusters for clustering models.
340416 repeated ClusterInfo cluster_infos = 8 ;
341417 }
342418
@@ -365,7 +441,7 @@ message Model {
365441 // Logistic regression based classification model.
366442 LOGISTIC_REGRESSION = 2 ;
367443
368- // [Beta] K-means clustering model.
444+ // K-means clustering model.
369445 KMEANS = 3 ;
370446
371447 // [Beta] An imported TensorFlow model.
@@ -513,8 +589,8 @@ message PatchModelRequest {
513589 string model_id = 3 ;
514590
515591 // Patched model.
516- // Follows patch semantics. Missing fields are not updated. To clear a field,
517- // explicitly set to default value.
592+ // Follows RFC 5789 patch semantics. Missing fields are not updated.
593+ // To clear a field, explicitly set it to its default value.
518594 Model model = 4 ;
519595}
520596
@@ -536,7 +612,8 @@ message ListModelsRequest {
536612 // Dataset ID of the models to list.
537613 string dataset_id = 2 ;
538614
539- // The maximum number of results per page.
615+ // The maximum number of results to return in a single response page.
616+ // Use the page tokens to iterate through the entire collection.
540617 google.protobuf.UInt32Value max_results = 3 ;
541618
542619 // Page token, returned by a previous call to request the next page of
0 commit comments