@@ -520,3 +520,63 @@ def generate_text(
520520 return bpd .read_gbq_query (sql )
521521 else :
522522 return session .read_gbq_query (sql )
523+
524+
@log_adapter.method_logger(custom_base_name="bigquery_ml")
def generate_embedding(
    model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series],
    input_: Union[pd.DataFrame, dataframe.DataFrame, str],
    *,
    flatten_json_output: Optional[bool] = None,
    task_type: Optional[str] = None,
    output_dimensionality: Optional[int] = None,
) -> dataframe.DataFrame:
    """
    Generates text embeddings using a BigQuery ML model.

    Builds a GENERATE_EMBEDDING query from the model and the input data and
    runs it, returning the query result as a DataFrame.

    See the `BigQuery ML GENERATE_EMBEDDING function syntax
    <https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-generate-embedding>`_
    for additional reference.

    Args:
        model (bigframes.ml.base.BaseEstimator, str, or pandas.Series):
            The model to use for text embedding.
        input_ (pandas.DataFrame, bigframes.pandas.DataFrame, or str):
            The DataFrame or query to use for text embedding.
        flatten_json_output (bool, optional):
            A BOOL value that determines the content of the generated JSON
            column. Forwarded unchanged to the SQL builder; defaults to
            ``None``.
        task_type (str, optional):
            A STRING value that specifies the intended downstream
            application task. Supported values are:

            - `RETRIEVAL_QUERY`
            - `RETRIEVAL_DOCUMENT`
            - `SEMANTIC_SIMILARITY`
            - `CLASSIFICATION`
            - `CLUSTERING`
            - `QUESTION_ANSWERING`
            - `FACT_VERIFICATION`
            - `CODE_RETRIEVAL_QUERY`
        output_dimensionality (int, optional):
            An INT64 value that specifies the size of the output embedding.

    Returns:
        bigframes.pandas.DataFrame:
            The generated text embedding.
    """
    # Imported lazily inside the function, as in the rest of this module.
    import bigframes.pandas as bpd

    # The helper yields the model reference plus a session, which may be None.
    resolved_model, bound_session = _get_model_name_and_session(model, input_)
    source_sql = _to_sql(input_)

    embedding_query = bigframes.core.sql.ml.generate_embedding(
        model_name=resolved_model,
        table=source_sql,
        flatten_json_output=flatten_json_output,
        task_type=task_type,
        output_dimensionality=output_dimensionality,
    )

    # With no session resolved, fall back to the module-level bigframes.pandas
    # reader; otherwise run the query on the resolved session.
    query_runner = bpd if bound_session is None else bound_session
    return query_runner.read_gbq_query(embedding_query)
0 commit comments