Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 64 additions & 60 deletions samples/detect_api/deidentify_file.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
from skyflow.error import SkyflowError
from skyflow import Env, Skyflow, LogLevel
from skyflow.utils.enums import DetectEntities, MaskingMethod, DetectOutputTranscriptions
from skyflow.vault.detect import DeidentifyFileRequest, TokenFormat, Transformations, DateTransformation, Bleep, FileInput
from skyflow.vault.detect import (
DeidentifyFileRequest,
TokenFormat,
Transformations,
DateTransformation,
Bleep,
FileInput,
)

"""
* Skyflow Deidentify File Example
Expand All @@ -11,6 +18,7 @@
* spreadsheets, presentations, structured text.
"""


def perform_file_deidentification():
try:
# Step 1: Configure Credentials
Expand All @@ -23,7 +31,7 @@ def perform_file_deidentification():
'vault_id': '<YOUR_VAULT_ID>', # Replace with your vault ID
'cluster_id': '<YOUR_CLUSTER_ID>', # Replace with your cluster ID
'env': Env.PROD, # Deployment environment
'credentials': credentials
'credentials': credentials,
}

# Step 3: Configure & Initialize Skyflow Client
Expand All @@ -36,70 +44,66 @@ def perform_file_deidentification():

# Step 4: Create File Object
file_path = '<FILE_PATH>' # Replace with your file path
file = open(file_path, 'rb')
# Step 5: Configure Deidentify File Request with all options
deidentify_request = DeidentifyFileRequest(
file=FileInput(file), # File to de-identify (can also provide a file path)
entities=[DetectEntities.SSN, DetectEntities.CREDIT_CARD], # Entities to detect
allow_regex_list=['<YOUR_REGEX_PATTERN>'], # Optional: Patterns to allow
restrict_regex_list=['<YOUR_REGEX_PATTERN>'], # Optional: Patterns to restrict

# Token format configuration
token_format=TokenFormat(
vault_token=[DetectEntities.SSN], # Use vault tokens for these entities
),

# Optional: Custom transformations
# transformations=Transformations(
# shift_dates=DateTransformation(
# max_days=30,
# min_days=10,
# entities=[DetectEntities.DOB]
# )
# ),

# Output configuration
output_directory='<OUTPUT_DIRECTORY_PATH>', # Where to save processed file
wait_time=15, # Max wait time in seconds (max 64)

# Image-specific options
output_processed_image=True, # Include processed image in output
output_ocr_text=True, # Include OCR text in response
masking_method=MaskingMethod.BLACKBOX, # Masking method for images

# PDF-specific options
pixel_density=15, # Pixel density for PDF processing
max_resolution=2000, # Max resolution for PDF

# Audio-specific options
output_processed_audio=True, # Include processed audio
output_transcription=DetectOutputTranscriptions.PLAINTEXT_TRANSCRIPTION, # Transcription type

# Audio bleep configuration

# bleep=Bleep(
# gain=5, # Loudness in dB
# frequency=1000, # Pitch in Hz
# start_padding=0.1, # Padding at start (seconds)
# stop_padding=0.2 # Padding at end (seconds)
# )
)

# Step 6: Call deidentifyFile API
response = skyflow_client.detect().deidentify_file(deidentify_request)
# Step 5: Configure Deidentify File Request and call API
with open(file_path, 'rb') as file:
deidentify_request = DeidentifyFileRequest(
file=FileInput(file), # File to de-identify (can also provide a file path)
entities=[DetectEntities.SSN, DetectEntities.CREDIT_CARD], # Entities to detect
allow_regex_list=['<YOUR_REGEX_PATTERN>'], # Optional: Patterns to allow
restrict_regex_list=['<YOUR_REGEX_PATTERN>'], # Optional: Patterns to restrict
# Token format configuration
token_format=TokenFormat(
vault_token=[DetectEntities.SSN], # Use vault tokens for these entities
),
# Optional: Custom transformations
# transformations=Transformations(
# shift_dates=DateTransformation(
# max_days=30,
# min_days=10,
# entities=[DetectEntities.DOB]
# )
# ),
# Output configuration
output_directory='<OUTPUT_DIRECTORY_PATH>', # Where to save processed file
wait_time=15, # Max wait time in seconds (max 64)
# Image-specific options
output_processed_image=True, # Include processed image in output
output_ocr_text=True, # Include OCR text in response
masking_method=MaskingMethod.BLACKBOX, # Masking method for images
# PDF-specific options
pixel_density=15, # Pixel density for PDF processing
max_resolution=2000, # Max resolution for PDF
# Audio-specific options
output_processed_audio=True, # Include processed audio
output_transcription=DetectOutputTranscriptions.PLAINTEXT_TRANSCRIPTION, # Transcription type
# Audio bleep configuration
# bleep=Bleep(
# gain=5, # Loudness in dB
# frequency=1000, # Pitch in Hz
# start_padding=0.1, # Padding at start (seconds)
# stop_padding=0.2 # Padding at end (seconds)
# )
)

# Step 6: Call deidentifyFile API
response = skyflow_client.detect().deidentify_file(deidentify_request)

# Handle Successful Response
print("\nDeidentify File Response:", response)
print('\nDeidentify File Response:', response)

except SkyflowError as error:
# Handle Skyflow-specific errors
print('\nSkyflow Error:', {
'http_code': error.http_code,
'grpc_code': error.grpc_code,
'http_status': error.http_status,
'message': error.message,
'details': error.details
})
print(
'\nSkyflow Error:',
{
'http_code': error.http_code,
'grpc_code': error.grpc_code,
'http_status': error.http_status,
'message': error.message,
'details': error.details,
},
)
except Exception as error:
# Handle unexpected errors
print('Unexpected Error:', error)
62 changes: 31 additions & 31 deletions samples/detect_api/deidentify_file_async.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
from skyflow.error import SkyflowError
from skyflow import Env, Skyflow, LogLevel
from skyflow.utils.enums import DetectEntities, MaskingMethod, DetectOutputTranscriptions
from skyflow.vault.detect import DeidentifyFileRequest, TokenFormat, Transformations, DateTransformation, Bleep, FileInput
from skyflow.vault.detect import (
DeidentifyFileRequest,
TokenFormat,
Transformations,
DateTransformation,
Bleep,
FileInput,
)
from concurrent.futures import ThreadPoolExecutor

"""
Expand All @@ -13,6 +20,7 @@
* spreadsheets, presentations, structured text.
"""


def perform_file_deidentification_async():
try:
# Step 1: Configure Credentials
Expand All @@ -25,7 +33,7 @@ def perform_file_deidentification_async():
'vault_id': '<YOUR_VAULT_ID>', # Replace with your vault ID
'cluster_id': '<YOUR_CLUSTER_ID>', # Replace with your cluster ID
'env': Env.PROD, # Deployment environment
'credentials': credentials
'credentials': credentials,
}

# Step 3: Configure & Initialize Skyflow Client
Expand All @@ -38,18 +46,16 @@ def perform_file_deidentification_async():

# Step 4: Create File Object
file_path = '<FILE_PATH>' # Replace with your file path

deidentify_request = DeidentifyFileRequest(
file=FileInput(file_path=file_path), # File to de-identify
# entities=[DetectEntities.SSN, DetectEntities.CREDIT_CARD], # Entities to detect
allow_regex_list=['<YOUR_REGEX_PATTERN>'], # Optional: Patterns to allow
restrict_regex_list=['<YOUR_REGEX_PATTERN>'], # Optional: Patterns to restrict

# Token format configuration
token_format=TokenFormat(
vault_token=[DetectEntities.SSN], # Use vault tokens for these entities
),

# Optional: Custom transformations
# transformations=Transformations(
# shift_dates=DateTransformation(
Expand All @@ -58,67 +64,61 @@ def perform_file_deidentification_async():
# entities=[DetectEntities.DOB]
# )
# ),

# Output configuration
output_directory='<OUTPUT_DIRECTORY_PATH>', # Where to save processed file
wait_time=15, # Max wait time in seconds (max 64)

# Output configuration
output_directory='<OUTPUT_DIRECTORY_PATH>', # Where to save processed file
wait_time=15, # Max wait time in seconds (max 64)
# Image-specific options
output_processed_image=True, # Include processed image in output
output_ocr_text=True, # Include OCR text in response
masking_method=MaskingMethod.BLACKBOX, # Masking method for images

# PDF-specific options
pixel_density=15, # Pixel density for PDF processing
max_resolution=2000, # Max resolution for PDF

# Audio-specific options
output_processed_audio=True, # Include processed audio
output_transcription=DetectOutputTranscriptions.PLAINTEXT_TRANSCRIPTION, # Transcription type

# Audio bleep configuration

# bleep=Bleep(
# gain=5, # Loudness in dB
# frequency=1000, # Pitch in Hz
# start_padding=0.1, # Padding at start (seconds)
# stop_padding=0.2 # Padding at end (seconds)
# )
)

# Create a thread pool executor
executor = ThreadPoolExecutor(max_workers=1)

future = executor.submit(
lambda: skyflow_client.detect().deidentify_file(deidentify_request)
)


future = executor.submit(lambda: skyflow_client.detect().deidentify_file(deidentify_request))

def handle_response(future):
    """Report the outcome of the de-identify call once its future completes.

    Registered through ``future.add_done_callback``. Prints the Skyflow error
    fields for a ``SkyflowError``, a generic message for any other exception,
    or the response on success. Each outcome is printed exactly once.

    Args:
        future: The completed future returned by ``executor.submit``.
    """
    exception = future.exception()
    if exception is not None:
        if isinstance(exception, SkyflowError):
            # Handle Skyflow-specific errors
            print(
                '\nSkyflow Error:',
                {
                    'http_code': exception.http_code,
                    'grpc_code': exception.grpc_code,
                    'http_status': exception.http_status,
                    'message': exception.message,
                    'details': exception.details,
                },
            )
        else:
            # Handle unexpected errors
            print('Unexpected Error:', exception)
        return

    # Handle Successful Response
    result = future.result()
    print('\nDeidentify File Response:', result)

future.add_done_callback(handle_response)

executor.shutdown(wait=True)

except Exception as error:
# Handle unexpected errors
print('Unexpected Error:', error)

38 changes: 23 additions & 15 deletions samples/service_account/signed_token_generation_example.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import json
from skyflow.service_account import (
is_expired,
generate_signed_data_tokens,
generate_signed_data_tokens_from_creds,
)

file_path = 'CREDENTIALS_FILE_PATH'
bearer_token = ''
file_path = '<CREDENTIALS_FILE_PATH>'

skyflow_credentials = {
'clientID': '<YOUR_CLIENT_ID>',
Expand All @@ -19,15 +17,18 @@


# Approach 1: Signed data tokens with string context
# Returns: [('<DATA_TOKEN>', '<SIGNED_TOKEN>'), ...]
def get_signed_tokens_with_string_context():
    """Generate signed data tokens using a plain-string context.

    Reads the credentials file at the module-level ``file_path``, prints each
    (data token, signed token) pair, and returns the full result.

    Returns:
        The value returned by ``generate_signed_data_tokens``, iterated here
        as (data_token, signed_data_token) pairs. Returns ``None`` implicitly
        when an error was caught and printed.
    """
    options = {
        'ctx': 'user_12345',
        'data_tokens': ['<DATA_TOKEN1>', '<DATA_TOKEN2>'],
        'time_to_live': 90,  # in seconds
    }
    try:
        # Iterate the pairs rather than unpacking into two names, so the
        # sample works for any number of data tokens.
        results = generate_signed_data_tokens(file_path, options)
        for data_token, signed_data_token in results:
            print(f'  Token: {data_token}, Signed Token: {signed_data_token}')
        return results
    except Exception as e:
        print(f'Error: {str(e)}')

Expand All @@ -42,12 +43,14 @@ def get_signed_tokens_with_object_context():
'department': 'research',
'user_id': 'user_67890',
},
'data_tokens': ['DATA_TOKEN1', 'DATA_TOKEN2'],
'data_tokens': ['<DATA_TOKEN1>', '<DATA_TOKEN2>'],
'time_to_live': 90,
}
try:
data_token, signed_data_token = generate_signed_data_tokens(file_path, options)
return data_token, signed_data_token
results = generate_signed_data_tokens(file_path, options)
for data_token, signed_data_token in results:
print(f' Token: {data_token}, Signed Token: {signed_data_token}')
return results
except Exception as e:
print(f'Error: {str(e)}')

Expand All @@ -56,16 +59,21 @@ def get_signed_tokens_with_object_context():
def get_signed_tokens_from_credentials_string():
    """Generate signed data tokens from a credentials string.

    Passes the module-level ``credentials_string`` instead of a credentials
    file path, prints each (data token, signed token) pair, and returns the
    full result.

    Returns:
        The value returned by ``generate_signed_data_tokens_from_creds``,
        iterated here as (data_token, signed_data_token) pairs. Returns
        ``None`` implicitly when an error was caught and printed.
    """
    options = {
        'ctx': 'user_12345',
        'data_tokens': ['<DATA_TOKEN1>', '<DATA_TOKEN2>'],
        'time_to_live': 90,
    }
    try:
        # Iterate the pairs rather than unpacking into two names, so the
        # sample works for any number of data tokens.
        results = generate_signed_data_tokens_from_creds(credentials_string, options)
        for data_token, signed_data_token in results:
            print(f'  Token: {data_token}, Signed Token: {signed_data_token}')
        return results
    except Exception as e:
        print(f'Error: {str(e)}')


# Run each approach once. Every function prints its own token pairs, so only
# a label is printed here; printing the return value as well would repeat
# the same data.
print('String context:')
get_signed_tokens_with_string_context()
print('Object context:')
get_signed_tokens_with_object_context()
print('Creds string:')
get_signed_tokens_from_credentials_string()
Loading
Loading