Skip to content

pipe

Config location: conf/pipe

__dataframe__.yaml

# Shared settings for dataframe pipes; builds on the __init__ base config.
defaults:
  - __init__

# Left unset (null) here.
columns_to_apply: null
use_batcher: false
num_workers: 1

__dataframe_external_funcs__.yaml

# Builds on the shared __dataframe__ settings.
defaults:
  - __dataframe__

# Dotted path of the pipe target (presumably the callable that runs this
# pipe; confirm against the hyfi.pipe module).
pipe_target: hyfi.pipe.dataframe_external_funcs

__dataframe_instance_methods__.yaml

# Builds on the shared __dataframe__ settings.
defaults:
  - __dataframe__

# Dotted path of the pipe target (presumably the callable that runs this
# pipe; confirm against the hyfi.pipe module).
pipe_target: hyfi.pipe.dataframe_instance_methods

__general_external_funcs__.yaml

# Builds on the __init__ base pipe settings.
defaults:
  - __init__

# Dotted path of the pipe target (presumably the callable that runs this
# pipe; confirm against the hyfi.pipe module).
pipe_target: hyfi.pipe.general_external_funcs

__general_instance_methods__.yaml

# Builds on the __init__ base pipe settings.
defaults:
  - __init__

# Dotted path of the pipe target (presumably the callable that runs this
# pipe; confirm against the hyfi.pipe module).
pipe_target: hyfi.pipe.general_instance_methods

__init__.yaml

# _target_:
# Base pipe settings. Keys without a value are written as explicit nulls.
pipe_target: null
name: null
desc: null
env: null
# If true, the pipe target function will be called with the pipe object as
# the first argument.
use_pipe_obj: true
# If use_pipe_obj is true, the pipe object will be passed to the pipe target
# function with this argument name.
pipe_obj_arg_name: null
# If true, the pipe target function will return the pipe object instead of
# the return value.
return_pipe_obj: false
verbose: false

dataframe_combine_str_columns.yaml

# Builds on __general_external_funcs__ and pulls in the /run entry named
# dataframe_combine_str_columns.
defaults:
  - __general_external_funcs__
  - /run: dataframe_combine_str_columns

# The pipe object is passed to the target; no argument name is set, and the
# pipe object is not returned in place of the target's return value.
use_pipe_obj: true
pipe_obj_arg_name: null
return_pipe_obj: false

dataframe_drop.yaml

# Builds on __general_external_funcs__ and pulls in the /run entry named
# dataframe_drop.
defaults:
  - __general_external_funcs__
  - /run: dataframe_drop

# The pipe object is passed to the target; no argument name is set, and the
# pipe object is not returned in place of the target's return value.
use_pipe_obj: true
pipe_obj_arg_name: null
return_pipe_obj: false

dataframe_drop_columns.yaml

# Builds on __general_external_funcs__ and pulls in the /run entry named
# dataframe_drop_columns.
defaults:
  - __general_external_funcs__
  - /run: dataframe_drop_columns

# The pipe object is passed to the target; no argument name is set, and the
# pipe object is not returned in place of the target's return value.
use_pipe_obj: true
pipe_obj_arg_name: null
return_pipe_obj: false

dataframe_eval_columns.yaml

# Builds on __general_external_funcs__ and pulls in the /run entry named
# dataframe_eval_columns.
defaults:
  - __general_external_funcs__
  - /run: dataframe_eval_columns

# The pipe object is passed to the target; no argument name is set, and the
# pipe object is not returned in place of the target's return value.
use_pipe_obj: true
pipe_obj_arg_name: null
return_pipe_obj: false

dataframe_eval_columns_with_pd_eval.yaml

# Builds on __general_external_funcs__ and pulls in the /run entry named
# dataframe_eval_columns_with_pd_eval.
defaults:
  - __general_external_funcs__
  - /run: dataframe_eval_columns_with_pd_eval

# The pipe object is passed to the target; no argument name is set, and the
# pipe object is not returned in place of the target's return value.
use_pipe_obj: true
pipe_obj_arg_name: null
return_pipe_obj: false

dataframe_print_head_and_tail.yaml

# Builds on __general_external_funcs__ and pulls in the /run entry named
# dataframe_print_head_and_tail.
defaults:
  - __general_external_funcs__
  - /run: dataframe_print_head_and_tail

# The pipe object is passed to the target; no argument name is set, and the
# pipe object is not returned in place of the target's return value.
use_pipe_obj: true
pipe_obj_arg_name: null
return_pipe_obj: false

dataframe_select_columns.yaml

# Builds on __general_external_funcs__ and pulls in the /run entry named
# dataframe_select_columns.
defaults:
  - __general_external_funcs__
  - /run: dataframe_select_columns

# The pipe object is passed to the target; no argument name is set, and the
# pipe object is not returned in place of the target's return value.
use_pipe_obj: true
pipe_obj_arg_name: null
return_pipe_obj: false

dataframe_split_str_column.yaml

# Builds on __general_external_funcs__ and pulls in the /run entry named
# dataframe_split_str_column.
defaults:
  - __general_external_funcs__
  - /run: dataframe_split_str_column

# The pipe object is passed to the target; no argument name is set, and the
# pipe object is not returned in place of the target's return value.
use_pipe_obj: true
pipe_obj_arg_name: null
return_pipe_obj: false

dataset_remove_columns.yaml

# Builds on the shared __general_instance_methods__ settings.
defaults:
  - __general_instance_methods__

# Run spec targeting `remove_columns`; both of its arguments are left unset
# (null) here.
run:
  _target_: remove_columns
  column_names: null
  new_fingerprint: null
# Set to true here (the __init__ base config sets verbose to false).
verbose: true

dataset_to_pandas.yaml

# Builds on the shared __general_instance_methods__ settings.
defaults:
  - __general_instance_methods__

# Run spec targeting `to_pandas`; no arguments are configured.
run:
  _target_: to_pandas
# Set to true here (the __init__ base config sets verbose to false).
verbose: true

dict_to_dataframe.yaml

# Builds on __general_external_funcs__ and pulls in the /run entry named
# dict_to_dataframe.
defaults:
  - __general_external_funcs__
  - /run: dict_to_dataframe

# The pipe object is passed to the target; no argument name is set, and the
# pipe object is not returned in place of the target's return value.
use_pipe_obj: true
pipe_obj_arg_name: null
return_pipe_obj: false

filter_and_sample_data.yaml

# Builds on __dataframe_external_funcs__ and pulls in the /run entry named
# filter_and_sample_data.
defaults:
  - __dataframe_external_funcs__
  - /run: filter_and_sample_data

# The pipe object is passed to the target; no argument name is set, and the
# pipe object is not returned in place of the target's return value.
use_pipe_obj: true
pipe_obj_arg_name: null
return_pipe_obj: false

filter_data_by_queries.yaml

# Builds on __dataframe_external_funcs__ and pulls in the /run entry named
# filter_data_by_queries.
defaults:
  - __dataframe_external_funcs__
  - /run: filter_data_by_queries

# The pipe object is passed to the target; no argument name is set, and the
# pipe object is not returned in place of the target's return value.
use_pipe_obj: true
pipe_obj_arg_name: null
return_pipe_obj: false

load_data.yaml

# Builds on __general_external_funcs__ and pulls in the /run entry named
# load_data.
defaults:
  - __general_external_funcs__
  - /run: load_data

# use_pipe_obj is false here, so the pipe object is not passed to the
# target; the target's return value is not replaced by the pipe object.
use_pipe_obj: false
pipe_obj_arg_name: null
return_pipe_obj: false

load_dataframe.yaml

# Builds on __dataframe_external_funcs__ and pulls in the /run entry named
# load_dataframe.
defaults:
  - __dataframe_external_funcs__
  - /run: load_dataframe

# use_pipe_obj is false here, so the pipe object is not passed to the
# target; the target's return value is not replaced by the pipe object.
use_pipe_obj: false
pipe_obj_arg_name: null
return_pipe_obj: false

load_dataframes.yaml

# Builds on __dataframe_external_funcs__ and pulls in the /run entry named
# load_dataframes.
defaults:
  - __dataframe_external_funcs__
  - /run: load_dataframes

# use_pipe_obj is false here, so the pipe object is not passed to the
# target; the target's return value is not replaced by the pipe object.
use_pipe_obj: false
pipe_obj_arg_name: null
return_pipe_obj: false

load_dataset.yaml

# Builds on __general_external_funcs__ and pulls in the /run entry named
# load_dataset.
defaults:
  - __general_external_funcs__
  - /run: load_dataset

# use_pipe_obj is false here, so the pipe object is not passed to the
# target; the target's return value is not replaced by the pipe object.
use_pipe_obj: false
pipe_obj_arg_name: null
return_pipe_obj: false

load_dataset_from_disk.yaml

# Builds on __general_external_funcs__ and pulls in the /run entry named
# load_dataset_from_disk.
defaults:
  - __general_external_funcs__
  - /run: load_dataset_from_disk

# use_pipe_obj is false here, so the pipe object is not passed to the
# target; the target's return value is not replaced by the pipe object.
use_pipe_obj: false
pipe_obj_arg_name: null
return_pipe_obj: false

merge_dataframes.yaml

# Builds on __general_external_funcs__ and pulls in the /run entry named
# merge_dataframes.
defaults:
  - __general_external_funcs__
  - /run: merge_dataframes

# The pipe object is passed to the target; no argument name is set, and the
# pipe object is not returned in place of the target's return value.
use_pipe_obj: true
pipe_obj_arg_name: null
return_pipe_obj: false

records_to_dataframe.yaml

# Builds on __general_external_funcs__ and pulls in the /run entry named
# records_to_dataframe.
defaults:
  - __general_external_funcs__
  - /run: records_to_dataframe

# The pipe object is passed to the target; no argument name is set, and the
# pipe object is not returned in place of the target's return value.
use_pipe_obj: true
pipe_obj_arg_name: null
return_pipe_obj: false

sample_data.yaml

# Builds on __dataframe_external_funcs__ and pulls in the /run entry named
# sample_data.
defaults:
  - __dataframe_external_funcs__
  - /run: sample_data

# The pipe object is passed to the target; no argument name is set, and the
# pipe object is not returned in place of the target's return value.
use_pipe_obj: true
pipe_obj_arg_name: null
return_pipe_obj: false

sample_dataset.yaml

# Builds on __general_external_funcs__ and pulls in the /run entry named
# sample_dataset.
defaults:
  - __general_external_funcs__
  - /run: sample_dataset

# The pipe object is passed to the target; no argument name is set, and the
# pipe object is not returned in place of the target's return value.
use_pipe_obj: true
pipe_obj_arg_name: null
return_pipe_obj: false

save_dataframes.yaml

# Builds on __dataframe_external_funcs__ and pulls in the /run entry named
# save_dataframes.
defaults:
  - __dataframe_external_funcs__
  - /run: save_dataframes

# The pipe object is passed to the target; no argument name is set, and the
# pipe object is not returned in place of the target's return value.
use_pipe_obj: true
pipe_obj_arg_name: null
return_pipe_obj: false

save_dataset_to_disk.yaml

# Builds on __general_external_funcs__ and pulls in the /run entry named
# save_dataset_to_disk.
defaults:
  - __general_external_funcs__
  - /run: save_dataset_to_disk

# The pipe object is passed to the target; no argument name is set, and the
# pipe object is not returned in place of the target's return value.
use_pipe_obj: true
pipe_obj_arg_name: null
return_pipe_obj: false

split_dataframe.yaml

# Builds on __general_external_funcs__ and pulls in the /run entry named
# split_dataframe.
defaults:
  - __general_external_funcs__
  - /run: split_dataframe

# The pipe object is passed to the target; no argument name is set, and the
# pipe object is not returned in place of the target's return value.
use_pipe_obj: true
pipe_obj_arg_name: null
return_pipe_obj: false