# coding=utf-8 # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. """ @file mean_std_dev_calculator.py_in @brief @namespace utilities """ import plpy from convex.utils_regularization import utils_ind_var_scales from convex.utils_regularization import utils_ind_var_scales_grouping from utilities import _array_to_string m4_changequote(`') #TODO: use this for all the modules that calculate the std dev and mean for x # mlp, pca, elastic_net class MeanStdDevCalculator: def __init__(self, schema_madlib, source_table, indep_var_array_str, dimension): self.schema_madlib= schema_madlib self.source_table= source_table self.indep_var_array_str = indep_var_array_str self.dimension = dimension def get_mean_and_std_dev_for_ind_var(self): x_scaled_vals = utils_ind_var_scales(self.source_table, self.indep_var_array_str, self.dimension, self.schema_madlib, x_mean_table = None, # do not dump the output to a temp table set_zero_std_to_one=True) x_mean_str = _array_to_string(x_scaled_vals["mean"]) x_std_str = _array_to_string(x_scaled_vals["std"]) if not x_mean_str or not x_std_str: plpy.error("mean/stddev for the independent variable" "cannot be null") return x_mean_str, x_std_str def create_mean_std_table_for_ind_var_grouping(self, x_mean_table, grouping_cols): utils_ind_var_scales_grouping(self.source_table, self.indep_var_array_str, self.dimension, self.schema_madlib, grouping_cols, x_mean_table, set_zero_std_to_one = True, create_temp_table = False)