From 563a6a4b454c9e98ed1d48960550a60453f986c6 Mon Sep 17 00:00:00 2001
From: chenmingyu <chenmingyu@baidu.com>
Date: Tue, 22 Aug 2017 09:47:25 +0800
Subject: [PATCH] add detail explanation of FE configurations.

---
 conf/fe.conf                             |   8 +-
 fe/src/com/baidu/palo/common/Config.java | 316 +++++++++++++++++++++--
 2 files changed, 299 insertions(+), 25 deletions(-)

diff --git a/conf/fe.conf b/conf/fe.conf
index 7c2b403ff3a8fd..819abb12e378cc 100644
--- a/conf/fe.conf
+++ b/conf/fe.conf
@@ -1,6 +1,8 @@
-##
-## the uppercase properties are read and exported by bin/start_fe.sh.
-##
+#####################################################################
+## The uppercase properties are read and exported by bin/start_fe.sh.
+## To see all Frontend configurations,
+## see fe/src/com/baidu/palo/common/Config.java
+#####################################################################
 
 # set JAVA_HOME or set JAVA_HOME in env variables
 # JAVA_HOME = 
diff --git a/fe/src/com/baidu/palo/common/Config.java b/fe/src/com/baidu/palo/common/Config.java
index 7873a84d42596d..9897e8f38417ef 100644
--- a/fe/src/com/baidu/palo/common/Config.java
+++ b/fe/src/com/baidu/palo/common/Config.java
@@ -17,121 +17,393 @@
 
 public class Config extends ConfigBase {
 
+    /*
+     * This specifies FE log dir. FE will produce 2 log files:
+     * fe.log:      all logs of FE process.
+     * fe.warn.log: all WARNING and ERROR logs of FE process.
+     */
     @ConfField public static String sys_log_dir = System.getenv("PALO_HOME") + "/log";
     @ConfField public static String sys_log_level = "INFO"; // INFO, WARNING, ERROR, FATAL
+    /*
+     * The roll mode of FE log files.
+     * TIME-DAY:    roll every day.
+     * TIME-HOUR:   roll every hour.
+     * SIZE-MB-nnn: roll by size.
+     */
     @ConfField public static String sys_log_roll_mode = "SIZE-MB-1024"; // TIME-DAY， TIME-HOUR， SIZE-MB-nnn
-    @ConfField public static int sys_log_roll_num = 10; // the config doesn't work if rollmode is TIME-*
+    /*
+     * Maximal FE log files to be kept.
+     * Doesn't work if roll mode is TIME-*
+     */
+    @ConfField public static int sys_log_roll_num = 10;
 
-    // verbose modules. VERBOSE level is implemented by log4j DEBUG level.
+    /*
+     * Verbose modules. VERBOSE level is implemented by log4j DEBUG level.
+     * eg:
+     *      sys_log_verbose_modules = com.baidu.palo.catalog
+     *  This will only print verbose log of files in package com.baidu.palo.catalog and all its sub packages.
+     */
     @ConfField public static String[] sys_log_verbose_modules = {};
 
+    /*
+     * This specifies FE audit log dir.
+     * Audit log fe.audit.log contains all SQL queries with related infos such as user, host, cost, status, etc.
+     */
     @ConfField public static String audit_log_dir = System.getenv("PALO_HOME") + "/log";
+    /*
+     * Slow query contains all queries whose cost exceeds *qe_slow_log_ms*
+     */
     @ConfField public static String[] audit_log_modules = {"slow_query", "query"};
     @ConfField public static String audit_log_roll_mode = "TIME-DAY"; // TIME-DAY， TIME-HOUR， SIZE-MB-nnn
-    @ConfField public static int audit_log_roll_num = 10; // the config doesn't work if rollmode is TIME-*
+    @ConfField
+    public static int audit_log_roll_num = 10; // Doesn't work if roll mode is TIME-*
 
+    /*
+     * Labels of finished or cancelled load jobs will be removed after *label_keep_max_second*
+     * The removed labels can be reused.
+     * Setting a short time will lower the FE memory usage.
+     * (Because all load jobs' info is kept in memory before being removed)
+     */
     @ConfField public static int label_keep_max_second = 7 * 24 * 3600; // 7 days
+    /*
+     * Load label cleaner will run every *label_clean_interval_second* to clean the outdated jobs.
+     */
     @ConfField public static int label_clean_interval_second = 4 * 3600; // 4 hours
+    /*
+     * If a load job stays in QUORUM_FINISHED state longer than *quorum_load_job_max_second*,
+     * a clone job will be triggered to help finishing this load job.
+     */
     @ConfField public static int quorum_load_job_max_second = 24 * 3600; // 1 days
 
     // Configurations for meta data durability
+    /*
+     * Palo meta data will be saved here.
+     * The storage of this dir is highly recommended to be:
+     * 1. High write performance (SSD)
+     * 2. Safe (RAID)
+     */
     @ConfField public static String meta_dir = System.getenv("PALO_HOME") + "/palo-meta";
-    @ConfField public static String edit_log_type = "BDB";    // BDB, LOCAL
-    @ConfField public static int edit_log_port = 9010;        // Only used when edit_log_type = "BDB
+    /*
+     * Edit log type.
+     * BDB: write log to bdbje
+     * LOCAL: deprecated.
+     */
+    @ConfField
+    public static String edit_log_type = "BDB";
+    /*
+     * bdbje port
+     */
+    @ConfField
+    public static int edit_log_port = 9010;
+    /*
+     * Master FE will save image every *edit_log_roll_num* meta journals.
+     */
     @ConfField public static int edit_log_roll_num = 100000;
+    /*
+     * Non-master FE will stop offering service
+     * if meta data delay gap exceeds *meta_delay_toleration_second*
+     */
     @ConfField public static int meta_delay_toleration_second = 300;    // 5 min
+    /*
+     * Master FE sync policy of bdbje.
+     * more info, see: http://docs.oracle.com/cd/E17277_02/html/java/com/sleepycat/je/Durability.SyncPolicy.html
+     */
     @ConfField public static String master_sync_policy = "WRITE_NO_SYNC"; // SYNC, NO_SYNC, WRITE_NO_SYNC
+    /*
+     * Follower FE sync policy of bdbje.
+     */
     @ConfField public static String replica_sync_policy = "WRITE_NO_SYNC"; // SYNC, NO_SYNC, WRITE_NO_SYNC
+    /*
+     * Replica ack policy of bdbje.
+     * more info, see: http://docs.oracle.com/cd/E17277_02/html/java/com/sleepycat/je/Durability.ReplicaAckPolicy.html
+     */
     @ConfField public static String replica_ack_policy = "SIMPLE_MAJORITY"; // ALL, NONE, SIMPLE_MAJORITY
 
-    // kudu master address
+    /*
+     * Kudu is currently not supported.
+     */
     @ConfField public static String kudu_master_addresses = "127.0.0.1:8030";
     @ConfField public static int kudu_client_timeout_ms = 500;
 
     /*
-     * true means reset replication group. If all the electable nodes can not start, we can
-     * copy the meta data to another node and set this item to true to recover the metadata.
-     * In this scenario, we can get the newest image file contains all the meta data, then
-     * use the image file to restart the failed cluster.
+     * If true, FE will reset bdbje replication group(that is, to remove all electable nodes info)
+     * and is supposed to start as Master. 
+     * If all the electable nodes can not start, we can copy the meta data
+     * to another node and set this config to true to try to restart the FE.
      */
     @ConfField public static String metadata_failure_recovery = "false";
 
     /*
-     * false means non-master node need to check its own journal is out of date or not in every replay loop.
-     * true means to ignore this meta check.
-     * this should only be set by rest api and only be set when master is truly out of services.
+     * If true, non-master FE will ignore the meta data delay gap between Master FE and itself,
+     * even if the metadata delay gap exceeds *meta_delay_toleration_second*.
+     * Non-master FE will still offer read service.
+     * 
+     * This is helpful when you try to stop the Master FE for a relatively long time for some reason,
+     * but still wish the non-master FE can offer read service.
      */
     @ConfField public static boolean ignore_meta_check = false;
 
+    /*
+     * FE http port
+     * Currently, all FEs' http port must be the same.
+     */
     @ConfField public static int http_port = 8030;
+    /*
+     * FE thrift server port
+     */
     @ConfField public static int rpc_port = 9020;
+    /*
+     * FE mysql server port
+     */
     @ConfField public static int query_port = 9030;
 
-    // Config cluster name and id
+    /*
+     * Cluster name will be shown as the title of web page
+     */
     @ConfField public static String cluster_name = "Baidu Palo";
+    /*
+     * Nodes (FE or BE) will be considered as belonging to the same Palo cluster if they have the same cluster id.
+     * Cluster id is usually a random integer generated when master FE starts for the first time.
+     * You can also specify one.
+     */
     @ConfField public static int cluster_id = -1;
 
     // Configurations for load, clone, create table, alter table etc. We will rarely change them
+    /*
+     * Maximal waiting time for creating a single replica.
+     * eg. 
+     *      if you create a table with #m tablets and #n replicas for each tablet,
+     *      the create table request will run at most (m * n * tablet_create_timeout_second) before timeout.
+     */
     @ConfField public static int tablet_create_timeout_second = 1;
-    @ConfField public static int table_create_default_keys_num = 5;
-    @ConfField public static int table_create_default_distribute_num = 5;
+
+    /*
+     * Load checker's running interval.
+     * A load job will transfer its state from PENDING to ETL to LOADING to FINISHED.
+     * So a load job will cost at least 3 check intervals to finish.
+     */
     @ConfField public static int load_checker_interval_second = 5;
+
+    /*
+     * Concurrency of HIGH priority pending load jobs.
+     * Load job priority is defined as HIGH or NORMAL.
+     * All mini batch load jobs are HIGH priority, other types of load jobs are NORMAL priority.
+     * Priority is set to avoid that a slow load job occupies a thread for a long time.
+     * This is just an internal optimized scheduling policy.
+     * Currently, you can not specify the job priority manually,
+     * and do not change this unless you know what you are doing.
+     */
     @ConfField public static int load_pending_thread_num_high_priority = 3;
+    /*
+     * Concurrency of NORMAL priority pending load jobs.
+     * Do not change this unless you know what you are doing.
+     */
     @ConfField public static int load_pending_thread_num_normal_priority = 10;
+    /*
+     * Concurrency of HIGH priority etl load jobs.
+     * Do not change this unless you know what you are doing.
+     */
     @ConfField public static int load_etl_thread_num_high_priority = 3;
+    /*
+     * Concurrency of NORMAL priority etl load jobs.
+     * Do not change this unless you know what you are doing.
+     */
     @ConfField public static int load_etl_thread_num_normal_priority = 10;
+    /*
+     * Not available.
+     */
     @ConfField public static int load_input_size_limit_gb = 0; // GB, 0 is no limit
+    /*
+     * Not available.
+     */
     @ConfField public static int load_running_job_num_limit = 0; // 0 is no limit
+    /*
+     * Default pull load timeout
+     */
+    @ConfField
+    public static int pull_load_task_default_timeout_second = 3600; // 1hour
+
+    /*
+     * Same meaning as *tablet_create_timeout_second*, but used when deleting a tablet.
+     */
     @ConfField public static int tablet_delete_timeout_second = 2;
+    /*
+     * Clone checker's running interval.
+     */
     @ConfField public static int clone_checker_interval_second = 300;
+    /*
+     * Default timeout of a single clone job. Set long enough to fit your replica size.
+     * The larger the replica data size is, the more time it will cost to finish clone.
+     */
     @ConfField public static int clone_job_timeout_second = 7200; // 2h
+    /*
+     * Concurrency of LOW priority clone jobs.
+     * Concurrency of HIGH priority clone jobs is currently unlimited.
+     */
     @ConfField public static int clone_max_job_num = 100;
+    /*
+     * LOW priority clone job's delay trigger time.
+     * A clone job contains a tablet which needs to be cloned(recovery or migration).
+     * If the priority is LOW, it will be delayed *clone_low_priority_delay_second* 
+     * after the job creation and then be executed.
+     * This is to avoid a large number of clone jobs running at same time only because a host is down for a short time.
+     * 
+     * NOTICE that this config(and *clone_normal_priority_delay_second* as well)
+     * will not work if it's smaller than *clone_checker_interval_second*
+     */
     @ConfField public static int clone_low_priority_delay_second = 600;
+    /*
+     * NORMAL priority clone job's delay trigger time.
+     */
     @ConfField public static int clone_normal_priority_delay_second = 300;
+    /*
+     * HIGH priority clone job's delay trigger time.
+     */
     @ConfField public static int clone_high_priority_delay_second = 0;
+    /*
+     * Balance threshold of data size in BE.
+     * The balance algorithm is:
+     * 1. Calculate the average used capacity(AUC) of the entire cluster. (total data size / total backends num)
+     * 2. The high water level is (AUC * (1 + clone_capacity_balance_threshold))
+     * 3. The low water level is (AUC * (1 - clone_capacity_balance_threshold))
+     * The Clone checker will try to move replica from high water level BE to low water level BE.
+     */
     @ConfField public static double clone_capacity_balance_threshold = 0.2;
+    /*
+     * Balance threshold of num of replicas in Backends.
+     */
     @ConfField public static double clone_distribution_balance_threshold = 0.2;
+    /*
+     * Maximal timeout of ALTER TABLE request. Set long enough to fit your table data size.
+     */
     @ConfField public static int alter_table_timeout_second = 86400; // 1day
+    /*
+     * After ALTER TABLE finished, deletion of the old schema replica is delayed,
+     * in case there are still some queries using the old schema replica.
+     */
     @ConfField public static int alter_delete_base_delay_second = 600; // 10min
+    /*
+     * If a backend is down for *max_backend_down_time_second*, a BACKEND_DOWN event will be triggered.
+     * Do not set this unless you know what you are doing.
+     */
     @ConfField public static int max_backend_down_time_second = 3600; // 1h
+    /*
+     * When creating a table(or partition), you can specify its storage media(HDD or SSD).
+     * If set to SSD, this specifies the default duration that tablets will stay on SSD.
+     * After that, tablets will be moved to HDD automatically.
+     * You can set storage cooldown time in LOAD stmt.
+     */
     @ConfField public static long storage_cooldown_second = 30 * 24 * 3600L; // 30 days
+    /*
+     * After dropping database(table/partition), you can recover it by using RECOVER stmt.
+     * And this specifies the maximal data retention time. After this time, the data will be deleted permanently.
+     */
     @ConfField public static long catalog_trash_expire_second = 86400L; // 1day
-    @ConfField public static int pull_load_task_default_timeout_second = 3600; // 1hour
+    /*
+     * Minimal bytes that a single broker scanner will read.
+     * Do not set this unless you know what you are doing.
+     */
     @ConfField public static long min_bytes_per_broker_scanner = 67108864L; // 64MB
+    /*
+     * Maximal concurrency of broker scanners.
+     * Do not set this unless you know what you are doing.
+     */
     @ConfField public static int max_broker_concurrency = 10;
+
+    /*
+     * Export checker's running interval.
+     */
     @ConfField public static int export_checker_interval_second = 5;
+    /*
+     * Concurrency of pending export jobs.
+     */
     @ConfField public static int export_pending_thread_num = 5;
+    /*
+     * Number of threads to handle export jobs.
+     */
     @ConfField public static int export_exporting_thread_num = 10;
+    /*
+     * Limitation of the concurrency of running export jobs.
+     * Default is no limit.
+     */
     @ConfField public static int export_running_job_num_limit = 0; // 0 is no limit
-    @ConfField public static int export_task_default_timeout_second = 24 * 3600;
+    /*
+     * Default timeout of export jobs.
+     */
+    @ConfField public static int export_task_default_timeout_second = 24 * 3600;    // 24h
+    /*
+     * Concurrency of exporting tablets.
+     */
     @ConfField public static int export_parallel_tablet_num = 5;
+    /*
+     * Labels of finished or cancelled export jobs will be removed after *export_keep_max_second*.
+     * The removed labels can be reused.
+     */
     @ConfField public static int export_keep_max_second = 7 * 24 * 3600; // 7 days
 
     // Configurations for consistency check
+    /*
+     * Consistency checker will run from *consistency_check_start_time* to *consistency_check_end_time*.
+     * Default is from 23:00 to 04:00
+     */
     @ConfField public static String consistency_check_start_time = "23";
     @ConfField public static String consistency_check_end_time = "4";
+    /*
+     * Default timeout of a single consistency check task. Set long enough to fit your tablet size.
+     */
     @ConfField public static long check_consistency_default_timeout_second = 600; // 10 min
 
     // Configurations for query engine
+    /*
+     * Maximal number of connections per FE.
+     */
     @ConfField public static int qe_max_connection = 1024;
+    /*
+     * Maximal number of connections per user, per FE.
+     */
     @ConfField public static int max_conn_per_user = 100;
+    /*
+     * Default query timeout.
+     */
     @ConfField public static int qe_query_timeout_second = 300;
+    /*
+     * If the response time of a query exceeds this threshold, it will be recorded in audit log as slow_query.
+     */
     @ConfField public static long qe_slow_log_ms = 5000;
-    @ConfField public static int blacklist_backends_max_times = 6;
+    /*
+     * The interval of user resource publishing.
+     * User resource contains cgroup configurations of a user.
+     */
     @ConfField public static int meta_resource_publish_interval_ms = 60000; // 1m
+    /*
+     * The default user resource publishing timeout.
+     */
     @ConfField public static int meta_publish_timeout_ms = 1000;
     @ConfField public static boolean proxy_auth_enable = false;
     @ConfField public static String proxy_auth_magic_prefix = "x@8";
-    // Limits on the number of expr children and the depth of an expr tree.
-    // exceed this limit may cause long analysis time while holding db read lock.
+    /*
+     * Limit on the number of expr children of an expr tree.
+     * Exceeding this limit may cause long analysis time while holding database read lock.
+     * Do not set this unless you know what you are doing.
+     */
     @ConfField public static int expr_children_limit = 10000;
-    // The expr depth limit is mostly due to our recursive implementation of toSql().
+    /*
+     * Limit on the depth of an expr tree.
+     * Exceeding this limit may cause long analysis time while holding db read lock.
+     * Do not set this unless you know what you are doing.
+     */
     @ConfField public static int expr_depth_limit = 3000;
 
     // Configurations for backup and restore
+    /*
+     * Plugins' path for BACKUP and RESTORE operations. Currently deprecated.
+     */
     @ConfField public static String backup_plugin_path = "/tools/trans_file_tool/trans_files.sh";
 
     // Configurations for hadoop dpp
+    /*
+     * The following configurations are not available.
+     */
     @ConfField public static String dpp_hadoop_client_path = "/lib/hadoop-client/hadoop/bin/hadoop";
     @ConfField public static long dpp_bytes_per_reduce = 100 * 1024 * 1024L; // 100M
     @ConfField public static String dpp_default_cluster = "palo-dpp";