Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing of association rule latest version #1121

Merged
merged 12 commits into from
Jan 25, 2025
Original file line number Diff line number Diff line change
Expand Up @@ -2418,13 +2418,16 @@
},
{
"cell_type": "code",

"execution_count": 20,

"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [

"/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
" df.iloc[idx[i], col[i]] = np.nan\n",
"/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
Expand All @@ -2438,6 +2441,7 @@
"/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
" df.iloc[idx[i], col[i]] = np.nan\n",
"/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",

" df.iloc[idx[i], col[i]] = np.nan\n"
]
},
Expand Down Expand Up @@ -2489,6 +2493,7 @@
" <td>True</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",

" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
Expand Down Expand Up @@ -2710,6 +2715,7 @@
]
},
"execution_count": 21,

"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -2718,6 +2724,7 @@
"frequent_itemsets = fpgrowth(df, min_support=0.6, null_values = True, use_colnames=True)\n",
"# frequent_itemsets = fpmax(df, min_support=0.6, null_values = True, use_colnames=True)\n",
"rules = association_rules(frequent_itemsets, len(df), df, null_values = True, metric=\"confidence\", min_threshold=0.8)\n",

"rules"
]
},
Expand Down
11 changes: 7 additions & 4 deletions mlxtend/frequent_patterns/association_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

def association_rules(
df: pd.DataFrame,
num_itemsets: int,
num_itemsets: Optional[int] = 1,
df_orig: Optional[pd.DataFrame] = None,
null_values=False,
metric="confidence",
Expand All @@ -54,8 +54,8 @@ def association_rules(
df_orig : pandas DataFrame (default: None)
DataFrame with original input data. Only provided when null_values exist

num_itemsets : int
Number of transactions in original input data
num_itemsets : int (default: 1)
Number of transactions in original input data (df_orig)

null_values : bool (default: False)
In case there are null values as NaNs in the original input data
Expand Down Expand Up @@ -119,6 +119,10 @@ def association_rules(
if null_values and df_orig is None:
raise TypeError("If null values exist, df_orig must be provided.")

# if null values exist, num_itemsets must be provided
if null_values and num_itemsets == 1:
raise TypeError("If null values exist, num_itemsets must be provided.")

# check for valid input
fpc.valid_input_check(df_orig, null_values)

Expand Down Expand Up @@ -285,7 +289,6 @@ def certainty_metric_helper(sAC, sA, sC, disAC, disA, disC, dis_int, dis_int_):
# if the input dataframe is complete
if not null_values:
disAC, disA, disC, dis_int, dis_int_ = 0, 0, 0, 0, 0
num_itemsets = 1

else:
an = list(antecedent)
Expand Down
2 changes: 1 addition & 1 deletion mlxtend/frequent_patterns/fpcommon.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def setup_fptree(df, min_support):
)

item_support = np.array(
np.sum(np.logical_or(df.values == 1, df.values is True), axis=0)
np.nansum(df.values, axis=0)
/ (float(num_itemsets) - np.nansum(disabled, axis=0))
)
item_support = item_support.reshape(-1)
Expand Down
Loading