diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2017-09-01 12:06:46 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2017-09-04 08:10:58 +0200 |
commit | b65a5ac0bfcfbf0eb656e77fbe05c03e1670678e (patch) | |
tree | e3a46bfc1566fa434048ca76381add66d8b4faf5 | |
parent | d092ea3523615dad200477c83c347ca55ef9ec2d (diff) | |
download | savane-gray-b65a5ac0bfcfbf0eb656e77fbe05c03e1670678e.tar.gz savane-gray-b65a5ac0bfcfbf0eb656e77fbe05c03e1670678e.tar.bz2 |
Fix interaction between spam marking and learning.
* backend/misc/sv_spamchecker: Use 100 as the reporter ID
(com_learn): Fix learning from user's resume
Simplify select condition
(com_usercheck): Bugfix
* frontend/php/include/spam.php (spam_flag): Remove spurious call
to spam_get_user_score. Whether this user has reported this item
was checked right at the beginning of the function.
Use spam_recompute_user_score to update the user's total score.
(spam_flag_resume): Enqueue item for spam checking
Use spam_recompute_user_score to update the user's total score.
(spam_recompute_user_score,spamcheck_enqueue)
(spam_clear): New functions.
(spam_unflag,spam_add_to_spamcheck_queue): Use spamcheck_enqueue
(spam_get_user_score): Remove 2nd argument.
(spam_set_item_default_score): Update record priority on
duplicate key
* frontend/php/siteadmin/spamlist.php: Use spam_clear to clear
the user's score (as well as that of all his comments & resume).
-rwxr-xr-x | backend/misc/sv_spamchecker | 23 | ||||
-rw-r--r-- | frontend/php/include/spam.php | 179 | ||||
-rw-r--r-- | frontend/php/include/trackers/data.php | 2 | ||||
-rw-r--r-- | frontend/php/siteadmin/spamlist.php | 57 |
4 files changed, 135 insertions, 126 deletions
diff --git a/backend/misc/sv_spamchecker b/backend/misc/sv_spamchecker index 767e5ea..830eb76 100755 --- a/backend/misc/sv_spamchecker +++ b/backend/misc/sv_spamchecker @@ -39,7 +39,7 @@ my $sys_spamc_timeout = GetConf('backend.sv_spamchecker.timeout'); my $sys_spamc_username = GetConf('backend.sv_spamchecker.user'); my $sys_spamcheck_expiry = GetConf('backend.sv_spamchecker.expiry'); my $sys_spamcheck_spamassassin = GetConf('backend.sv_spamchecker.check'); -my $reporter_uid = 101; # FIXME! +my $reporter_uid = 100; # FIXME! # End of configuration variables; my $verbose; @@ -120,7 +120,7 @@ sub update_user_spamscores { }, qq{ SELECT u.user_id AS uid, - avg(IF(t.artifact = 'resume', 2 * t.score, t.score)) AS score + AVG(IF(t.artifact = 'resume', 2 * t.score, t.score)) AS score FROM user u, trackers_spamscore t WHERE u.status='A' AND u.user_id=t.affected_user_id$cond GROUP BY 1}, @@ -382,6 +382,7 @@ sub com_learn { sub { my $ref = shift; $ref->{ip} = '127.0.0.1'; + $ref->{comment_id} = 0; learn_from_item('resume', $ref) if defined $ref->{text}; }, qq{ SELECT user.user_id AS item_id, @@ -393,10 +394,8 @@ FROM user LEFT JOIN trackers_spamcheck_cache cache ON user.user_id = cache.item_id AND cache.artifact = ? -WHERE cache.artifact is NULL - OR (cache.comment_id = 0 - AND cache.isspam <> IF(user.resume_spamscore>4,'Y','N'))}, - 'resume'); +WHERE IF(cache.isspam IS NULL,'N',cache.isspam) <> IF(user.resume_spamscore>4,'Y','N')}, + 'resume'); foreach my $tracker (TrackerNames()) { db_foreach(sub { @@ -410,10 +409,7 @@ FROM $tracker tracker LEFT JOIN trackers_spamcheck_cache cache ON tracker.bug_id = cache.item_id AND cache.artifact = ? 
-WHERE cache.artifact is NULL - OR (cache.comment_id = 0 - AND cache.isspam <> IF(tracker.spamscore>4,'Y','N')) - }, $tracker); +WHERE IF(cache.isspam IS NULL,'N',cache.isspam) <> IF(tracker.spamscore>4,'Y','N')}, $tracker); db_foreach(sub { my $ref = shift; @@ -430,10 +426,7 @@ LEFT JOIN trackers_spamcheck_cache cache ON hist.bug_id = cache.item_id AND cache.artifact = ? WHERE hist.field_name='details' - AND (cache.artifact is NULL - OR (cache.comment_id = hist.bug_history_id - AND cache.isspam <> IF(hist.spamscore>4,'Y','N'))) - }, $tracker); + AND IF(cache.isspam IS NULL,'N',cache.isspam) <> IF(hist.spamscore>4,'Y','N')}, $tracker); } } @@ -591,7 +584,7 @@ sub com_usercheck { notify => 0); }, @_); - update_user_spamscores(U_IDS, [@_]); + update_user_spamscores(U_NAMES, [@_]); } my %command = ( diff --git a/frontend/php/include/spam.php b/frontend/php/include/spam.php index 7a6d826..62d5f8d 100644 --- a/frontend/php/include/spam.php +++ b/frontend/php/include/spam.php @@ -150,12 +150,6 @@ function spam_flag ($item_id, $comment_id, $score, $group_id, $reporter_user_id= return true; } - # If the reporter already flagged a message of this user, end here - # (we do not want a single user being able to increment by more than one - # another user spamscore) - if (spam_get_user_score($affected_user_id, $reporter_user_id) > 1) - return true; - # If the reporter is not member of the project that owns the item, # not increment user spamscore # FIXME: not sure about this ; as we increment the spamscore only if the @@ -164,12 +158,7 @@ function spam_flag ($item_id, $comment_id, $score, $group_id, $reporter_user_id= #if (!member_check($reporter_user_id, $group_id)) # { return true; } - # Compute the score of the user - $userscore = spam_get_user_score($affected_user_id); - - # Update the user spamscore field - db_execute("UPDATE user SET spamscore=? 
WHERE user_id=?", - array($userscore, $affected_user_id)); + spam_recompute_user_score ($affected_user_id); # No feedback about this last part, one user spamscore is the kind of info # that belongs to site admins territory. @@ -200,51 +189,55 @@ function spam_flag_resume($user_id, $score, $reporter_user_id=0) 'item_id' => $user_id, 'comment_id' => 0), DB_AUTOQUERY_INSERT); + + if ($score < 5) + spam_add_to_spamcheck_queue($user_id, 0, 'resume', 0, $score); # Compute the score of the user - $userscore = spam_get_user_score($user_id); + spam_recompute_user_score ($user_id); +} - # Update the user spamscore field +function spam_recompute_user_score ($user_id) +{ + $result = db_execute("SELECT AVG(IF(t.artifact = 'resume', 2 * t.score, t.score)) AS score + FROM user u, trackers_spamscore t + WHERE u.status='A' AND u.user_id=t.affected_user_id AND u.user_id=?", + array($user_id)); + if ($result) + $score = db_result($result,0,'score'); + else + $score = 0; db_execute("UPDATE user SET spamscore=? WHERE user_id=?", - array($userscore, $user_id)); -} - + array($score, $user_id)); +} + # Mark that a spam is actually not one # (allow to set the tracker, because this function may be called from # siteadmin area) function spam_unflag ($item_id, $comment_id, $tracker, $group_id) { + if (!ctype_alnum($tracker)) + util_die('Tracker is not valid (not alnum): ' . htmlescape($tracker)); + + $result = db_execute("SELECT affected_user_id FROM trackers_spamscore + WHERE item_id=? AND comment_id=? AND artifact=?", + array($item_id, $comment_id, $tracker)); + # update the spamscore table db_execute("DELETE FROM trackers_spamscore WHERE item_id=? AND comment_id=? AND artifact=?", array($item_id, $comment_id, $tracker)); - if (!ctype_alnum($tracker)) - util_die('Tracker is not valid (not alnum): ' . 
htmlescape($tracker)); + spamcheck_enqueue($tracker, $item_id, $comment_id, 10, 0); - # Update the item spamscore fields - if ($tracker == 'resume') - { - db_execute("UPDATE user SET resume_spamscore=0 WHERE user_id=?", - array($item_id)); - } - else if ($comment_id) - { - db_execute("UPDATE ".$tracker."_history SET spamscore=0 - WHERE bug_history_id=? AND field_name='details' AND bug_id=?", - array($comment_id, $item_id)); - } - else + if (db_numrows($result)) { - db_execute("UPDATE $tracker SET spamscore=0 - WHERE bug_id=? AND group_id=?", - array($item_id, $group_id)); + spam_recompute_user_score(db_result($result,0,'affected_user_id')); } - } # Return the total score of a user -function spam_get_user_score ($user_id=0, $set_by_user_id=0) +function spam_get_user_score ($user_id=0) { if (!$user_id) $user_id = user_getid(); @@ -284,16 +277,16 @@ function spam_set_item_default_score ($item_id, $comment_id, $tracker, $score, $ # Nothing to do for anonymous post, spam_flag will properly interpret # the fact that the default is not specifically set if ($user_id == 100) - { return; } + return; # If the score is null, there is obviously nothing to do if ($score < 1) - { return; } + return; # If the score means spam, fill the global that will be used later # to skip mail notif if ($score > 4) - { $GLOBALS['int_probablyspam'] = true; } + $GLOBALS['int_probablyspam'] = true; # Otherwise, add a new entry in the database, without mentioning the # affected user: we want to set the default score for the item, not to @@ -304,33 +297,65 @@ function spam_set_item_default_score ($item_id, $comment_id, $tracker, $score, $ $score)); fb(sprintf(_("Spam score of your post set to %s"), $score), 1); +} + +# Add $tracker#$item_id.$comment_id to the spamcheck queue with the given +# priority and assumed spam score. Updated the tracker item record. Return +# >0 on success, 0 if the item update failed. 
+function spamcheck_enqueue ($tracker, $item_id, $comment_id, $prio, $score) +{ + db_execute("INSERT INTO trackers_spamcheck_queue (artifact,item_id,comment_id,priority,date) VALUES (?, ?, ?, ?, ?) ON DUPLICATE KEY UPDATE priority=GREATEST(priority,?)", + array($tracker, $item_id, $comment_id, $prio, time(), $prio)); + # We change only the item spamscore field, not the spamscore table: + # it means that if any user unflag the item, it will be as if + # there was no score yet. + # (no discussion lock, update will generate notif if sent by users that + # can skip this spam queue check - members, etc) + + if ($tracker == 'resume') + { + $result = db_execute("UPDATE user SET resume_spamscore=? WHERE user_id=?", + array($score, $item_id)); + } + else if ($comment_id) + { + $result = db_execute("UPDATE ".$tracker."_history SET spamscore=? + WHERE bug_history_id=? AND field_name='details' AND bug_id=?", + array($score, $comment_id, $item_id)); + } + else + { + $result = db_execute("UPDATE ".$tracker." SET spamscore=? WHERE bug_id=?", + array($score, $item_id)); + } + return db_affected_rows($result); } -# Put an item or a comment in temporary queue +# Put an item or a comment to the spamcheck queue. This does some +# housekeeping and calls spamcheck_enqueue to do the main job. 
function spam_add_to_spamcheck_queue ($item_id, $comment_id, $tracker, $group_id, $current_score) { assert('ctype_alnum($tracker)'); # Useless if already considered as spam if ($GLOBALS['int_probablyspam']) - { return false; } + return false; # Check in config if we want to do such checks if (!$GLOBALS['sys_spamcheck_spamassassin']) - { return false; } + return false; # If user is member of the current group, stop anyway if (member_check(0, $group_id)) - { return false; } + return false; # If logged in and we have to check only anonymous users, stop here - if ($GLOBALS['sys_spamcheck_spamassassin'] == "anonymous" && - user_isloggedin()) - { return false; } + if ($GLOBALS['sys_spamcheck_spamassassin'] == "anonymous" + && user_isloggedin()) + return false; # Otherwise, add to the queue and arbitrarily change spamscore - $date = time(); $priority = 2; $newscore = ($current_score + 5); @@ -340,35 +365,9 @@ function spam_add_to_spamcheck_queue ($item_id, $comment_id, $tracker, $group_id # confort, we have to take into account that we need to start with post # that are the most likely to contain spams. if (!user_isloggedin()) - { $priority++; } - - # Fill the queue - db_execute("INSERT INTO trackers_spamcheck_queue (artifact,item_id,comment_id,priority,date) VALUES (?, ?, ?, ?, ?)", - array($tracker, $item_id, $comment_id, $priority, $date)); - - # We change only the item spamscore field, not the spamscore table: - # it means that if any user unflag the item, it will be as if - # there was no score yet. - # (no discussion lock, update will generate notif if sent by users that - # can skip this spam queue check - members, etc) - if ($tracker == 'resume') - { - $result = db_execute("UPDATE user SET resume_spamscore=? WHERE user_id=?", - array($newscore, $item_id)); - } - else if ($comment_id) - { - $result = db_execute("UPDATE ".$tracker."_history SET spamscore=? - WHERE bug_history_id=? 
AND field_name='details' AND bug_id=?", - array($newscore, $comment_id, $item_id)); - } - else - { - $result = db_execute("UPDATE ".$tracker." SET spamscore=? WHERE bug_id=? AND group_id=?", - array($newscore, $item_id, $group_id)); - } + $priority++; - if (db_affected_rows($result)) + if (spamcheck_enqueue($tracker, $item_id, $comment_id, $priority, $newscore)) { fb(sprintf(_("Spam score of your post set temporarily to %s, until it is checked by spam filters"), $newscore), 1); } @@ -446,3 +445,31 @@ function spam_bancheck () # Finally, block here exit_error(_("Your IP address was banned for several hours due to spam reports incriminating it. In the meantime, if you log in, you can work around this ban. You should investigate about probable cause of spam reports incriminating your IP")); } + +# Clear spam mark for user $uid. If $all is true, clear also spam scores +# of the user's resume and all tracker items submitted by him. +function spam_clear($uid, $all) +{ + db_execute("UPDATE user SET spamscore='0' WHERE user_id=?", array($uid)); + if ($all) + { + db_execute("DELETE FROM trackers_spamscore WHERE affected_user_id=?", + array($uid)); + db_execute("UPDATE user SET resume_spamscore=0 WHERE user_id=?", + array($uid)); + foreach (array('cookbook', 'support', 'bugs', 'task', 'patch') + as $tracker) + { + db_execute("UPDATE $tracker SET spamscore=0 WHERE submitted_by=?", + array($uid)); + db_execute("UPDATE ${tracker}_history SET spamscore=0 ". + "WHERE field_name=? AND mod_by=?", + array('detail', $uid)); + } + } + else + { + db_execute("UPDATE IGNORE trackers_spamscore SET affected_user_id='100' WHERE affected_user_id=?", + array($uid)); + } +}
\ No newline at end of file diff --git a/frontend/php/include/trackers/data.php b/frontend/php/include/trackers/data.php index 2806dca..2e1f25c 100644 --- a/frontend/php/include/trackers/data.php +++ b/frontend/php/include/trackers/data.php @@ -2441,7 +2441,7 @@ function trackers_data_create_item($group_id,$vfl,&$extra_addresses) # 5, if necessary) # Useless if already considered to be spam. if ($spamscore < 5) - { spam_add_to_spamcheck_queue($item_id, 0, ARTIFACT, $group_id, $spamscore); } + spam_add_to_spamcheck_queue($item_id, 0, ARTIFACT, $group_id, $spamscore); # If we are on the cookbook, Store related links if (ARTIFACT == 'cookbook') diff --git a/frontend/php/siteadmin/spamlist.php b/frontend/php/siteadmin/spamlist.php index ab9f584..52e51b9 100644 --- a/frontend/php/siteadmin/spamlist.php +++ b/frontend/php/siteadmin/spamlist.php @@ -29,6 +29,24 @@ extract(sane_import('get', array('ban_user_id', 'wash_user_id', 'wash_ip', 'users_max_rows', 'users_offset', 'ip_max_rows', 'ip_offset'))); +function mkurl($q) +{ + $url = $_SERVER['PHP_SELF']; + $delim = '?'; + foreach (array('users_max_rows', 'users_offset', + 'ip_max_rows', 'ip_offset') as $kw) + { + global ${$kw}; + if (${$kw}) + { + $url .= $delim . $kw . '=' . ${$kw}; + $delim = '&'; + } + } + $url .= $delim . $q; + return $url; +} + if ($ban_user_id) { if (!user_exists($ban_user_id)) @@ -40,31 +58,20 @@ if ($ban_user_id) if ($wash_user_id) { if (!user_exists($wash_user_id)) - { fb(_("User not found"), 1); } + { + fb(_("User not found"), 1); + } else { - # Update the user spamscore field - db_execute("UPDATE user SET spamscore='0' WHERE user_id=?", - array($wash_user_id)); - - # All comments flagged as spam will stay as such. - # We just changed the affected user id that it wont affect this guy - # any more. - # We assume that messages flagged as spam really were. 
- # (we may change that in the future, depending on user experience) - db_execute("UPDATE IGNORE trackers_spamscore SET affected_user_id='100' WHERE affected_user_id=?", - array($wash_user_id)); - + spam_clear($wash_user_id, true); } -} + } if ($wash_ip) { db_execute("DELETE FROM trackers_spamban WHERE ip=?", array($wash_ip)); } - - ###### Start HTML site_admin_header(array('title'=>_("Monitor Spams"),'context'=>'admhome')); @@ -91,24 +98,6 @@ else $result = db_execute("SELECT user_name,realname,user_id,spamscore FROM user WHERE status='A' AND spamscore > 0 ORDER BY spamscore DESC LIMIT ?,?", array($users_offset,($users_max_rows+1))); -function mkurl($q) -{ - $url = $_SERVER['PHP_SELF']; - $delim = '?'; - foreach (array('users_max_rows', 'users_offset', - 'ip_max_rows', 'ip_offset') as $kw) - { - global ${$kw}; - if (${$kw}) - { - $url .= $delim . $kw . '=' . ${$kw}; - $delim = '&'; - } - } - $url .= $delim . $q; - return $url; -} - if (!db_numrows($result)) { print '<p>'._("No suspects found").'</p>'; |