aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org.ua>2017-09-01 12:06:46 +0200
committerSergey Poznyakoff <gray@gnu.org.ua>2017-09-04 08:10:58 +0200
commitb65a5ac0bfcfbf0eb656e77fbe05c03e1670678e (patch)
treee3a46bfc1566fa434048ca76381add66d8b4faf5
parentd092ea3523615dad200477c83c347ca55ef9ec2d (diff)
downloadsavane-gray-b65a5ac0bfcfbf0eb656e77fbe05c03e1670678e.tar.gz
savane-gray-b65a5ac0bfcfbf0eb656e77fbe05c03e1670678e.tar.bz2
Fix interaction between spam marking and learning.
* backend/misc/sv_spamchecker: Use 100 as the reporter ID. (com_learn): Fix learning from user's resume. Simplify select condition. (com_usercheck): Bugfix. * frontend/php/include/spam.php (spam_flag): Remove spurious call to spam_get_user_score. Whether this user has reported this item was checked right at the beginning of the function. Use spam_recompute_user_score to update total user's score. (spam_flag_resume): Enqueue item for spam checking. Use spam_recompute_user_score to update total user's score. (spam_recompute_user_score,spamcheck_enqueue) (spam_clear): New functions. (spam_unflag,spam_add_to_spamcheck_queue): Use spamcheck_enqueue. (spam_get_user_score): Remove 2nd argument. (spam_set_item_default_score): Update record priority on duplicate key. * frontend/php/siteadmin/spamlist.php: Use spam_clear to clear the user's score (as well as that of all his comments & resume).
-rwxr-xr-xbackend/misc/sv_spamchecker23
-rw-r--r--frontend/php/include/spam.php179
-rw-r--r--frontend/php/include/trackers/data.php2
-rw-r--r--frontend/php/siteadmin/spamlist.php57
4 files changed, 135 insertions, 126 deletions
diff --git a/backend/misc/sv_spamchecker b/backend/misc/sv_spamchecker
index 767e5ea..830eb76 100755
--- a/backend/misc/sv_spamchecker
+++ b/backend/misc/sv_spamchecker
@@ -39,7 +39,7 @@ my $sys_spamc_timeout = GetConf('backend.sv_spamchecker.timeout');
my $sys_spamc_username = GetConf('backend.sv_spamchecker.user');
my $sys_spamcheck_expiry = GetConf('backend.sv_spamchecker.expiry');
my $sys_spamcheck_spamassassin = GetConf('backend.sv_spamchecker.check');
-my $reporter_uid = 101; # FIXME!
+my $reporter_uid = 100; # FIXME!
# End of configuration variables;
my $verbose;
@@ -120,7 +120,7 @@ sub update_user_spamscores {
},
qq{
SELECT u.user_id AS uid,
- avg(IF(t.artifact = 'resume', 2 * t.score, t.score)) AS score
+ AVG(IF(t.artifact = 'resume', 2 * t.score, t.score)) AS score
FROM user u, trackers_spamscore t
WHERE u.status='A' AND u.user_id=t.affected_user_id$cond
GROUP BY 1},
@@ -382,6 +382,7 @@ sub com_learn {
sub {
my $ref = shift;
$ref->{ip} = '127.0.0.1';
+ $ref->{comment_id} = 0;
learn_from_item('resume', $ref) if defined $ref->{text};
}, qq{
SELECT user.user_id AS item_id,
@@ -393,10 +394,8 @@ FROM user
LEFT JOIN trackers_spamcheck_cache cache
ON user.user_id = cache.item_id
AND cache.artifact = ?
-WHERE cache.artifact is NULL
- OR (cache.comment_id = 0
- AND cache.isspam <> IF(user.resume_spamscore>4,'Y','N'))},
- 'resume');
+WHERE IF(cache.isspam IS NULL,'N',cache.isspam) <> IF(user.resume_spamscore>4,'Y','N')},
+ 'resume');
foreach my $tracker (TrackerNames()) {
db_foreach(sub {
@@ -410,10 +409,7 @@ FROM $tracker tracker
LEFT JOIN trackers_spamcheck_cache cache
ON tracker.bug_id = cache.item_id
AND cache.artifact = ?
-WHERE cache.artifact is NULL
- OR (cache.comment_id = 0
- AND cache.isspam <> IF(tracker.spamscore>4,'Y','N'))
- }, $tracker);
+WHERE IF(cache.isspam IS NULL,'N',cache.isspam) <> IF(tracker.spamscore>4,'Y','N')}, $tracker);
db_foreach(sub {
my $ref = shift;
@@ -430,10 +426,7 @@ LEFT JOIN trackers_spamcheck_cache cache
ON hist.bug_id = cache.item_id
AND cache.artifact = ?
WHERE hist.field_name='details'
- AND (cache.artifact is NULL
- OR (cache.comment_id = hist.bug_history_id
- AND cache.isspam <> IF(hist.spamscore>4,'Y','N')))
- }, $tracker);
+ AND IF(cache.isspam IS NULL,'N',cache.isspam) <> IF(hist.spamscore>4,'Y','N')}, $tracker);
}
}
@@ -591,7 +584,7 @@ sub com_usercheck {
notify => 0);
},
@_);
- update_user_spamscores(U_IDS, [@_]);
+ update_user_spamscores(U_NAMES, [@_]);
}
my %command = (
diff --git a/frontend/php/include/spam.php b/frontend/php/include/spam.php
index 7a6d826..62d5f8d 100644
--- a/frontend/php/include/spam.php
+++ b/frontend/php/include/spam.php
@@ -150,12 +150,6 @@ function spam_flag ($item_id, $comment_id, $score, $group_id, $reporter_user_id=
return true;
}
- # If the reporter already flagged a message of this user, end here
- # (we do not want a single user being able to increment by more than one
- # another user spamscore)
- if (spam_get_user_score($affected_user_id, $reporter_user_id) > 1)
- return true;
-
# If the reporter is not member of the project that owns the item,
# not increment user spamscore
# FIXME: not sure about this ; as we increment the spamscore only if the
@@ -164,12 +158,7 @@ function spam_flag ($item_id, $comment_id, $score, $group_id, $reporter_user_id=
#if (!member_check($reporter_user_id, $group_id))
# { return true; }
- # Compute the score of the user
- $userscore = spam_get_user_score($affected_user_id);
-
- # Update the user spamscore field
- db_execute("UPDATE user SET spamscore=? WHERE user_id=?",
- array($userscore, $affected_user_id));
+ spam_recompute_user_score ($affected_user_id);
# No feedback about this last part, one user spamscore is the kind of info
# that belongs to site admins territory.
@@ -200,51 +189,55 @@ function spam_flag_resume($user_id, $score, $reporter_user_id=0)
'item_id' => $user_id,
'comment_id' => 0),
DB_AUTOQUERY_INSERT);
+
+ if ($score < 5)
+ spam_add_to_spamcheck_queue($user_id, 0, 'resume', 0, $score);
# Compute the score of the user
- $userscore = spam_get_user_score($user_id);
+ spam_recompute_user_score ($user_id);
+}
- # Update the user spamscore field
+function spam_recompute_user_score ($user_id)
+{
+ $result = db_execute("SELECT AVG(IF(t.artifact = 'resume', 2 * t.score, t.score)) AS score
+ FROM user u, trackers_spamscore t
+ WHERE u.status='A' AND u.user_id=t.affected_user_id AND u.user_id=?",
+ array($user_id));
+ if ($result)
+ $score = db_result($result,0,'score');
+ else
+ $score = 0;
db_execute("UPDATE user SET spamscore=? WHERE user_id=?",
- array($userscore, $user_id));
-}
-
+ array($score, $user_id));
+}
+
# Mark that a spam is actually not one
# (allow to set the tracker, because this function may be called from
# siteadmin area)
function spam_unflag ($item_id, $comment_id, $tracker, $group_id)
{
+ if (!ctype_alnum($tracker))
+ util_die('Tracker is not valid (not alnum): ' . htmlescape($tracker));
+
+ $result = db_execute("SELECT affected_user_id FROM trackers_spamscore
+ WHERE item_id=? AND comment_id=? AND artifact=?",
+ array($item_id, $comment_id, $tracker));
+
# update the spamscore table
db_execute("DELETE FROM trackers_spamscore
WHERE item_id=? AND comment_id=? AND artifact=?",
array($item_id, $comment_id, $tracker));
- if (!ctype_alnum($tracker))
- util_die('Tracker is not valid (not alnum): ' . htmlescape($tracker));
+ spamcheck_enqueue($tracker, $item_id, $comment_id, 10, 0);
- # Update the item spamscore fields
- if ($tracker == 'resume')
- {
- db_execute("UPDATE user SET resume_spamscore=0 WHERE user_id=?",
- array($item_id));
- }
- else if ($comment_id)
- {
- db_execute("UPDATE ".$tracker."_history SET spamscore=0
- WHERE bug_history_id=? AND field_name='details' AND bug_id=?",
- array($comment_id, $item_id));
- }
- else
+ if (db_numrows($result))
{
- db_execute("UPDATE $tracker SET spamscore=0
- WHERE bug_id=? AND group_id=?",
- array($item_id, $group_id));
+ spam_recompute_user_score(db_result($result,0,'affected_user_id'));
}
-
}
# Return the total score of a user
-function spam_get_user_score ($user_id=0, $set_by_user_id=0)
+function spam_get_user_score ($user_id=0)
{
if (!$user_id)
$user_id = user_getid();
@@ -284,16 +277,16 @@ function spam_set_item_default_score ($item_id, $comment_id, $tracker, $score, $
# Nothing to do for anonymous post, spam_flag will properly interpret
# the fact that the default is not specifically set
if ($user_id == 100)
- { return; }
+ return;
# If the score is null, there is obviously nothing to do
if ($score < 1)
- { return; }
+ return;
# If the score means spam, fill the global that will be used later
# to skip mail notif
if ($score > 4)
- { $GLOBALS['int_probablyspam'] = true; }
+ $GLOBALS['int_probablyspam'] = true;
# Otherwise, add a new entry in the database, without mentioning the
# affected user: we want to set the default score for the item, not to
@@ -304,33 +297,65 @@ function spam_set_item_default_score ($item_id, $comment_id, $tracker, $score, $
$score));
fb(sprintf(_("Spam score of your post set to %s"), $score), 1);
+}
+
+# Add $tracker#$item_id.$comment_id to the spamcheck queue with the given
+# priority and assumed spam score. Updates the tracker item record. Returns
+# >0 on success, 0 if the item update failed.
+function spamcheck_enqueue ($tracker, $item_id, $comment_id, $prio, $score)
+{
+ db_execute("INSERT INTO trackers_spamcheck_queue (artifact,item_id,comment_id,priority,date) VALUES (?, ?, ?, ?, ?) ON DUPLICATE KEY UPDATE priority=GREATEST(priority,?)",
+ array($tracker, $item_id, $comment_id, $prio, time(), $prio));
+ # We change only the item spamscore field, not the spamscore table:
+ # it means that if any user unflag the item, it will be as if
+ # there was no score yet.
+ # (no discussion lock, update will generate notif if sent by users that
+ # can skip this spam queue check - members, etc)
+
+ if ($tracker == 'resume')
+ {
+ $result = db_execute("UPDATE user SET resume_spamscore=? WHERE user_id=?",
+ array($score, $item_id));
+ }
+ else if ($comment_id)
+ {
+ $result = db_execute("UPDATE ".$tracker."_history SET spamscore=?
+ WHERE bug_history_id=? AND field_name='details' AND bug_id=?",
+ array($score, $comment_id, $item_id));
+ }
+ else
+ {
+ $result = db_execute("UPDATE ".$tracker." SET spamscore=? WHERE bug_id=?",
+ array($score, $item_id));
+ }
+ return db_affected_rows($result);
}
-# Put an item or a comment in temporary queue
+# Put an item or a comment to the spamcheck queue. This does some
+# housekeeping and calls spamcheck_enqueue to do the main job.
function spam_add_to_spamcheck_queue ($item_id, $comment_id, $tracker, $group_id, $current_score)
{
assert('ctype_alnum($tracker)');
# Useless if already considered as spam
if ($GLOBALS['int_probablyspam'])
- { return false; }
+ return false;
# Check in config if we want to do such checks
if (!$GLOBALS['sys_spamcheck_spamassassin'])
- { return false; }
+ return false;
# If user is member of the current group, stop anyway
if (member_check(0, $group_id))
- { return false; }
+ return false;
# If logged in and we have to check only anonymous users, stop here
- if ($GLOBALS['sys_spamcheck_spamassassin'] == "anonymous" &&
- user_isloggedin())
- { return false; }
+ if ($GLOBALS['sys_spamcheck_spamassassin'] == "anonymous"
+ && user_isloggedin())
+ return false;
# Otherwise, add to the queue and arbitrarily change spamscore
- $date = time();
$priority = 2;
$newscore = ($current_score + 5);
@@ -340,35 +365,9 @@ function spam_add_to_spamcheck_queue ($item_id, $comment_id, $tracker, $group_id
# confort, we have to take into account that we need to start with post
# that are the most likely to contain spams.
if (!user_isloggedin())
- { $priority++; }
-
- # Fill the queue
- db_execute("INSERT INTO trackers_spamcheck_queue (artifact,item_id,comment_id,priority,date) VALUES (?, ?, ?, ?, ?)",
- array($tracker, $item_id, $comment_id, $priority, $date));
-
- # We change only the item spamscore field, not the spamscore table:
- # it means that if any user unflag the item, it will be as if
- # there was no score yet.
- # (no discussion lock, update will generate notif if sent by users that
- # can skip this spam queue check - members, etc)
- if ($tracker == 'resume')
- {
- $result = db_execute("UPDATE user SET resume_spamscore=? WHERE user_id=?",
- array($newscore, $item_id));
- }
- else if ($comment_id)
- {
- $result = db_execute("UPDATE ".$tracker."_history SET spamscore=?
- WHERE bug_history_id=? AND field_name='details' AND bug_id=?",
- array($newscore, $comment_id, $item_id));
- }
- else
- {
- $result = db_execute("UPDATE ".$tracker." SET spamscore=? WHERE bug_id=? AND group_id=?",
- array($newscore, $item_id, $group_id));
- }
+ $priority++;
- if (db_affected_rows($result))
+ if (spamcheck_enqueue($tracker, $item_id, $comment_id, $priority, $newscore))
{
fb(sprintf(_("Spam score of your post set temporarily to %s, until it is checked by spam filters"), $newscore), 1);
}
@@ -446,3 +445,31 @@ function spam_bancheck ()
# Finally, block here
exit_error(_("Your IP address was banned for several hours due to spam reports incriminating it. In the meantime, if you log in, you can work around this ban. You should investigate about probable cause of spam reports incriminating your IP"));
}
+
+# Clear spam mark for user $uid. If $all is true, clear also spam scores
+# of the user's resume and all tracker items submitted by him.
+function spam_clear($uid, $all)
+{
+ db_execute("UPDATE user SET spamscore='0' WHERE user_id=?", array($uid));
+ if ($all)
+ {
+ db_execute("DELETE FROM trackers_spamscore WHERE affected_user_id=?",
+ array($uid));
+ db_execute("UPDATE user SET resume_spamscore=0 WHERE user_id=?",
+ array($uid));
+ foreach (array('cookbook', 'support', 'bugs', 'task', 'patch')
+ as $tracker)
+ {
+ db_execute("UPDATE $tracker SET spamscore=0 WHERE submitted_by=?",
+ array($uid));
+ db_execute("UPDATE ${tracker}_history SET spamscore=0 ".
+ "WHERE field_name=? AND mod_by=?",
+ array('detail', $uid));
+ }
+ }
+ else
+ {
+ db_execute("UPDATE IGNORE trackers_spamscore SET affected_user_id='100' WHERE affected_user_id=?",
+ array($uid));
+ }
+} \ No newline at end of file
diff --git a/frontend/php/include/trackers/data.php b/frontend/php/include/trackers/data.php
index 2806dca..2e1f25c 100644
--- a/frontend/php/include/trackers/data.php
+++ b/frontend/php/include/trackers/data.php
@@ -2441,7 +2441,7 @@ function trackers_data_create_item($group_id,$vfl,&$extra_addresses)
# 5, if necessary)
# Useless if already considered to be spam.
if ($spamscore < 5)
- { spam_add_to_spamcheck_queue($item_id, 0, ARTIFACT, $group_id, $spamscore); }
+ spam_add_to_spamcheck_queue($item_id, 0, ARTIFACT, $group_id, $spamscore);
# If we are on the cookbook, Store related links
if (ARTIFACT == 'cookbook')
diff --git a/frontend/php/siteadmin/spamlist.php b/frontend/php/siteadmin/spamlist.php
index ab9f584..52e51b9 100644
--- a/frontend/php/siteadmin/spamlist.php
+++ b/frontend/php/siteadmin/spamlist.php
@@ -29,6 +29,24 @@ extract(sane_import('get', array('ban_user_id', 'wash_user_id', 'wash_ip',
'users_max_rows', 'users_offset',
'ip_max_rows', 'ip_offset')));
+function mkurl($q)
+{
+ $url = $_SERVER['PHP_SELF'];
+ $delim = '?';
+ foreach (array('users_max_rows', 'users_offset',
+ 'ip_max_rows', 'ip_offset') as $kw)
+ {
+ global ${$kw};
+ if (${$kw})
+ {
+ $url .= $delim . $kw . '=' . ${$kw};
+ $delim = '&';
+ }
+ }
+ $url .= $delim . $q;
+ return $url;
+}
+
if ($ban_user_id)
{
if (!user_exists($ban_user_id))
@@ -40,31 +58,20 @@ if ($ban_user_id)
if ($wash_user_id)
{
if (!user_exists($wash_user_id))
- { fb(_("User not found"), 1); }
+ {
+ fb(_("User not found"), 1);
+ }
else
{
- # Update the user spamscore field
- db_execute("UPDATE user SET spamscore='0' WHERE user_id=?",
- array($wash_user_id));
-
- # All comments flagged as spam will stay as such.
- # We just changed the affected user id that it wont affect this guy
- # any more.
- # We assume that messages flagged as spam really were.
- # (we may change that in the future, depending on user experience)
- db_execute("UPDATE IGNORE trackers_spamscore SET affected_user_id='100' WHERE affected_user_id=?",
- array($wash_user_id));
-
+ spam_clear($wash_user_id, true);
}
-}
+ }
if ($wash_ip)
{
db_execute("DELETE FROM trackers_spamban WHERE ip=?", array($wash_ip));
}
-
-
###### Start HTML
site_admin_header(array('title'=>_("Monitor Spams"),'context'=>'admhome'));
@@ -91,24 +98,6 @@ else
$result = db_execute("SELECT user_name,realname,user_id,spamscore FROM user WHERE status='A' AND spamscore > 0 ORDER BY spamscore DESC LIMIT ?,?", array($users_offset,($users_max_rows+1)));
-function mkurl($q)
-{
- $url = $_SERVER['PHP_SELF'];
- $delim = '?';
- foreach (array('users_max_rows', 'users_offset',
- 'ip_max_rows', 'ip_offset') as $kw)
- {
- global ${$kw};
- if (${$kw})
- {
- $url .= $delim . $kw . '=' . ${$kw};
- $delim = '&';
- }
- }
- $url .= $delim . $q;
- return $url;
-}
-
if (!db_numrows($result))
{
print '<p>'._("No suspects found").'</p>';

Return to:

Send suggestions and report system problems to the System administrator.