Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Zied SELLAMI
priority-inbox
Commits
d63367ac
Commit
d63367ac
authored
May 10, 2019
by
Zied SELLAMI
Browse files
Integrating DynamicFeature
parent
4c333593
Changes
13
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
496 additions
and
301 deletions
+496
-301
priorityInbox/CONFIG
priorityInbox/CONFIG
+2
-0
priorityInbox/CONFIG_Docker
priorityInbox/CONFIG_Docker
+2
-0
priorityInbox/pom.xml
priorityInbox/pom.xml
+14
-1
priorityInbox/src/main/java/org/linagora/priorityInbox/api/Configuration.java
...in/java/org/linagora/priorityInbox/api/Configuration.java
+8
-0
priorityInbox/src/main/java/org/linagora/priorityInbox/api/WebService.java
.../main/java/org/linagora/priorityInbox/api/WebService.java
+4
-0
priorityInbox/src/main/java/org/linagora/priorityInbox/api/WebServiceMain.java
...n/java/org/linagora/priorityInbox/api/WebServiceMain.java
+24
-1
priorityInbox/src/main/java/org/linagora/priorityInbox/feature/DynamicFeature.java
...va/org/linagora/priorityInbox/feature/DynamicFeature.java
+204
-267
priorityInbox/src/main/java/org/linagora/priorityInbox/feature/FeatureExtractor.java
.../org/linagora/priorityInbox/feature/FeatureExtractor.java
+61
-11
priorityInbox/src/main/java/org/linagora/priorityInbox/feature/LabeledDocument.java
...a/org/linagora/priorityInbox/feature/LabeledDocument.java
+66
-2
priorityInbox/src/main/java/org/linagora/priorityInbox/feature/PositionWeight.java
...va/org/linagora/priorityInbox/feature/PositionWeight.java
+16
-0
priorityInbox/src/main/java/org/linagora/priorityInbox/feature/TextDataModel.java
...ava/org/linagora/priorityInbox/feature/TextDataModel.java
+26
-3
priorityInbox/src/main/java/org/linagora/priorityInbox/text/Text.java
...x/src/main/java/org/linagora/priorityInbox/text/Text.java
+68
-15
priorityInbox/src/main/java/org/linagora/priorityInbox/text/TextCleaner.java
...ain/java/org/linagora/priorityInbox/text/TextCleaner.java
+1
-1
No files found.
priorityInbox/CONFIG
View file @
d63367ac
...
@@ -4,3 +4,5 @@ TMP_DIRECTORY = /home/zsellami/tmp
...
@@ -4,3 +4,5 @@ TMP_DIRECTORY = /home/zsellami/tmp
NOTIFICATION_CHANNEL = /home/zsellami/dev/git/priority-inbox/priorityInbox/channels/
NOTIFICATION_CHANNEL = /home/zsellami/dev/git/priority-inbox/priorityInbox/channels/
JOB_POSITION_RULE = /home/zsellami/dev/git/priority-inbox/priorityInbox/jobpositions/
JOB_POSITION_RULE = /home/zsellami/dev/git/priority-inbox/priorityInbox/jobpositions/
TEXT_CLEANER_RULE = /home/zsellami/dev/git/priority-inbox/priorityInbox/TextCleaner.regex
TEXT_CLEANER_RULE = /home/zsellami/dev/git/priority-inbox/priorityInbox/TextCleaner.regex
MODEL_DIRECTORY = /home/zsellami/dev/git/priority-inbox/priorityInbox/models/
STOP_WORDS_DIRECTORY = /home/zsellami/dev/git/priority-inbox/priorityInbox/stopwords/
\ No newline at end of file
priorityInbox/CONFIG_Docker
View file @
d63367ac
...
@@ -4,3 +4,5 @@ TMP_DIRECTORY = /priority-inbox/priorityInbox/tmp
...
@@ -4,3 +4,5 @@ TMP_DIRECTORY = /priority-inbox/priorityInbox/tmp
NOTIFICATION_CHANNEL = /priority-inbox/priorityInbox/channels/
NOTIFICATION_CHANNEL = /priority-inbox/priorityInbox/channels/
JOB_POSITION_RULE = /priority-inbox/priorityInbox/jobpositions/
JOB_POSITION_RULE = /priority-inbox/priorityInbox/jobpositions/
TEXT_CLEANER_RULE = /priority-inbox/priorityInbox/TextCleaner.regex
TEXT_CLEANER_RULE = /priority-inbox/priorityInbox/TextCleaner.regex
MODEL_DIRECTORY = /priority-inbox/priorityInbox/models/
STOP_WORDS_DIRECTORY = /priority-inbox/priorityInbox/stopwords/
\ No newline at end of file
priorityInbox/pom.xml
View file @
d63367ac
...
@@ -105,7 +105,20 @@
...
@@ -105,7 +105,20 @@
<artifactId>
tika-langdetect
</artifactId>
<artifactId>
tika-langdetect
</artifactId>
<version>
1.20
</version>
<version>
1.20
</version>
</dependency>
</dependency>
</dependencies>
<dependency>
<groupId>
com.thoughtworks.paranamer
</groupId>
<artifactId>
paranamer
</artifactId>
<version>
2.8
</version>
</dependency>
<dependency>
<groupId>
com.google.guava
</groupId>
<artifactId>
guava
</artifactId>
<version>
15.0
</version>
</dependency>
</dependencies>
<build>
<build>
<plugins>
<plugins>
...
...
priorityInbox/src/main/java/org/linagora/priorityInbox/api/Configuration.java
View file @
d63367ac
...
@@ -58,6 +58,14 @@ public class Configuration {
...
@@ -58,6 +58,14 @@ public class Configuration {
public
String
getTMPDirectory
()
{
public
String
getTMPDirectory
()
{
return
parameters
.
get
(
"TMP_DIRECTORY"
);
return
parameters
.
get
(
"TMP_DIRECTORY"
);
}
}
public
String
getModelDirectory
()
{
return
parameters
.
get
(
"MODEL_DIRECTORY"
);
}
public
String
getStopWordsDirectory
()
{
return
parameters
.
get
(
"STOP_WORDS_DIRECTORY"
);
}
@Override
@Override
public
String
toString
()
{
public
String
toString
()
{
...
...
priorityInbox/src/main/java/org/linagora/priorityInbox/api/WebService.java
View file @
d63367ac
...
@@ -22,8 +22,10 @@ import org.glassfish.jersey.media.multipart.FormDataContentDisposition;
...
@@ -22,8 +22,10 @@ import org.glassfish.jersey.media.multipart.FormDataContentDisposition;
import
org.glassfish.jersey.media.multipart.FormDataParam
;
import
org.glassfish.jersey.media.multipart.FormDataParam
;
import
org.linagora.priorityInbox.data.Email
;
import
org.linagora.priorityInbox.data.Email
;
import
org.linagora.priorityInbox.feature.DynamicFeature
;
import
org.linagora.priorityInbox.feature.FeatureExtractor
;
import
org.linagora.priorityInbox.feature.FeatureExtractor
;
import
org.linagora.priorityInbox.feature.LabeledDocument
;
import
org.linagora.priorityInbox.feature.LabeledDocument
;
import
org.linagora.priorityInbox.text.Text
;
import
org.linagora.priorityInbox.text.TextCleaner
;
import
org.linagora.priorityInbox.text.TextCleaner
;
import
com.fasterxml.jackson.databind.ObjectMapper
;
import
com.fasterxml.jackson.databind.ObjectMapper
;
...
@@ -41,6 +43,8 @@ public class WebService {
...
@@ -41,6 +43,8 @@ public class WebService {
FeatureExtractor
.
JOB_POSITION_RULES
=
new
File
(
config
.
getJobPositionRules
());
FeatureExtractor
.
JOB_POSITION_RULES
=
new
File
(
config
.
getJobPositionRules
());
FeatureExtractor
.
NOTIFICATION_CHANNELS
=
new
File
(
config
.
getNotificationChannels
());
FeatureExtractor
.
NOTIFICATION_CHANNELS
=
new
File
(
config
.
getNotificationChannels
());
TextCleaner
.
CLEANING_REGEX
=
new
File
(
config
.
getTextCleanerRegex
());
TextCleaner
.
CLEANING_REGEX
=
new
File
(
config
.
getTextCleanerRegex
());
DynamicFeature
.
modelDirectory
=
config
.
getModelDirectory
();
Text
.
setStopWordsDirectory
(
config
.
getStopWordsDirectory
());
}
}
// https://stackoverflow.com/questions/30653012/multipart-form-data-no-injection-source-found-for-a-parameter-of-type-public-ja?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa
// https://stackoverflow.com/questions/30653012/multipart-form-data-no-injection-source-found-for-a-parameter-of-type-public-ja?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa
...
...
priorityInbox/src/main/java/org/linagora/priorityInbox/api/WebServiceMain.java
View file @
d63367ac
package
org.linagora.priorityInbox.api
;
package
org.linagora.priorityInbox.api
;
import
java.net.URI
;
import
java.net.URI
;
import
java.util.concurrent.Executors
;
import
java.util.concurrent.ScheduledExecutorService
;
import
java.util.concurrent.TimeUnit
;
import
javax.ws.rs.core.UriBuilder
;
import
javax.ws.rs.core.UriBuilder
;
...
@@ -8,6 +11,7 @@ import org.glassfish.grizzly.http.server.HttpServer;
...
@@ -8,6 +11,7 @@ import org.glassfish.grizzly.http.server.HttpServer;
import
org.glassfish.jersey.grizzly2.httpserver.GrizzlyHttpServerFactory
;
import
org.glassfish.jersey.grizzly2.httpserver.GrizzlyHttpServerFactory
;
import
org.glassfish.jersey.media.multipart.MultiPartFeature
;
import
org.glassfish.jersey.media.multipart.MultiPartFeature
;
import
org.glassfish.jersey.server.ResourceConfig
;
import
org.glassfish.jersey.server.ResourceConfig
;
import
org.linagora.priorityInbox.feature.DynamicFeature
;
public
class
WebServiceMain
{
public
class
WebServiceMain
{
...
@@ -17,8 +21,26 @@ public class WebServiceMain {
...
@@ -17,8 +21,26 @@ public class WebServiceMain {
}
}
public
static
void
main
(
String
[]
args
)
{
public
static
void
main
(
String
[]
args
)
{
final
ScheduledExecutorService
executor
=
Executors
.
newSingleThreadScheduledExecutor
();
Runnable
periodicSaving
=
new
Runnable
()
{
public
void
run
()
{
DynamicFeature
.
saveModels
();
}
};
if
(
args
.
length
==
1
)
{
if
(
args
.
length
==
1
)
{
Thread
app
=
new
Thread
()
{
Runtime
.
getRuntime
().
addShutdownHook
(
new
Thread
()
{
public
void
run
()
{
DynamicFeature
.
saveModels
();
System
.
out
.
println
(
"Program stopped. Models saved before stopping the service."
);
executor
.
shutdown
();
}
});
executor
.
scheduleWithFixedDelay
(
periodicSaving
,
300
,
600
,
TimeUnit
.
SECONDS
);
Thread
app
=
new
Thread
()
{
public
void
run
()
{
public
void
run
()
{
Configuration
config
=
new
Configuration
(
args
[
0
]);
Configuration
config
=
new
Configuration
(
args
[
0
]);
...
@@ -59,6 +81,7 @@ public class WebServiceMain {
...
@@ -59,6 +81,7 @@ public class WebServiceMain {
System
.
out
.
println
(
"Please set config argument. Usage: WebServiceMain configFilePath"
);
System
.
out
.
println
(
"Please set config argument. Usage: WebServiceMain configFilePath"
);
System
.
exit
(
0
);
System
.
exit
(
0
);
}
}
}
}
...
...
priorityInbox/src/main/java/org/linagora/priorityInbox/feature/DynamicFeature.java
View file @
d63367ac
This diff is collapsed.
Click to expand it.
priorityInbox/src/main/java/org/linagora/priorityInbox/feature/FeatureExtractor.java
View file @
d63367ac
...
@@ -37,6 +37,8 @@ public class FeatureExtractor {
...
@@ -37,6 +37,8 @@ public class FeatureExtractor {
private
static
String
QUESTION_REGEX
=
"(?im)^(who|what|when|where|why|how|is|can|does|do|which|could|would|should|did|shall|are|has|have|will|at).+\\?$|^\\w+\\-(ce|vous|tu|nous|t\\-(on|il|elle)).+\\?$|^(qui|(à|avec|de) qui|(à|avec|de) quoi|où|comment|pourquoi|combien) .+\\?$|^(quelles|quels|quelle|quel) .+\\?$|^est ce (que|qu').+\\?$"
;
private
static
String
QUESTION_REGEX
=
"(?im)^(who|what|when|where|why|how|is|can|does|do|which|could|would|should|did|shall|are|has|have|will|at).+\\?$|^\\w+\\-(ce|vous|tu|nous|t\\-(on|il|elle)).+\\?$|^(qui|(à|avec|de) qui|(à|avec|de) quoi|où|comment|pourquoi|combien) .+\\?$|^(quelles|quels|quelle|quel) .+\\?$|^est ce (que|qu').+\\?$"
;
private
static
String
EMAIL_FROM_SUBSCRIPTION
=
"(?im)(se désinscrire|se désabonner|unsubscribe)\\s?$"
;
private
static
String
[]
FWD_REGEX_LIST
=
{
private
static
String
[]
FWD_REGEX_LIST
=
{
"(-+ *\\w+ (d'origine|original|transf(é|é|.{0,5})r(é|é|.{0,5})) *-+|-+ *(forwarded|original) \\w+ *-+)"
};
"(-+ *\\w+ (d'origine|original|transf(é|é|.{0,5})r(é|é|.{0,5})) *-+|-+ *(forwarded|original) \\w+ *-+)"
};
...
@@ -76,6 +78,8 @@ public class FeatureExtractor {
...
@@ -76,6 +78,8 @@ public class FeatureExtractor {
private
static
Pattern
QUESTION_PATTERN
=
Pattern
.
compile
(
QUESTION_REGEX
);
private
static
Pattern
QUESTION_PATTERN
=
Pattern
.
compile
(
QUESTION_REGEX
);
private
static
Pattern
EMAIL_FROM_SUBSCRIPTION_PATTERN
=
Pattern
.
compile
(
EMAIL_FROM_SUBSCRIPTION
);
private
static
Pattern
PUNCTUATION_PATTERN
=
Pattern
.
compile
(
"(…|\\.\\.\\.|\\.|\\?|:|\\!|;)"
);
private
static
Pattern
PUNCTUATION_PATTERN
=
Pattern
.
compile
(
"(…|\\.\\.\\.|\\.|\\?|:|\\!|;)"
);
private
static
Pattern
BLANK_LINE_PATTERN
=
Pattern
.
compile
(
"(?im)\\s*"
);
private
static
Pattern
BLANK_LINE_PATTERN
=
Pattern
.
compile
(
"(?im)\\s*"
);
...
@@ -132,8 +136,19 @@ public class FeatureExtractor {
...
@@ -132,8 +136,19 @@ public class FeatureExtractor {
boolean
containsMeetingInvitation
=
containsMeetingInvitation
(
email
);
boolean
containsMeetingInvitation
=
containsMeetingInvitation
(
email
);
boolean
isANotification
=
isANotification
(
email
);
boolean
isANotification
=
isANotification
(
email
);
double
senderScore
=
getSenderScore
(
email
)
;
//+ DynamicFeature.getSenderWeight(email);
PositionWeight
positionWeight
=
getSenderScore
(
email
);
// double contentWeight = DynamicFeature.getEmailContentWeight(email);
double
senderScore
=
0
d
;
String
jobPosition
=
null
;
if
(
positionWeight
!=
null
)
{
senderScore
=
positionWeight
.
getWeight
();
jobPosition
=
positionWeight
.
getName
();
}
//else {
double
tfidfSenderScore
=
DynamicFeature
.
getSenderWeight
(
email
);
senderScore
=
Math
.
max
(
senderScore
,
tfidfSenderScore
);
//}
double
contentWeight
=
DynamicFeature
.
getEmailContentWeight
(
email
);
boolean
existReplyText
=
existReplyText
(
email
);
boolean
existReplyText
=
existReplyText
(
email
);
// System.out.println("Sender Score: " + senderScore);
// System.out.println("Sender Score: " + senderScore);
// System.out.println("Content Weight: " + contentWeight);
// System.out.println("Content Weight: " + contentWeight);
...
@@ -173,7 +188,11 @@ public class FeatureExtractor {
...
@@ -173,7 +188,11 @@ public class FeatureExtractor {
document
.
setExistReplyText
(
existReplyText
);
document
.
setExistReplyText
(
existReplyText
);
document
.
setContainsQuestions
(
containsQuestions
);
document
.
setContainsQuestions
(
containsQuestions
);
document
.
setNumberOfQuestions
(
numberOfQuestions
);
document
.
setNumberOfQuestions
(
numberOfQuestions
);
// document.setContentWeight(contentWeight);
if
(
jobPosition
!=
null
)
{
document
.
setJobPosition
(
jobPosition
);
}
document
.
setContentWeight
(
contentWeight
);
// document.setSubjectWords(subjectWords);
// document.setSubjectWords(subjectWords);
// document.setBodyWords(bodyWords);
// document.setBodyWords(bodyWords);
...
@@ -244,7 +263,16 @@ public class FeatureExtractor {
...
@@ -244,7 +263,16 @@ public class FeatureExtractor {
boolean
containsMeetingInvitation
=
containsMeetingInvitation
(
email
);
boolean
containsMeetingInvitation
=
containsMeetingInvitation
(
email
);
boolean
isANotification
=
isANotification
(
email
);
boolean
isANotification
=
isANotification
(
email
);
double
senderScore
=
getSenderScore
(
email
);
// + DynamicFeature.getSenderWeight(email);
PositionWeight
positionWeight
=
getSenderScore
(
email
);
double
senderScore
=
0
d
;
String
jobPosition
=
null
;
if
(
positionWeight
!=
null
)
{
senderScore
=
positionWeight
.
getWeight
();
jobPosition
=
positionWeight
.
getName
();
}
else
{
senderScore
=
DynamicFeature
.
getSenderWeight
(
email
);
}
//double contentWeight = DynamicFeature.getEmailContentWeight(email);
//double contentWeight = DynamicFeature.getEmailContentWeight(email);
boolean
existReplyText
=
existReplyText
(
email
);
boolean
existReplyText
=
existReplyText
(
email
);
...
@@ -286,6 +314,9 @@ public class FeatureExtractor {
...
@@ -286,6 +314,9 @@ public class FeatureExtractor {
// document.setBodyWords(bodyWords);
// document.setBodyWords(bodyWords);
document
.
setContainsQuestions
(
containsQuestions
);
document
.
setContainsQuestions
(
containsQuestions
);
document
.
setNumberOfQuestions
(
numberOfQuestions
);
document
.
setNumberOfQuestions
(
numberOfQuestions
);
if
(
jobPosition
!=
null
)
{
document
.
setJobPosition
(
jobPosition
);
}
// System.out.println(email.getMessageId()+ " - " + email.getSubject() + " - " + emailFolderName);
// System.out.println(email.getMessageId()+ " - " + email.getSubject() + " - " + emailFolderName);
// System.out.println("isSent: " + isSent);
// System.out.println("isSent: " + isSent);
...
@@ -626,6 +657,8 @@ public class FeatureExtractor {
...
@@ -626,6 +657,8 @@ public class FeatureExtractor {
}
}
public
static
boolean
isANotification
(
Email
email
)
{
public
static
boolean
isANotification
(
Email
email
)
{
boolean
isANotification
=
false
;
boolean
existUnsubscription
=
false
;
if
(
email
.
getFrom
()
==
null
)
{
if
(
email
.
getFrom
()
==
null
)
{
return
false
;
return
false
;
}
}
...
@@ -648,8 +681,17 @@ public class FeatureExtractor {
...
@@ -648,8 +681,17 @@ public class FeatureExtractor {
}
}
}
}
return
email
.
getFrom
().
getAddress
().
matches
(
NOTIFICATION_REGEX
);
isANotification
=
email
.
getFrom
().
getAddress
().
matches
(
NOTIFICATION_REGEX
);
if
(!
isANotification
)
{
if
(
email
.
getBody
()
!=
null
)
{
String
content
=
email
.
getBody
();
existUnsubscription
=
EMAIL_FROM_SUBSCRIPTION_PATTERN
.
matcher
(
content
).
find
();
}
}
return
isANotification
||
existUnsubscription
;
}
}
...
@@ -678,7 +720,7 @@ public class FeatureExtractor {
...
@@ -678,7 +720,7 @@ public class FeatureExtractor {
return
false
;
return
false
;
}
}
public
static
double
getSenderScore
(
Email
email
)
{
public
static
PositionWeight
getSenderScore
(
Email
email
)
{
String
signature
=
""
;
String
signature
=
""
;
double
rankScore
=
0
d
;
double
rankScore
=
0
d
;
...
@@ -690,12 +732,20 @@ public class FeatureExtractor {
...
@@ -690,12 +732,20 @@ public class FeatureExtractor {
}
}
for
(
PositionWeight
rank
:
positionRanks
.
get
(
"all.txt"
))
{
for
(
PositionWeight
rank
:
positionRanks
.
get
(
"all.txt"
))
{
boolean
rankFind
=
rank
.
getRegex
().
matcher
(
signature
).
find
();
boolean
rankFind
=
rank
.
getRegex
().
matcher
(
signature
).
find
();
if
(
rankFind
)
{
Matcher
match
=
rank
.
getRegex
().
matcher
(
signature
);
rankScore
=
rank
.
getWeight
();
while
(
match
.
find
())
{
break
;
// String text = match.group();
// System.out.println("Matched name: " + rank.getName());
return
rank
;
//break;
}
}
// if (rankFind) {
// rankScore = rank.getWeight();
// break;
// }
}
}
return
rankScore
;
//return rankScore;
return
null
;
}
}
public
static
boolean
existReplyText
(
Email
email
)
{
public
static
boolean
existReplyText
(
Email
email
)
{
...
...
priorityInbox/src/main/java/org/linagora/priorityInbox/feature/LabeledDocument.java
View file @
d63367ac
package
org.linagora.priorityInbox.feature
;
package
org.linagora.priorityInbox.feature
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Random
;
import
org.linagora.priorityInbox.data.Email
;
import
org.linagora.priorityInbox.data.Email
;
...
@@ -42,6 +43,8 @@ public class LabeledDocument implements Comparable<LabeledDocument>{
...
@@ -42,6 +43,8 @@ public class LabeledDocument implements Comparable<LabeledDocument>{
private
boolean
isSameOrganisation
;
private
boolean
isSameOrganisation
;
private
boolean
existReplyText
;
private
boolean
existReplyText
;
private
String
jobPosition
=
null
;
public
static
double
IMPORTANT_SCORE_THRESHOLD
=
10
d
;
public
static
double
IMPORTANT_SCORE_THRESHOLD
=
10
d
;
...
@@ -195,6 +198,7 @@ public class LabeledDocument implements Comparable<LabeledDocument>{
...
@@ -195,6 +198,7 @@ public class LabeledDocument implements Comparable<LabeledDocument>{
if
(
documentType
==
null
)
{
if
(
documentType
==
null
)
{
if
(
isAReply
||
isAForward
||
emailTriggeringAReply
||
isImportant
||
(!
isANotification
&&
this
.
getScore
()
>=
IMPORTANT_SCORE_THRESHOLD
))
{
documentType
=
DocumentType
.
IMPORTANT
;}
if
(
isAReply
||
isAForward
||
emailTriggeringAReply
||
isImportant
||
(!
isANotification
&&
this
.
getScore
()
>=
IMPORTANT_SCORE_THRESHOLD
))
{
documentType
=
DocumentType
.
IMPORTANT
;}
else
if
((
isANotification
&&
(
isAReply
||
isAForward
)
&&
!
isSpam
&&
!
isImportant
)
||
(!
isANotification
&&
containsQuestions
))
{
documentType
=
DocumentType
.
TO_READ
;}
else
if
((
isANotification
&&
(
isAReply
||
isAForward
)
&&
!
isSpam
&&
!
isImportant
)
||
(!
isANotification
&&
containsQuestions
))
{
documentType
=
DocumentType
.
TO_READ
;}
else
if
(
isANotification
||
isSpam
)
{
documentType
=
DocumentType
.
NOTIFICATION
;}
else
if
(
isANotification
||
isSpam
)
{
documentType
=
DocumentType
.
NOTIFICATION
;}
...
@@ -222,14 +226,14 @@ public class LabeledDocument implements Comparable<LabeledDocument>{
...
@@ -222,14 +226,14 @@ public class LabeledDocument implements Comparable<LabeledDocument>{
if
(
isToRecipient
)
{
score
=
score
+
(
1
d
/(
double
)
numberOfRecipient
);}
// rule 9
if
(
isToRecipient
)
{
score
=
score
+
(
1
d
/(
double
)
numberOfRecipient
);}
// rule 9
if
(
isCcRecipient
)
{
score
=
score
+
(
0.8d
/(
double
)
numberOfRecipient
);}
// rule 10
if
(
isCcRecipient
)
{
score
=
score
+
(
0.8d
/(
double
)
numberOfRecipient
);}
// rule 10
if
(
isBccRecipient
)
{
score
=
score
+
(
0.
5
d
/(
double
)
numberOfRecipient
);}
// rule 11
if
(
isBccRecipient
)
{
score
=
score
+
(
0.
6
d
/(
double
)
numberOfRecipient
);}
// rule 11
if
(
emailTriggeringAReply
)
{
score
=
score
+
1
d
;}
// rule 12
if
(
emailTriggeringAReply
)
{
score
=
score
+
1
d
;}
// rule 12
if
(!
isSameOrganisation
)
{
score
=
score
+
1
d
;}
// rule 13
if
(!
isSameOrganisation
)
{
score
=
score
+
1
d
;}
// rule 13
if
(!
isToGroup
)
{
score
=
score
+
1
d
;}
// rule 14
if
(!
isToGroup
)
{
score
=
score
+
1
d
;}
// rule 14
score
=
score
+
senderScore
;
// rule 15
score
=
score
+
senderScore
;
// rule 15
//
score = score + contentWeight; // rule 16
score
=
score
+
contentWeight
;
// rule 16
documentScore
=
new
Double
(
score
);
documentScore
=
new
Double
(
score
);
}
}
...
@@ -319,6 +323,66 @@ public class LabeledDocument implements Comparable<LabeledDocument>{
...
@@ -319,6 +323,66 @@ public class LabeledDocument implements Comparable<LabeledDocument>{
public
void
setNumberOfQuestions
(
int
numberOfQuestions
)
{
public
void
setNumberOfQuestions
(
int
numberOfQuestions
)
{
this
.
numberOfQuestions
=
numberOfQuestions
;
this
.
numberOfQuestions
=
numberOfQuestions
;
}
}
public
void
setJobPosition
(
String
jobPosition
)
{
this
.
jobPosition
=
jobPosition
;
}
public
String
getJobPosition
()
{
return
this
.
jobPosition
;
}
public
String
getBooleanFeatureAsText
()
{
String
feature
=
""
;
//feature = feature + " " + this.email.getFrom().getAddress();
if
(
isSent
)
{
feature
=
feature
+
" isSent"
;}
if
(
isRecieved
)
{
feature
=
feature
+
" isRecieved"
;}
if
(
isAReply
)
{
feature
=
feature
+
" isAReply"
;}
if
(
isToRecipient
)
{
feature
=
feature
+
" isToRecipient"
;}
if
(
isCcRecipient
)
{
feature
=
feature
+
" isCcRecipient"
;}
if
(
isBccRecipient
)
{
feature
=
feature
+
" isBccRecipient"
;}
if
(
isAForward
)
{
feature
=
feature
+
" isAForward"
;}
if
(
isImportant
)
{
feature
=
feature
+
" isImportant"
;}
if
(
isSpam
)
{
feature
=
feature
+
" isSpam"
;}
if
(
containsDocAttachments
)
{
feature
=
feature
+
" containsDocAttachments"
;}
if
(
containsMeetingInvitation
)
{
feature
=
feature
+
" containsMeetingInvitation"
;}
if
(
isANotification
)
{
feature
=
feature
+
" isANotification"
;}
if
(
emailTriggeringAReply
)
{
feature
=
feature
+
" emailTriggeringAReply"
;}
if
(
containsQuestions
)
{
feature
=
feature
+
" containsQuestions"
;}
if
(
isSameOrganisation
)
{
feature
=
feature
+
" isSameOrganisation"
;}
if
(
isToGroup
)
{
feature
=
feature
+
" isToGroup"
;}
if
(
existReplyText
){
feature
=
feature
+
" existReplyText"
;}
if
(
jobPosition
!=
null
)
{
feature
=
feature
+
" "
+
jobPosition
;}
return
feature
;
}
public
String
getFeatureAsBinaryData
(
int
startIndex
)
{
String
feature
=
""
;
//feature = feature + " " + this.email.getFrom().getAddress();
if
(
isSent
)
{
feature
=
feature
+
" 1.0"
;}
else
{
feature
=
feature
+
" 0.0"
;}
if
(
isRecieved
)
{
feature
=
feature
+
" 1.0"
;}
else
{
feature
=
feature
+
" 0.0"
;}
if
(
isAReply
)
{
feature
=
feature
+
" 1.0"
;}
else
{
feature
=
feature
+
" 0.0"
;}
if
(
isToRecipient
)
{
feature
=
feature
+
" 1.0"
;}
else
{
feature
=
feature
+
" 0.0"
;}
if
(
isCcRecipient
)
{
feature
=
feature
+
" 1.0"
;}
else
{
feature
=
feature
+
" 0.0"
;}
if
(
isBccRecipient
)
{
feature
=
feature
+
" 1.0"
;}
else
{
feature
=
feature
+
" 0.0"
;}
if
(
isAForward
)
{
feature
=
feature
+
" 1.0"
;}
else
{
feature
=
feature
+
" 0.0"
;}
if
(
isImportant
)
{
feature
=
feature
+
" 1.0"
;}
else
{
feature
=
feature
+
" 0.0"
;}
if
(
isSpam
)
{
feature
=
feature
+
" 1.0"
;}
else
{
feature
=
feature
+
" 0.0"
;}
if
(
containsDocAttachments
)
{
feature
=
feature
+
" 1.0"
;}
else
{
feature
=
feature
+
" 0.0"
;}
if
(
containsMeetingInvitation
)
{
feature
=
feature
+
" 1.0"
;}
else
{
feature
=
feature
+
" 0.0"
;}
if
(
isANotification
)
{
feature
=
feature
+
" 1.0"
;}
else
{
feature
=
feature
+
" 0.0"
;}
if
(
emailTriggeringAReply
)
{
feature
=
feature
+
" 1.0"
;}
else
{
feature
=
feature
+
" 0.0"
;}
if
(
containsQuestions
)
{
feature
=
feature
+
" 1.0"
;}
else
{
feature
=
feature
+
" 0.0"
;}
if
(
isSameOrganisation
)
{
feature
=
feature
+
" 1.0"
;}
else
{
feature
=
feature
+
" 0.0"
;}
if
(
isToGroup
)
{
feature
=
feature
+
" 1.0"
;}
else
{
feature
=
feature
+
" 0.0"
;}
if
(
existReplyText
){
feature
=
feature
+
" 1.0"
;}
else
{
feature
=
feature
+
" 0.0"
;}
//if(jobPosition != null) {feature = feature + " " + jobPosition;}
return
feature
;
}
...
...
priorityInbox/src/main/java/org/linagora/priorityInbox/feature/PositionWeight.java
View file @
d63367ac
...
@@ -12,6 +12,7 @@ public class PositionWeight {
...
@@ -12,6 +12,7 @@ public class PositionWeight {
private
Pattern
regex
=
null
;
private
Pattern
regex
=
null
;
private
double
weight
=
0
;
private
double
weight
=
0
;
private
String
name
=
null
;
private
static
HashMap
<
String
,
List
<
PositionWeight
>>
positionWeights
=
new
HashMap
<
String
,
List
<
PositionWeight
>>();
private
static
HashMap
<
String
,
List
<
PositionWeight
>>
positionWeights
=
new
HashMap
<
String
,
List
<
PositionWeight
>>();
public
static
double
maxWeight
=
LabeledDocument
.
IMPORTANT_SCORE_THRESHOLD
*
2
d
;
public
static
double
maxWeight
=
LabeledDocument
.
IMPORTANT_SCORE_THRESHOLD
*
2
d
;
...
@@ -40,6 +41,7 @@ public class PositionWeight {
...
@@ -40,6 +41,7 @@ public class PositionWeight {
PositionWeight
positionRank
=
new
PositionWeight
();
PositionWeight
positionRank
=
new
PositionWeight
();
positionRank
.
setWeight
(
maxWeight
-
allStep
);
positionRank
.
setWeight
(
maxWeight
-
allStep
);
allStep
=
allStep
+
step
;
allStep
=
allStep
+
step
;
positionRank
.
setName
(
elements
.
get
(
0
).
replace
(
"\\Q"
,
""
).
replace
(
"\\E"
,
""
).
replace
(
" "
,
"_"
).
trim
());
positionRank
.
setRegex
(
Pattern
.
compile
(
String
.
join
(
"|"
,
elements
),
Pattern
.
CASE_INSENSITIVE
));
positionRank
.
setRegex
(
Pattern
.
compile
(
String
.
join
(
"|"
,
elements
),
Pattern
.
CASE_INSENSITIVE
));
size
--;
size
--;
ranks
.
add
(
positionRank
);
ranks
.
add
(
positionRank
);
...
@@ -60,6 +62,12 @@ public class PositionWeight {
...
@@ -60,6 +62,12 @@ public class PositionWeight {
this
.
regex
=
regex
;
this
.
regex
=
regex
;
this
.
weight
=
rank
;
this
.
weight
=
rank
;
}
}
public
PositionWeight
(
String
name
,
Pattern
regex
,
int
rank
)
{
this
.
name
=
name
;
this
.
regex
=
regex
;
this
.
weight
=
rank
;
}
public
Pattern
getRegex
()
{
public
Pattern
getRegex
()
{
...
@@ -77,5 +85,13 @@ public class PositionWeight {
...
@@ -77,5 +85,13 @@ public class PositionWeight {
public
void
setWeight
(
double
rank
)
{
public
void
setWeight
(
double
rank
)
{
this
.
weight
=
rank
;
this
.
weight
=
rank
;
}
}
public
String
getName
()
{
return
name
;
}
public
void
setName
(
String
name
)
{
this
.
name
=
name
;
}
}
}
priorityInbox/src/main/java/org/linagora/priorityInbox/feature/TextDataModel.java
View file @
d63367ac
package
org.linagora.priorityInbox.feature
;
package
org.linagora.priorityInbox.feature
;
public
class
TextDataModel
{
public
class
TextDataModel
implements
Comparable
<
TextDataModel
>
{
private
String
word
=
null
;
private
String
word
=
null
;
private
int
index
=
-
1
;
private
int
frequency
=
-
1
;