Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Zied SELLAMI
priority-inbox
Commits
ba6aa0f7
Commit
ba6aa0f7
authored
Feb 01, 2019
by
Zied SELLAMI
Browse files
Add reading html email body
parent
61737666
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
82 additions
and
9 deletions
+82
-9
priorityInbox/src/main/java/org/linagora/priorityInbox/data/Email.java
.../src/main/java/org/linagora/priorityInbox/data/Email.java
+19
-1
priorityInbox/src/main/java/org/linagora/priorityInbox/emailReader/CSVReader.java
...ava/org/linagora/priorityInbox/emailReader/CSVReader.java
+20
-3
priorityInbox/src/main/java/org/linagora/priorityInbox/emailReader/CheckEmail.java
...va/org/linagora/priorityInbox/emailReader/CheckEmail.java
+39
-3
priorityInbox/src/main/java/org/linagora/priorityInbox/feature/Test.java
...rc/main/java/org/linagora/priorityInbox/feature/Test.java
+2
-1
priorityInbox/src/main/java/org/linagora/priorityInbox/feature/TestEmailSample.java
...a/org/linagora/priorityInbox/feature/TestEmailSample.java
+2
-1
No files found.
priorityInbox/src/main/java/org/linagora/priorityInbox/data/Email.java
View file @
ba6aa0f7
...
...
@@ -9,7 +9,7 @@ import org.joda.time.DateTime;
public
class
Email
{
//"messageId\tfrom\tto\tcc\tbcc\treceivedTime\tsentTime\tinReplyTo\tsubject\tbodyFile\tattachment\tpriority\temailFolder\n"
//"messageId\tfrom\tto\tcc\tbcc\treceivedTime\tsentTime\tinReplyTo\tsubject\tbodyFile\
thtmlBodyFile\
tattachment\tpriority\temailFolder\n"
private
String
messageId
=
null
;
private
InternetAddress
from
=
null
;
private
List
<
InternetAddress
>
to
=
null
;
...
...
@@ -20,7 +20,9 @@ public class Email {
private
String
inReplyTo
=
null
;
private
String
subject
=
null
;
private
String
bodyFile
=
null
;
private
String
htmlBodyFile
=
null
;
private
String
body
=
null
;
private
String
htmlBody
=
null
;
private
List
<
EmailAttachment
>
attachments
=
null
;
private
Integer
priority
=
null
;
private
boolean
isASpam
=
false
;
...
...
@@ -181,5 +183,21 @@ public class Email {
/**
 * Replaces the list of alternative addresses stored on this email.
 *
 * @param alternativeAddress the new list of alternative addresses; stored as-is (no copy is made)
 */
public void setAlternativeAddress(List<String> alternativeAddress) {
    this.alternativeAddress = alternativeAddress;
}
/**
 * Returns the name of the file that holds the HTML version of the email body.
 *
 * @return the HTML body file name, or {@code null} if none has been set
 */
public String getHtmlBodyFile() {
    return this.htmlBodyFile;
}
/**
 * Sets the name of the file that holds the HTML version of the email body.
 *
 * @param htmlBodyFile the HTML body file name; may be {@code null}
 */
public void setHtmlBodyFile(String htmlBodyFile) {
    this.htmlBodyFile = htmlBodyFile;
}
/**
 * Returns the HTML content of the email body.
 *
 * @return the HTML body text, or {@code null} if none has been set
 */
public String getHtmlBody() {
    return this.htmlBody;
}
/**
 * Sets the HTML content of the email body.
 *
 * @param htmlBody the HTML body text; may be {@code null}
 */
public void setHtmlBody(String htmlBody) {
    this.htmlBody = htmlBody;
}
}
priorityInbox/src/main/java/org/linagora/priorityInbox/emailReader/CSVReader.java
View file @
ba6aa0f7
...
...
@@ -49,14 +49,14 @@ public class CSVReader {
// CSV file header
private
static
final
String
[]
FILE_HEADER
=
{
"messageId"
,
"from"
,
"to"
,
"cc"
,
"bcc"
,
"receivedTime"
,
"sentTime"
,
"inReplyTo"
,
"subject"
,
"bodyFile"
,
"attachment"
,
"priority"
,
"emailFolder"
,
"spamFlag"
};
"inReplyTo"
,
"subject"
,
"bodyFile"
,
"htmlBodyFile"
,
"attachment"
,
"priority"
,
"emailFolder"
,
"spamFlag"
};
private
static
Pattern
Blank_Pattern
=
Pattern
.
compile
(
"(\u00a0|\u202f|\u2007| )"
);
private
static
Pattern
Email_Section_Pattern
=
Pattern
.
compile
(
"<([^<]+@[^>]+)>"
);
public
static
HashMap
<
String
,
Email
>
parseEmails
(
String
csvPath
,
String
bodyPath
)
{
public
static
HashMap
<
String
,
Email
>
parseEmails
(
String
csvPath
,
String
bodyPath
,
String
htmBodyPath
)
{
HashMap
<
String
,
Email
>
emails
=
new
HashMap
<
String
,
Email
>();
int
counter
=
0
;
try
{
...
...
@@ -101,6 +101,15 @@ public class CSVReader {
body
=
Blank_Pattern
.
matcher
(
body
).
replaceAll
(
" "
);
email
.
setBody
(
body
);
}
email
.
setHtmlBodyFile
(
csvRecord
.
get
(
"htmlBodyFile"
));
if
(
csvRecord
.
get
(
"htmlBodyFile"
)
!=
null
)
{
String
htmlBody
=
FileUtils
.
readFileToString
(
new
File
(
htmBodyPath
+
csvRecord
.
get
(
"htmlBodyFile"
)));
htmlBody
=
Blank_Pattern
.
matcher
(
htmlBody
).
replaceAll
(
" "
);
email
.
setHtmlBody
(
htmlBody
);
}
email
.
setAttachments
(
parseAttachments
(
csvRecord
.
get
(
"attachment"
)));
email
.
setPriority
(
parseStringInteger
(
csvRecord
.
get
(
"priority"
)));
...
...
@@ -134,7 +143,7 @@ public class CSVReader {
return
emails
;
}
public
static
Inbox
parseInbox
(
String
csvPath
,
String
bodyPath
)
{
public
static
Inbox
parseInbox
(
String
csvPath
,
String
bodyPath
,
String
htmBodyPath
)
{
HashMap
<
String
,
EmailFolder
>
emailFolders
=
new
HashMap
<
String
,
EmailFolder
>();
Inbox
inbox
=
null
;
int
counter
=
0
;
...
...
@@ -180,6 +189,14 @@ public class CSVReader {
body
=
Blank_Pattern
.
matcher
(
body
).
replaceAll
(
" "
);
email
.
setBody
(
body
);
}
email
.
setHtmlBodyFile
(
csvRecord
.
get
(
"htmlBodyFile"
));
if
(
csvRecord
.
get
(
"htmlBodyFile"
)
!=
null
)
{
String
htmlBody
=
FileUtils
.
readFileToString
(
new
File
(
htmBodyPath
+
csvRecord
.
get
(
"htmlBodyFile"
)));
htmlBody
=
Blank_Pattern
.
matcher
(
htmlBody
).
replaceAll
(
" "
);
email
.
setHtmlBody
(
htmlBody
);
}
email
.
setAttachments
(
parseAttachments
(
csvRecord
.
get
(
"attachment"
)));
email
.
setPriority
(
parseStringInteger
(
csvRecord
.
get
(
"priority"
)));
...
...
priorityInbox/src/main/java/org/linagora/priorityInbox/emailReader/CheckEmail.java
View file @
ba6aa0f7
...
...
@@ -32,10 +32,10 @@ import org.apache.commons.io.FileUtils;
public
class
CheckEmail
{
private
static
final
String
email_id
=
""
;
private
static
final
String
email_id
=
"
zsellami@linagora.com
"
;
private
static
final
String
password
=
""
;
private
static
String
directory
=
""
;
private
static
String
directory
=
"
/home/zsellami/newCorpus/zied2/
"
;
/**
...
...
@@ -60,7 +60,7 @@ public class CheckEmail {
private
static
final
String
NULL
=
"null"
;
//CSV file header
private
static
final
String
[]
FILE_HEADER
=
{
"messageId"
,
"from"
,
"to"
,
"cc"
,
"bcc"
,
"receivedTime"
,
"sentTime"
,
"inReplyTo"
,
"subject"
,
"bodyFile"
,
"attachment"
,
"priority"
,
"emailFolder"
,
"spamFlag"
};
private
static
final
String
[]
FILE_HEADER
=
{
"messageId"
,
"from"
,
"to"
,
"cc"
,
"bcc"
,
"receivedTime"
,
"sentTime"
,
"inReplyTo"
,
"subject"
,
"bodyFile"
,
"
htmlBodyFile"
,
"
attachment"
,
"priority"
,
"emailFolder"
,
"spamFlag"
};
static
FileWriter
fileWriter
=
null
;
...
...
@@ -165,11 +165,17 @@ private static void readMessage(MimeMessage message) {
String
subject
=
getSubject
(
message
);
String
body
=
getTextFromMessage
(
message
);
String
htmlBody
=
getHtmlFromMessage
(
message
);
String
bodyFile
=
null
;
if
(
body
!=
null
&&
!
body
.
equals
(
""
))
{
bodyFile
=
UUID
.
randomUUID
()
+
"_"
+
System
.
nanoTime
()
+
".txt"
;
FileUtils
.
write
(
new
File
(
directory
+
"emails_body/"
+
bodyFile
),
body
);
}
String
htmlBodyFile
=
null
;
if
(
htmlBody
!=
null
&&
!
htmlBody
.
equals
(
""
))
{
htmlBodyFile
=
UUID
.
randomUUID
()
+
"_"
+
System
.
nanoTime
()
+
".txt"
;
FileUtils
.
write
(
new
File
(
directory
+
"emails_htmlBody/"
+
htmlBodyFile
),
htmlBody
);
}
String
attachments
=
getAttachments
(
message
);
String
priority
=
"0"
;
...
...
@@ -195,6 +201,7 @@ private static void readMessage(MimeMessage message) {
emailRecord
.
add
(
inReplyTo
);
emailRecord
.
add
(
subject
);
emailRecord
.
add
(
bodyFile
);
emailRecord
.
add
(
htmlBodyFile
);
emailRecord
.
add
(
attachments
);
emailRecord
.
add
(
priority
);
emailRecord
.
add
(
emailFolder
);
...
...
@@ -223,6 +230,7 @@ private static void readMessage(MimeMessage message) {
}
private
static
String
getSpamFlag
(
MimeMessage
message
)
{
// TODO Auto-generated method stub
try
{
...
...
@@ -298,6 +306,34 @@ private static String getTextFromMessage(Message message) throws MessagingExcept
return
result
;
}
/**
 * Extracts the HTML body of a message.
 *
 * <p>A message whose own MIME type is {@code text/html} yields its content
 * converted with {@code toString()}. A {@code multipart/*} message is
 * delegated to {@link #getHtmlFromMimeMultipart(MimeMultipart)}. Any other
 * MIME type yields {@code null}.
 *
 * @param message the message to inspect
 * @return the HTML body, or {@code null} when the message is neither
 *         {@code text/html} nor multipart
 * @throws MessagingException if the MIME type or content cannot be accessed
 * @throws IOException if reading the message content fails
 */
private static String getHtmlFromMessage(MimeMessage message) throws MessagingException, IOException {
    if (message.isMimeType("text/html")) {
        return message.getContent().toString();
    }
    if (message.isMimeType("multipart/*")) {
        MimeMultipart multipartContent = (MimeMultipart) message.getContent();
        return getHtmlFromMimeMultipart(multipartContent);
    }
    return null;
}
/**
 * Extracts the HTML body from a multipart container, descending into nested
 * multiparts.
 *
 * <p>Parts are visited in order. The first part that is directly
 * {@code text/html} becomes the result and ends the scan. HTML found inside
 * nested multiparts before that point is accumulated, separated by a newline.
 *
 * <p>Nested multiparts that contain no HTML contribute nothing. Previously a
 * {@code null} accumulator or a {@code null} nested result was string-concatenated,
 * producing the literal text {@code "null"} in the returned body.
 *
 * @param mimeMultipart the multipart container to scan
 * @return the HTML body, or {@code null} when no HTML part is found
 * @throws MessagingException if a part or its MIME type cannot be accessed
 * @throws IOException if reading a part's content fails
 */
private static String getHtmlFromMimeMultipart(MimeMultipart mimeMultipart) throws MessagingException, IOException {
    String html = null;
    int count = mimeMultipart.getCount();
    for (int i = 0; i < count; i++) {
        BodyPart bodyPart = mimeMultipart.getBodyPart(i);
        if (bodyPart.isMimeType("text/html")) {
            html = (String) bodyPart.getContent();
            break;
        }
        // Fetch the content once; it is needed both for the type check and the recursion.
        Object content = bodyPart.getContent();
        if (content instanceof MimeMultipart) {
            String nestedHtml = getHtmlFromMimeMultipart((MimeMultipart) content);
            // Only append real HTML; never concatenate a null reference into the result.
            if (nestedHtml != null) {
                html = (html == null) ? nestedHtml : html + "\n" + nestedHtml;
            }
        }
    }
    return html;
}
private
static
String
getTextFromMimeMultipart
(
MimeMultipart
mimeMultipart
)
throws
MessagingException
,
IOException
{
String
result
=
""
;
...
...
priorityInbox/src/main/java/org/linagora/priorityInbox/feature/Test.java
View file @
ba6aa0f7
...
...
@@ -20,6 +20,7 @@ public class Test {
//Text.setStopWordsDirectory("stopwords");
String
DATA_FILE_PATH
=
"/home/zsellami/newCorpus/zied2/DATA.csv"
;
String
BODY_DIR_PATH
=
"/home/zsellami/newCorpus/zied2/emails_body/"
;
String
HTML_BODY_DIR_PATH
=
"/home/zsellami/newCorpus/zied2/emails_htmlBody/"
;
String
user
=
"Zied Sellami"
;
List
<
String
>
alternativeEmails
=
Arrays
.
asList
(
"zsellami@linagora.com"
,
"zied.sellami@linagora.com"
);
...
...
@@ -29,7 +30,7 @@ public class Test {
long
startTime
=
System
.
currentTimeMillis
();
Inbox
inbox
=
CSVReader
.
parseInbox
(
DATA_FILE_PATH
,
BODY_DIR_PATH
);
Inbox
inbox
=
CSVReader
.
parseInbox
(
DATA_FILE_PATH
,
BODY_DIR_PATH
,
HTML_
BODY_DIR_PATH
);
inbox
.
setUser
(
user
);
inbox
.
setEmailAddresses
(
alternativeEmails
);
...
...
priorityInbox/src/main/java/org/linagora/priorityInbox/feature/TestEmailSample.java
View file @
ba6aa0f7
...
...
@@ -68,6 +68,7 @@ public class TestEmailSample {
String
DATA_FILE_PATH
=
"/home/zsellami/newCorpus/zied2/DATA.csv"
;
String
BODY_DIR_PATH
=
"/home/zsellami/newCorpus/zied2/emails_body/"
;
String
HTML_BODY_DIR_PATH
=
"/home/zsellami/newCorpus/zied2/emails_htmlBody/"
;
String
user
=
"Zied Sellami"
;
List
<
String
>
alternativeEmails
=
Arrays
.
asList
(
"zsellami@linagora.com"
,
"zied.sellami@linagora.com"
);
...
...
@@ -77,7 +78,7 @@ public class TestEmailSample {
long
startTime
=
System
.
currentTimeMillis
();
Inbox
inbox
=
CSVReader
.
parseInbox
(
DATA_FILE_PATH
,
BODY_DIR_PATH
);
Inbox
inbox
=
CSVReader
.
parseInbox
(
DATA_FILE_PATH
,
BODY_DIR_PATH
,
HTML_
BODY_DIR_PATH
);
inbox
.
setUser
(
user
);
inbox
.
setEmailAddresses
(
alternativeEmails
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment