#!/usr/local/bin/perl
# nstouwimap
# $Id: nstouwimap,v 1.19 2002/05/23 15:45:51 dwallach Exp dwallach $
# Author: Dan Wallach
# Warranty: None expressed or implied.
#
# The purpose of this program is to convert from Netscape's internal
# file format to that used by the UW IMAP server.  There are many differences
# that make this not entirely trivial:
#   (1) different ways of marking a message as being flagged/read
#       (Netscape uses X-Mozilla-Status, UW uses Status and X-Status)
#   (2) slight differences in formatting of messages separators
#       (the UW imap server is *not* happy with CR-LF)
#   (3) different handling of folders with messages *and* subfolders
#       (Netscape supports this, UW doesn't)
#   (4) special treatment of the Inbox/Trash folders
#   (5) the UW imap server puts a special mail message at the top of every
#       mail folder that includes an inscrutable X-IMAP header
#
# The first two problems are fairly straightforward to solve.
# The third is tricky.  Netscape solves this problem, internally,
# by appending an ugly ".sbd" extension to each folder.  We map
# those ".sbd" extensions to "_", which is somewhat less gruesome.
# An optional flag allows these extensions to be ignored when
# no messages are in the top-level folder.
#
# Command-line options are described in the help message, at the
# bottom of this file.

# NOTE(review): newgetopt.pl is a Perl 4 compatibility library; it is
# believed to be absent from the core distribution of newer perls --
# confirm it is installed where this script runs.
require "newgetopt.pl";

# Basename of the running script.
($prog_name = $0) =~ s{.*/}{};

# Number of messages seen in the folder currently being converted.
$counter = 0;

# True when the last thing written to STDERR was a complete blurt() line;
# false after a progress counter has been printed, so that the next
# blurt() starts on a fresh line.
$prev_blurt = 1;

# Reset the per-folder message counter and mark the STDERR line as "open".
sub reset_counter {
    $prev_blurt = 0;
    $counter    = 0;
}

# Count one message.  Every 100th message a progress marker is printed.
# Returns true if it actually printed anything, false otherwise.
sub counter {
    return 0 unless ((++$counter % 100) == 0);
    $prev_blurt = 0;
    print STDERR "$counter...";
    return 1;
}

# Print the final message count for the folder just converted.
sub tally_counter {
    print STDERR "$counter", (defined $opt_verbose) ? " total\n\n" : "\n";
}

# Like blurt(), but only when --verbose was given.
sub debug_blurt {
    blurt(@_) if $opt_verbose;
}

# Print a diagnostic on STDERR unless --quiet was given.  If a progress
# counter is pending on the current line, a newline is emitted first.
sub blurt {
    return if $opt_quiet;
    print STDERR "\n" unless $prev_blurt;
    print STDERR @_;
    $prev_blurt = 1;
}

# Recursively convert a set of directory entries.
#
#   $input_base   directory containing the entries
#   $output_base  directory the converted entries are written under
#   @input_dirs   entry names (files and ".sbd" directories) in $input_base
#
# Reads the globals $root_input_dir, $opt_in, $opt_re, $opt_fe and
# $opt_safe, and records in %messagesInFolder what convert() returned for
# each mail file.  Dies if an output directory cannot be created.
sub process {
    my ($input_base, $output_base, @input_dirs) = @_;

    debug_blurt(sprintf("Process: %s -> %s\n", $input_base, $output_base));

    # note: by sorting, we'll see data files before we see subdirectories
    # since Netscape subdirectories always have a ".sbd" suffix
  DIRTAG:
    foreach my $input_dir (sort @input_dirs) {
        my $full_input_dir  = "$input_base/$input_dir";
        my $full_output_dir = "$output_base/$input_dir";

        # special logic for handling Inbox / Inbox.sbd
        if ($full_input_dir eq "$root_input_dir/Inbox") {
            $full_output_dir = "$output_base/$opt_in";
        }

        if (!-e $full_input_dir) {
            blurt("Weird stuff: $full_input_dir doesn't exist!\n");
            next DIRTAG;
        }

        if (-f $full_input_dir) {
            # ignore various Netscape index files and sundry crap
            if (   $full_input_dir =~ /\.log$/
                || $full_input_dir =~ /\.snm$/
                || $full_input_dir =~ /\.dat$/
                || $full_input_dir =~ /~$/)
            {
                # debug_blurt("Ignoring file: $full_input_dir\n");
            }
            else {
                $messagesInFolder{$full_input_dir} =
                    convert($full_input_dir, $full_output_dir);
            }
            next DIRTAG;
        }

        if (-d $full_input_dir && !-l $full_input_dir) {
            next DIRTAG if $input_dir eq "." || $input_dir eq "..";

            my $dir_handle;
            if (!opendir($dir_handle, $full_input_dir)) {
                blurt("Can't read $full_input_dir: $!\n");
                next DIRTAG;
            }
            my @subdirs = readdir($dir_handle);
            closedir($dir_handle);
            # debug_blurt("Found: " . join(",", @subdirs) . "\n");

            # logic to simplify folder names
            my $new_output_base;
            # special logic for handling Inbox / Inbox.sbd
            if ($full_input_dir eq "$root_input_dir/Inbox.sbd") {
                $new_output_base = "$output_base/$opt_in";
            }
            else {
                $new_output_base = "$output_base/$input_dir";
                $new_output_base =~ s/\.sbd$//;
            }

            # Here's the ugly part: test if we've seen this input
            # before as a message folder.  If the folder was non-empty,
            # or it's the trash, or we're not explicitly trying to
            # do this optimization, then tack on the folder suffix.
            my $test_input_base = "$input_base/$input_dir";
            $test_input_base =~ s/\.sbd$//;

            my $omit_suffix = $opt_re
                && $test_input_base ne "$root_input_dir/Trash"
                && !$messagesInFolder{$test_input_base};

            # Plain concatenation: the path must never be used as a
            # sprintf format, or a '%' in a folder name would corrupt it.
            $new_output_base .= $opt_fe
                unless $omit_suffix || !defined $opt_fe;

            unless ($opt_safe) {
                mkdir($new_output_base, 0700)
                    or die "Can't create $new_output_base: $!\n";
            }

            process("$input_base/$input_dir", $new_output_base, @subdirs);
        }
        else {
            blurt("Weird file: $input_dir -- I don't do those.\n");
        }
    }
}

# Translate an "X-Mozilla-Status: hhhh" header line into the text of the
# UW IMAP "Status:" and "X-Status:" header lines (newline-terminated).
# An empty argument (no Mozilla status seen) yields a read, old message.
sub _uw_status_headers {
    my ($mozilla_status_line) = @_;

    return "Status: RO\nX-Status:\n" if $mozilla_status_line eq "";

    # The flags are the hex word after the header name; only the low
    # four bits (read, replied, marked, expunged) are mapped.
    my (undef, $hex_flags) = split(/ /, $mozilla_status_line);
    $hex_flags = "" unless defined $hex_flags;
    my $flags = oct("0x$hex_flags") & 0xf;

    return sprintf("Status: %s\nX-Status: %s%s%s\n",
        ($flags & 0x1) ? "RO" : "O",
        ($flags & 0x2) ? "A"  : "",
        ($flags & 0x4) ? "F"  : "",
        ($flags & 0x8) ? "D"  : "");
}

# Convert one Netscape mail file into a UW IMAP mbox file.
#
#   $input_file   path of the Netscape folder file
#   $output_file  path to write, or "-" for standard output
#
# Returns 0 if the input is empty (or cannot be stat'ed), 1 on success or
# when --noconvert is set, and nothing if either file cannot be opened.
# With --safe the output goes to /dev/null.
sub convert {
    my ($input_file, $output_file) = @_;

    my $size = (stat($input_file))[7];
    return 0 unless $size;
    return 1 if $opt_noconvert;    # debugging hack

    my $in_fh;
    if (!open($in_fh, '<', $input_file)) {
        blurt("Can't open $input_file: $!\n");
        return;
    }

    blurt("$input_file -> $output_file: ");

    if ($opt_safe) {
        $output_file = "/dev/null";
    }

    my $to_stdout = ($output_file eq "-");
    my $out_fh;
    if ($to_stdout) {
        $out_fh = \*STDOUT;
    }
    elsif (!open($out_fh, '>', $output_file)) {
        close($in_fh);
        blurt("Can't open output $output_file: $!\n");
        return;
    }

    # Only tighten permissions on a real mailbox; never on /dev/null
    # (safe mode) and never on the "-" placeholder for stdout.
    chmod 0600, $output_file unless $opt_safe || $to_stdout;

    reset_counter();

    # generate the stupid UW IMAP header
    my $now_string = localtime();
    my $now_time   = sprintf("%10i", scalar(time()));
    my $version    = '$Revision: 1.19 $';

    # NOTE(review): the heredoc opener and the lines before "Subject:"
    # were lost from the copy of this file under review.  The "From "
    # separator and "From:" header below are reconstructed from the usual
    # UW IMAP pseudo-message layout -- confirm against the upstream source.
    # print (not printf) is used so that a '%' in $input_file is harmless.
    print {$out_fh} <<EOF;
From MAILER-DAEMON $now_string
From: Mail System Internal Data <MAILER-DAEMON\@localhost>
Subject: DON'T DELETE THIS MESSAGE -- NECESSARY IMAP SERVER CRUD
X-IMAP: $now_time 0000000000
Status: RO

This text is here because the U.W. IMAP daemon is incredibly strange in
its design and seems to want to keep this around.  Far be it for us to
argue with them.  Just leave it alone and things should work.

FYI, this e-mail folder was originally converted to the UW IMAP format
using nstouwimap $version

Original folder: $input_file

EOF

    # Okay, these are the fields that the UW IMAP daemon cares about
    #   Status
    #   X-Status
    #   X-Keywords
    # And, these are the fields that Netscape cares about
    #   Status
    #   X-Mozilla-Status
    #   X-Mozilla-Status2
    # I have no idea what a Keyword is, so I'll just pass one of those
    # headers through if I find it.  The UW IMAPD source puts the R and O
    # bits in Status and everything else in X-Status, so that's what we'll do.
    #
    # Here are what the UW IMAP daemon considers to be status flags
    # (see osdep/unix/unix.c)
    #   R - message read
    #   O - message old
    #   D - message deleted
    #   F - message flagged
    #   A - message answered
    #   T - message is a draft
    #
    # Mozilla code borrowed from mozilla.org:
    # http://lxr.mozilla.org/seamonkey/source/mailnews/base/public/nsMsgMessageFlags.h#32
    #
    # 32 #define MSG_FLAG_READ     0x0001  /* has been read */
    # 33 #define MSG_FLAG_REPLIED  0x0002  /* a reply has been successfully sent */
    # 34 #define MSG_FLAG_MARKED   0x0004  /* the user-provided mark */
    # 35 #define MSG_FLAG_EXPUNGED 0x0008  /* already gone (when folder not
    # 36                                      compacted.)  Since actually
    # 37                                      removing a message from a
    # 38                                      folder is a semi-expensive
    # 39                                      operation, we tend to delay it;
    # 40                                      messages with this bit set will
    # 41                                      be removed the next time folder
    # 42                                      compaction is done.  Once this
    # 43                                      bit is set, it never gets
    # 44                                      un-set.  */
    # 45 #define MSG_FLAG_HAS_RE   0x0010  /* whether subject has "Re:" on
    # 46                                      the front.  The folder summary
    # 47                                      uniquifies all of the strings
    # 48                                      in it, and to help this, any
    # 49                                      string which begins with "Re:"
    # 50                                      has that stripped first.  This
    # 51                                      bit is then set, so that when
    # 52                                      presenting the message, we know
    # 53                                      to put it back (since the "Re:"
    # 54                                      is not itself stored in the
    # 55                                      file.)  */
    #
    # The last flag is just awful, so we'll ignore it.  Netscape appears to
    # still have the "Re:"'s in the file, anyway.  The other flags seem to map
    # nicely from Mozilla status to UW IMAP status.  There are lots of other
    # Mozilla flags, but none of them seem to map into nice IMAP flags, so
    # we'll ignore those as well.

    counter();

    my $headerMode      = 1;
    my @headerLines     = ();
    my $initialFromLine = "";
    my $mozillaStatus   = "";
    my $keywords        = "";

    while (my $line = <$in_fh>) {
        chomp $line;         # gets the \n
        $line =~ s/\r$//;    # gets any \r that might be remaining

        # this is a stupid state machine, and $headerMode says whether
        # we're chomping on headers or body data
        if (!$headerMode) {
            if ($line =~ /^From /) {
                # message separator: start collecting the next header
                $headerMode      = 1;
                $initialFromLine = $line;
                counter();
            }
            else {
                print {$out_fh} "$line\n";
            }
            next;
        }

        if ($line eq "") {
            # end of header: print out the headers and continue on
            $headerMode = 0;

            if ($initialFromLine eq "") {
                print {$out_fh} "From - ", scalar(localtime), "\n";
            }
            else {
                # UW imapd is picky about the format of the
                # date string.  Try to fix it up if it's broken;
                # a line that doesn't match is passed through as-is.
                # thanks to Jon Earle for this patch
                if ($initialFromLine =~ m|(.*),(.*)|) {
                    my $tmpVar1 = $1;
                    my $wrkVar  = $2;
                    if ($wrkVar =~ m|(.*)\s(.*)\s(.*)\s(.*)|) {
                        $initialFromLine =
                            $tmpVar1 . " " . $2 . $1 . " " . $4 . " " . $3;
                    }
                }
                print {$out_fh} "$initialFromLine\n";
            }

            print {$out_fh} _uw_status_headers($mozillaStatus);
            print {$out_fh} "$keywords\n" unless $keywords eq "";

            # print the rest of the headers plus a blank line
            print {$out_fh} join("\n", @headerLines), "\n\n";

            # reset the state for the next message
            @headerLines     = ();
            $initialFromLine = "";
            $mozillaStatus   = "";
            $keywords        = "";
        }
        elsif ($line =~ /^From /) {
            $initialFromLine = $line;
        }
        elsif ($line =~ /^Status:/ || $line =~ /^X-Status:/) {
            # just ignore it.  What we really want is in the Mozilla status
        }
        elsif ($line =~ /^X-Keywords:/) {
            $keywords = $line;
        }
        elsif ($line =~ /^X-Mozilla-Status:/) {
            # remembered for the flag mapping, and also passed through
            $mozillaStatus = $line;
            push @headerLines, $line;
        }
        else {
            push @headerLines, $line;
        }
    }

    tally_counter();
    close($in_fh);

    # Leave STDOUT open so later folders can still be written to "-";
    # for real files, report buffered write errors that surface at close.
    unless ($to_stdout) {
        close($out_fh) or blurt("Error closing $output_file: $!\n");
    }

    return 1;
}

#########
# main
#########
if(! &NGetOpt("file", "fe=s", "re", "in=s", "tr", "quiet", "verbose", "safe", "noconvert") || ! (@ARGV+0)) { print <