#
# Sample links dictionary file for Seth Golub's txt2html v1.19
# http://www.cs.wustl.edu/~seth/txt2html/
#
# This dictionary contains some patterns for converting obvious URLs,
# ftp sites, hostnames, email addresses and the like to hrefs.
#
# Adapted shamelessly from the html.pl package by Oscar Nierstrasz in
# the Software Archive of the Software Composition Group
# http://iamwww.unibe.ch/~scg/Src/
#
# Email suggestions to seth@cs.wustl.edu
# Please include "txt2html" in the subject of your message.
#
# Layout: one rule per line, written as
#     <delimited pattern>  -<options>->  <replacement>
# Lines starting with '#' are comments, so a rule must never share a
# line with a comment or with another rule.
#
# Rules with a plain "->" give a URL; the matched text becomes the link
# label.  Rules carrying the "h" option supply literal HTML as the
# replacement (see the txt2html manual), so they must contain the
# anchor markup themselves -- a replacement such as "$1$2" would merely
# re-emit the match and create no link.

# Urls: <service>:<rest-of-url>

|snews:[\w\.]+| -> $&
|http:[\w/\.:+\-~\%#?]+| -> $&
|shttp:[\w/\.:+\-~\%#?]+| -> $&
|https:[\w/\.:+\-~\%#?]+| -> $&
|file:[\w/\.:+\-]+| -> $&
|ftp:[\w/\.:+\-]+| -> $&
|wais:[\w/\.:+\-]+| -> $&
|gopher:[\w/\.:+\-]+| -> $&
|telnet:[\w/\.:+\-]+| -> $&

# catch some newsgroups to avoid confusion with sites:
# ($1 is the single character preceding the group name; it is emitted
# unchanged in front of the anchor.)

|([^\w\-/\.:\@>])(alt\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(bionet\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(bit\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(biz\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(clari\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(comp\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(gnu\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(humanities\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(k12\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(misc\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(news\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(rec\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(soc\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(talk\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(us\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(ch\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(de\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>

# FTP locations (with directory):
# ($3 is optional whitespace between the colon and the path; it is
# moved behind the link so the anchor text stays compact.)

# anonymous@<site>:<path>
|(anonymous\@)([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,}):(\s*)([\w\d+\-/\.]+)| -h-> $1<A HREF="ftp://$2/$4">$2:$4</A>$3

# ftp@<site>:<path>
|(ftp\@)([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,}):(\s*)([\w\d+\-/\.]+)| -h-> $1<A HREF="ftp://$2/$4">$2:$4</A>$3

# Email address
|[a-zA-Z0-9_\+\-\.]+\@([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,})| -> mailto:$&

# <site>:<path>
|([^\w\-/\.:\@>])([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,}):(\s*)([\w\d+\-/\.]+)| -h-> $1<A HREF="ftp://$2/$4">$2:$4</A>$3

# NB: don't confuse an http server with a port number for
# an FTP location!
# internet number version: <internet-num>:<path>
|([^\w\-/\.:\@])(\d{2,}\.\d{2,}\.\d+\.\d+):([\w\d+\-/\.]+)| -h-> $1<A HREF="ftp://$2/$3">$2:$3</A>

# telnet <site> <port>
|telnet ([a-zA-Z][\w+\-]+(\.[\w\.+\-]+)+\.[a-zA-Z]{2,})\s+(\d{2,4})| -h-> telnet <A HREF="telnet://$1:$3/">$1 $3</A>

# ftp <site>
|ftp ([a-zA-Z][\w+\-]+(\.[\w\.+\-]+)+\.[a-zA-Z]{2,})| -h-> ftp <A HREF="ftp://$1/">$1</A>

# host with "ftp" in the machine name
# NOTE(review): the replacement emits a literal "ftp " in front of the
# host name, exactly as the existing replacement text does -- confirm
# that this prefix is wanted in the output.
|([a-zA-Z][\w+\-]*ftp[a-zA-Z][\w+\-]*\.[\w\.+\-]+\.[a-zA-Z]{2,})([^\w\d\-/\.:!])| -h-> ftp <A HREF="ftp://$1/">$1</A>$2

# <site> <port>
# NOTE(review): a host followed by a port number is assumed to be a
# telnet target, mirroring the explicit "telnet <site> <port>" rule.
|([a-zA-Z][\w+\-]+\.[\w+\-]+\.[a-zA-Z]{2,})\s+(\d{2,4})| -h-> <A HREF="telnet://$1:$2/">$1 $2</A>

# just the site name:
# NOTE(review): http:// is assumed as the scheme for a bare host name;
# confirm against the upstream sample dictionary.
|([^\w\-/\.:\@>])([a-zA-Z][\w+\-]+(\.[\w+\-]+)+\.[a-zA-Z]{2,})| -h-> $1<A HREF="http://$2/">$2</A>

# just internet numbers with port:
# NOTE(review): telnet:// assumed, as for "<site> <port>" -- confirm.
|([^\w\-/\.:\@])(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s+(\d{1,4})| -h-> $1<A HREF="telnet://$2:$3/">$2 $3</A>

# just internet numbers:
# NOTE(review): telnet:// assumed for a bare internet number -- confirm.
|([^\w\-/\.:\@])(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})| -h-> $1<A HREF="telnet://$2/">$2</A>

# (see "relative path") as used by Tom Fine
# /\(see \"([^\"]+)\"\)/ -> $1.html

# RFCs
/RFC ?(\d+)/ -i-> http://www.cis.ohio-state.edu:82/rfc/rfc$1.html

# This would turn "f^H_o^H_o^H_" into "foo".  Gross, isn't it?
# Thanks to Mark O'Dell for fixing this.
#
# /(.\\010_)+/ -he-> $tmp = $&;$tmp =~ s@\010_@@g;"$tmp"
# /(_\\010.)+/ -he-> $tmp = $&;$tmp =~ s@_\010@@g;"$tmp"
# /(.\^H_)+/ -he-> $tmp = $&;$tmp =~ s@\^H_@@g;"$tmp"
# /(_\^H.)+/ -he-> $tmp = $&;$tmp =~ s@_\^H@@g;"$tmp"

# Seth and his amazing conversion program :-)

"Seth Golub" -io-> http://www.cs.wustl.edu/~seth/
"txt2html" -io-> http://www.cs.wustl.edu/~seth/txt2html/

# End of sample dictionary