# Abfrageprozessor für Mr.Check (Fremdwörterlexikon)

#!/usr/bin/tclsh8.3
###############################################################################
#
# Abfrageprozessor für Mr.Check (Fremdwörterlexikon)
# Erik Buchmann, August '01
#

regsub {[ ]+} $argv {+} search

set header ""
set content ""

global header content

################################
# Funktionen
proc getServer {client_request server} {
global header content

regsub -all -lineanchor "^\[\t \n\]+" [string trim $client_request] "" client_request

# Socket aufmachen und Daten holen
if {[catch {set server_channel [socket $server 80]}]} {
puts "Got Error: Server not found"
exit
}

fconfigure $server_channel -translation crlf -blocking true
puts $server_channel "$client_request\n"
flush $server_channel

#Header holen
set line "foo"
set header ""
while {$line!="" } {
set line [gets $server_channel]
set header "$header\n$line"
}

# Daten holen
fconfigure $server_channel -translation binary -blocking true
set content [read $server_channel]

close $server_channel
}

################################
# Anfrage nach Location aufbauen

set client_request "\
GET /v2.0/Mrcheck.php?CID=MrcheckDGFW&SB=$search HTTP/1.0
Connection: close
User-Agent: lex.tcl
Host: mr-check.xipolis.net
Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*
Accept-Language: en
Accept-Charset: iso-8859-1,*,utf-8"

getServer $client_request "mr-check.xipolis.net"

######################
# Location extrahieren
regexp -lineanchor {(Location: http://)([^/]+)([^
]*)} $header nix nix server location

# Location umformen
regsub {Mrcheck.php.*$} $location "main.php" location

################################
# Anfrage nach Inhalt aufbauen

set client_request "\
GET $location HTTP/1.0
Connection: close
User-Agent: lex.tcl
Host: $server
Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*
Accept-Language: en
Accept-Charset: iso-8859-1,*,utf-8"

getServer $client_request $server

#Anfang und Ende abschneiden
regsub {^.*?(\</select\>)} $content {} content
regsub {^.*?(\</table\>)} $content {} content
regsub {(\<img src=).*$} $content {} content

#alle HTML-Tags entfernen
regsub -all -nocase {\<.*?\>} $content {} content

#Umlaute
regsub -all {(ä|ä)} $content {ä} content
regsub -all {(Ä|Ä)} $content {Ä} content
regsub -all {(ö|ö)} $content {ö} content
regsub -all {(Ö|Ö)} $content {Ö} content
regsub -all {(ü|ü)} $content {ü} content
regsub -all {(Ü|Ü)} $content {Ü} content
regsub -all {(ß|ß)} $content {ß} content
regsub -all {<} $content {<} content
regsub -all {>} $content {>} content
regsub -all {(«|«)} $content {«} content
regsub -all {(»|»)} $content {»} content
regsub -all {(ø|ø)} $content {ø} content
regsub -all {(é|é|q|ē)} $content {é} content
regsub -all {(á|á|à)} $content {á} content
regsub -all {(ó|ó)} $content {ó} content
regsub -all {(ý|ý)} $content {ý} content

#Whitespace
regsub -all {[ ]+} $content { } content
regsub -all -lineanchor {^[ ]+} $content {} content

set clist [split $content "\n"]

puts "------------------------------------------------------------"

foreach x $clist {

if {[string length [string trim $x]]>0} {
if {[regexp {^(Quelle:)} $x]} {
puts " "
} elseif {[regexp {^(Hilfe \| schlie.en)} $x ]} {
puts "keine weiteren Ergebnisse gefunden!"
} else {
puts "$x"
}
}
}

puts "------------------------------------------------------------"