Add package identify to get language identification from Google
This commit is contained in:
parent
80eb2de872
commit
4aa5a8b0a9
47
identify/id.go
Normal file
47
identify/id.go
Normal file
@ -0,0 +1,47 @@
|
||||
package identify
|
||||
|
||||
import (
|
||||
"os/exec"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
|
||||
// Indetification pairs a word with the function used to fetch the raw
// language-identification report for that word.
type Indetification struct {
	// word is the text handed to the Identifier.
	word string

	// Identifier returns the raw identification report for the value it is
	// given, or an error. New installs executeTransShell here; callers may
	// overwrite the field with their own implementation.
	Identifier func(*Indetification) (string, error)
}
|
||||
|
||||
func New(word string) Indetification {
|
||||
return Indetification{
|
||||
word: word,
|
||||
Identifier: executeTransShell,
|
||||
}
|
||||
}
|
||||
|
||||
func executeTransShell(i *Indetification) (string, error) {
|
||||
config := []string{
|
||||
"-no-ansi",
|
||||
"-id",
|
||||
i.word,
|
||||
}
|
||||
outBytes, err := exec.Command("trans", config...).Output()
|
||||
return string(outBytes), err
|
||||
}
|
||||
|
||||
func (i Indetification) Identify() string {
|
||||
output, err := i.Identifier(&i)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return parseTransIdentifyOutput(output)
|
||||
}
|
||||
|
||||
// transCodeLine matches the "Code" row of an identification report and
// captures the language code that follows it. It is compiled once at package
// scope rather than on every call. The capture accepts hyphens so that codes
// containing one (for example "zh-CN") are returned whole, and trailing
// blanks or a carriage return before the end of the line are tolerated.
var transCodeLine = regexp.MustCompile(`(?m)^Code\s+([\w-]+)[ \t\r]*$`)

// parseTransIdentifyOutput extracts the language code from out, the textual
// report produced by an Identifier. It returns the value found on the first
// line that starts with "Code", or the empty string when no such line exists.
func parseTransIdentifyOutput(out string) string {
	matches := transCodeLine.FindStringSubmatch(out)
	if len(matches) < 2 {
		return ""
	}
	return matches[1]
}
|
86
identify/id_test.go
Normal file
86
identify/id_test.go
Normal file
@ -0,0 +1,86 @@
|
||||
package identify_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"."
|
||||
"os"
|
||||
)
|
||||
|
||||
|
||||
// testData is one row of the TestIdentify table:
//   - word is the input passed to identify.New,
//   - identifyOutput is the canned report returned by the stubbed Identifier,
//   - expectedCode is the language code Identify must return.
type testData struct {
	word, identifyOutput, expectedCode string
}
|
||||
|
||||
// TestIdentify is a table-driven test of Identify. For each case it builds an
// Indetification with identify.New, replaces its Identifier with a stub that
// returns the case's canned report (unless the NOMOCK environment variable is
// set), and checks that Identify returns the expected language code.
func TestIdentify(t *testing.T) {
|
||||
cases := []testData{
|
||||
testData{
|
||||
"macska",
|
||||
`Magyar
|
||||
Name Hungarian
|
||||
Family Uralic
|
||||
Writing system Latin
|
||||
Code hu
|
||||
ISO 639-3 hun
|
||||
SIL http://www-01.sil.org/iso639-3/documentation.asp?id=hun
|
||||
Glottolog http://glottolog.org/resource/languoid/id/hung1274
|
||||
Wikipedia http://en.wikipedia.org/wiki/Hungarian_language
|
||||
`,
|
||||
"hu",
|
||||
},
|
||||
testData{
|
||||
"cat",
|
||||
`English
|
||||
Name English
|
||||
Family Indo-European
|
||||
Writing system Latin
|
||||
Code en
|
||||
ISO 639-3 eng
|
||||
SIL http://www-01.sil.org/iso639-3/documentation.asp?id=eng
|
||||
Glottolog http://glottolog.org/resource/languoid/id/stan1293
|
||||
Wikipedia http://en.wikipedia.org/wiki/English_language
|
||||
`,
|
||||
"en",
|
||||
},
|
||||
testData{
|
||||
"szofisztikált",
|
||||
`Magyar
|
||||
Name Hungarian
|
||||
Family Uralic
|
||||
Writing system Latin
|
||||
Code hu
|
||||
ISO 639-3 hun
|
||||
SIL http://www-01.sil.org/iso639-3/documentation.asp?id=hun
|
||||
Glottolog http://glottolog.org/resource/languoid/id/hung1274
|
||||
Wikipedia http://en.wikipedia.org/wiki/Hungarian_language
|
||||
`,
|
||||
"hu",
|
||||
},
|
||||
testData{
|
||||
"distribute",
|
||||
`English
|
||||
Name English
|
||||
Family Indo-European
|
||||
Writing system Latin
|
||||
Code en
|
||||
ISO 639-3 eng
|
||||
SIL http://www-01.sil.org/iso639-3/documentation.asp?id=eng
|
||||
Glottolog http://glottolog.org/resource/languoid/id/stan1293
|
||||
Wikipedia http://en.wikipedia.org/wiki/English_language
|
||||
`,
|
||||
"en",
|
||||
},
|
||||
}
|
||||
// Run every table entry through New + Identify.
for _, data := range cases {
|
||||
id := identify.New(data.word)
|
||||
// When NOMOCK is absent from the environment, swap in a stub that returns
// the canned report; when it is set, the Identifier installed by New is
// left in place.
if _, ok := os.LookupEnv("NOMOCK"); !ok {
|
||||
id.Identifier = func(i *identify.Indetification) (string, error) {
|
||||
return data.identifyOutput, nil
|
||||
}
|
||||
}
|
||||
if id.Identify() != data.expectedCode {
|
||||
t.Errorf("Word '%s' should identify to '%s'!", data.word, data.expectedCode)
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user