// File: identify/id.go (package identify).

// codeLineRE matches the "Code" row of the identification report and
// captures the language code. It is compiled once at package scope instead of
// on every parse. The capture accepts hyphens so that regional codes such as
// "zh-CN" are recognised, and trailing blanks or a CR before the line end are
// tolerated.
var codeLineRE = regexp.MustCompile(`(?m)^Code\s+([\w-]+)[ \t]*\r?$`)

// Indetification identifies the language of a single word.
//
// Identifier produces the raw identification report for the word. New sets it
// to a function that runs the external `trans` command; callers (for example
// tests) may replace it with a stub. The misspelled type name is kept because
// it is part of the exported API.
type Indetification struct {
	word       string
	Identifier func(*Indetification) (string, error)
}

// New returns an Indetification for word whose Identifier runs the `trans`
// command.
func New(word string) Indetification {
	return Indetification{
		word:       word,
		Identifier: executeTransShell,
	}
}

// executeTransShell runs `trans -no-ansi -id <word>` and returns whatever the
// command wrote to standard output together with the command's error, if any.
func executeTransShell(i *Indetification) (string, error) {
	out, err := exec.Command("trans", "-no-ansi", "-id", i.word).Output()
	return string(out), err
}

// Identify calls the Identifier and returns the language code found in its
// output, or "" when the output contains no "Code" row.
//
// It panics if the Identifier returns an error.
func (i Indetification) Identify() string {
	output, err := i.Identifier(&i)
	if err != nil {
		panic(err)
	}
	return parseTransIdentifyOutput(output)
}

// parseTransIdentifyOutput extracts the value of the first "Code" row from
// out. It returns "" when no such row is present.
func parseTransIdentifyOutput(out string) string {
	m := codeLineRE.FindStringSubmatch(out)
	if len(m) < 2 {
		return ""
	}
	return m[1]
}
+ "os" +) + + +type testData struct { + word string + identifyOutput string + expectedCode string +} + +func TestIdentify(t *testing.T) { + cases := []testData{ + testData{ +"macska", +`Magyar +Name Hungarian +Family Uralic +Writing system Latin +Code hu +ISO 639-3 hun +SIL http://www-01.sil.org/iso639-3/documentation.asp?id=hun +Glottolog http://glottolog.org/resource/languoid/id/hung1274 +Wikipedia http://en.wikipedia.org/wiki/Hungarian_language +`, +"hu", + }, + testData{ +"cat", +`English +Name English +Family Indo-European +Writing system Latin +Code en +ISO 639-3 eng +SIL http://www-01.sil.org/iso639-3/documentation.asp?id=eng +Glottolog http://glottolog.org/resource/languoid/id/stan1293 +Wikipedia http://en.wikipedia.org/wiki/English_language +`, +"en", + }, + testData{ +"szofisztikált", +`Magyar +Name Hungarian +Family Uralic +Writing system Latin +Code hu +ISO 639-3 hun +SIL http://www-01.sil.org/iso639-3/documentation.asp?id=hun +Glottolog http://glottolog.org/resource/languoid/id/hung1274 +Wikipedia http://en.wikipedia.org/wiki/Hungarian_language +`, +"hu", + }, + testData{ +"distribute", +`English +Name English +Family Indo-European +Writing system Latin +Code en +ISO 639-3 eng +SIL http://www-01.sil.org/iso639-3/documentation.asp?id=eng +Glottolog http://glottolog.org/resource/languoid/id/stan1293 +Wikipedia http://en.wikipedia.org/wiki/English_language +`, +"en", + }, + } + for _, data := range cases { + id := identify.New(data.word) + if _, ok := os.LookupEnv("NOMOCK"); !ok { + id.Identifier = func(i *identify.Indetification) (string, error) { + return data.identifyOutput, nil + } + } + if id.Identify() != data.expectedCode { + t.Errorf("Word '%s' should identify to '%s'!", data.word, data.expectedCode) + } + } +}