tags:

views:

305

answers:

2

I am looking for a web service which can extract important keywords from a piece of text.

I have already tried the Yahoo Term Extraction service. The problem with this service is that it does not give any results for short text.

Alternatively, is there any ready-to-use code which can extract the important keywords from a piece of text, i.e. remove all generic words from the string?

For example:

"I want to buy a digital camera"

terms: "digital", "camera"

Thanks.

There are two other Stack Overflow questions which are related and have more info:

"What is a simple way to generate keywords from a text?" and "Filter out common words for search query"

+1  A: 

I know some people that have had some success in using the WordsFinder service.

Mitchel Sellers
+2  A: 

You may want to look at www.opencalais.com (associated with Reuters). It is a web service that hands back an RDF/XML analysis of the text you submit.

Your text "I want to buy a digital camera" returns this rdf / xml document --

 <!--Use of the Calais Web Service is governed by the Terms of Service located at http://www.opencalais.com. By using this service or the results of the service you agree to these terms of service.-->
 <!--Relations: GenericRelations

 Technology: digital camera-->
 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:c="http://s.opencalais.com/1/pred/">
  <rdf:Description c:allowDistribution="true" c:allowSearch="true" c:calaisRequestID="1ef6064f-283c-4fd4-a922-0ff493c4353a" c:externalID="calaisbridge" c:id="http://id.opencalais.com/SLlKCS2i2mZA3ABrQS0F9Q" rdf:about="http://d.opencalais.com/dochash-1/97cdaf47-fa15-31a1-be2b-3be1184d412a">
   <rdf:type rdf:resource="http://s.opencalais.com/1/type/sys/DocInfo" />
   <c:document>
 <![CDATA[<Document>
 <Date>2009-04-03</Date>
 <Body>I want to buy a digital camera</Body>
 </Document>]]>
 </c:document>
   <c:docTitle />
   <c:docDate>2009-04-03 00:00:00</c:docDate>
   <c:externalMetadata c:caller="calaisbridge" />
   <c:submitter>calaisbridge</c:submitter>
  </rdf:Description>
  <rdf:Description c:contentType="text/txt" c:emVer="UserVocabulariesIM" c:langIdVer="DefaultLangId" c:language="InputTextTooShort" c:processingVer="CalaisJob01" c:submissionDate="2009-04-03 14:14:42.532" rdf:about="http://d.opencalais.com/dochash-1/97cdaf47-fa15-31a1-be2b-3be1184d412a/meta">
   <rdf:type rdf:resource="http://s.opencalais.com/1/type/sys/DocInfoMeta" />
   <c:docId rdf:resource="http://d.opencalais.com/dochash-1/97cdaf47-fa15-31a1-be2b-3be1184d412a" />
   <c:submitterCode>416dcd8a-766f-0aa3-d94c-e5034b6ffc98</c:submitterCode>
   <c:signature>digestalg-1|sUmdk2pKaXLrsD0b2sNfX5dPvW4=|e+F5sMjqxqj0Qi+efzdG5D2s1TKBM//zH+NI1MNYvugY3FS9e3xP6g==</c:signature>
  </rdf:Description>
  <rdf:Description rdf:about="http://d.opencalais.com/dochash-1/97cdaf47-fa15-31a1-be2b-3be1184d412a/lid/DefaultLangId">
   <rdf:type rdf:resource="http://s.opencalais.com/1/type/lid/DefaultLangId" />
   <c:docId rdf:resource="http://d.opencalais.com/dochash-1/97cdaf47-fa15-31a1-be2b-3be1184d412a" />
   <c:lang rdf:resource="http://d.opencalais.com/lid/DefaultLangId/InputTextTooShort" />
  </rdf:Description>
  <rdf:Description rdf:about="http://d.opencalais.com/genericHasher-1/e224e552-7ebd-3ed1-aaa4-f8aba30331c2">
   <rdf:type rdf:resource="http://s.opencalais.com/1/type/em/e/Technology" />
   <c:name>digital camera</c:name>
  </rdf:Description>
  <rdf:Description rdf:about="http://d.opencalais.com/dochash-1/97cdaf47-fa15-31a1-be2b-3be1184d412a/Instance/1">
   <rdf:type rdf:resource="http://s.opencalais.com/1/type/sys/InstanceInfo" />
   <c:docId rdf:resource="http://d.opencalais.com/dochash-1/97cdaf47-fa15-31a1-be2b-3be1184d412a" />
   <c:subject rdf:resource="http://d.opencalais.com/genericHasher-1/e224e552-7ebd-3ed1-aaa4-f8aba30331c2" />
   <!--Technology: digital camera; -->
   <c:detection>[ment&gt;&lt;Date&gt;2009-04-03&lt;/Date&gt;&lt;Body&gt;I want to buy a ]digital camera[&lt;/Body&gt;&lt;/Document&gt;]</c:detection>
   <c:prefix>ment&gt;&lt;Date&gt;2009-04-03&lt;/Date&gt;&lt;Body&gt;I want to buy a </c:prefix>
   <c:exact>digital camera</c:exact>
   <c:suffix>&lt;/Body&gt;&lt;/Document&gt;</c:suffix>
   <c:offset>55</c:offset>
   <c:length>14</c:length>
  </rdf:Description>
  <rdf:Description rdf:about="http://d.opencalais.com/dochash-1/97cdaf47-fa15-31a1-be2b-3be1184d412a/Relevance/1">
   <rdf:type rdf:resource="http://s.opencalais.com/1/type/sys/RelevanceInfo" />
   <c:docId rdf:resource="http://d.opencalais.com/dochash-1/97cdaf47-fa15-31a1-be2b-3be1184d412a" />
   <c:subject rdf:resource="http://d.opencalais.com/genericHasher-1/e224e552-7ebd-3ed1-aaa4-f8aba30331c2" />
   <c:relevance>0.857</c:relevance>
  </rdf:Description>
  <rdf:Description rdf:about="http://d.opencalais.com/genericHasher-1/e8eac39c-f280-331e-9ccd-07f740d46ddb">
   <rdf:type rdf:resource="http://s.opencalais.com/1/type/em/r/GenericRelations" />
   <c:verb>buy</c:verb>
   <c:relationsubject>I</c:relationsubject>
   <!--digital camera-->
   <c:relationobject rdf:resource="http://d.opencalais.com/genericHasher-1/e224e552-7ebd-3ed1-aaa4-f8aba30331c2" />
  </rdf:Description>
  <rdf:Description rdf:about="http://d.opencalais.com/dochash-1/97cdaf47-fa15-31a1-be2b-3be1184d412a/Instance/2">
   <rdf:type rdf:resource="http://s.opencalais.com/1/type/sys/InstanceInfo" />
   <c:docId rdf:resource="http://d.opencalais.com/dochash-1/97cdaf47-fa15-31a1-be2b-3be1184d412a" />
   <c:subject rdf:resource="http://d.opencalais.com/genericHasher-1/e8eac39c-f280-331e-9ccd-07f740d46ddb" />
   <!--GenericRelations: verb: buy; relationsubject: I; relationobject: digital camera; -->
   <c:detection>[&lt;Document&gt;&lt;Date&gt;2009-04-03&lt;/Date&gt;&lt;Body&gt;]I want to buy a digital camera[&lt;/Body&gt;&lt;/Document&gt;]</c:detection>
   <c:prefix>&lt;Document&gt;&lt;Date&gt;2009-04-03&lt;/Date&gt;&lt;Body&gt;</c:prefix>
   <c:exact>I want to buy a digital camera</c:exact>
   <c:suffix>&lt;/Body&gt;&lt;/Document&gt;</c:suffix>
   <c:offset>39</c:offset>
   <c:length>30</c:length>
  </rdf:Description>
 </rdf:RDF>
malsmith
this looks really interesting, and appears to be what I am looking for. thanks a ton.
akshat