% Bibliography for the 1st Workshop on Making Sense of Microposts (#MSM2011),
% published as CEUR Workshop Proceedings Vol-718.
%
% Layout notes:
%   * Each paper entry cross-references the proceedings entry. Classic BibTeX
%     requires a cross-referenced entry to appear AFTER every entry that
%     refers to it, so the proceedings entry is deliberately kept last.
%   * Names are stored as "Last, First" so compound surnames are not split.
%   * Accented letters are written as BibTeX special characters, e.g. {\"o},
%     so that sorting and label generation treat them as single letters.
%   * Words that must keep their capitalisation in titles are brace-protected.

@InProceedings{weller.ea:2011,
  author    = {Weller, Katrin and Dr{\"o}ge, Evelyn and Puschmann, Cornelius},
  title     = {Citation Analysis in {Twitter}: Approaches for Defining and Measuring Information Flows within Tweets during Scientific Conferences},
  booktitle = {Making Sense of Microposts {(\#MSM2011)}},
  pages     = {1--12},
  year      = 2011,
  crossref  = {proc_msm2011@eswc2011},
  url       = {http://ceur-ws.org/Vol-718/paper_04.pdf},
  keywords  = {Twitter; microblogging; tweets; citation analysis; informetrics},
  abstract  = {This paper investigates Twitter usage in scientific contexts, particularly the use of Twitter during scientific conferences. It proposes a methodology for capturing and analyzing citations/references in Twitter. First results are presented based on the analysis of tweets gathered for two conference hashtags.},
}

@InProceedings{celino.ea:2011,
  author    = {Celino, Irene and Dell'Aglio, Daniele and Della Valle, Emanuele and Huang, Yi and Lee, Tony and Park, Stanley and Tresp, Volker},
  title     = {Making Sense of Location Based Micro-posts Using Stream Reasoning},
  booktitle = {Making Sense of Microposts {(\#MSM2011)}},
  pages     = {13--18},
  year      = 2011,
  crossref  = {proc_msm2011@eswc2011},
  url       = {http://ceur-ws.org/Vol-718/paper_05.pdf},
  abstract  = {Consider an urban environment and think to its semi-public realms (e.g., shops, bars, visitors attractions, means of transportation). Who is the maven of a district? How fast and how broad can such maven influence the opinions of others? These are just few of the questions BOTTARI (our Location-based Social Media Analysis mobile app) is getting ready to answer. In this position paper, we recap our investigation on deductive and inductive stream reasoning for social media analysis, and we show how the results of this research form the underpinning of BOTTARI.},
}

@InProceedings{huron.ea:2011,
  author    = {Huron, Samuel and Haussonne, Yves-Marie and Monnin, Alexandre and L'hour, Yves-Marie},
  title     = {{DEMO}: Polemical Video Annotation by {Twitter}},
  booktitle = {Making Sense of Microposts {(\#MSM2011)}},
  pages     = {19--21},
  year      = 2011,
  crossref  = {proc_msm2011@eswc2011},
  url       = {http://ceur-ws.org/Vol-718/paper_11.pdf},
  keywords  = {micropost; annotation; video; social interactions; live; polemic},
  abstract  = {In this paper we present a method to enhance video metadata by using microposts generated through social interactions during live events. Our goal is to make visible the audience "polemical activity" (the exchange of arguments, counter-arguments and references) elicited by the talk, and use it as a tool to browse the video record. To achieve it, we design a new interface and service that makes a synthetic view of microposts interaction.},
}

@InProceedings{choudhury.ea:2011,
  author    = {Choudhury, Smitashree and Breslin, John},
  title     = {Extracting Semantic Entities and Events from Sports Tweets},
  booktitle = {Making Sense of Microposts {(\#MSM2011)}},
  pages     = {22--32},
  year      = 2011,
  crossref  = {proc_msm2011@eswc2011},
  url       = {http://ceur-ws.org/Vol-718/paper_17.pdf},
  abstract  = {Large volumes of user-generated content on practically every major issue and event are being created on the microblogging site Twitter. This content can be combined and processed to detect events, entities and popular moods to feed various knowledge-intensive practical applications. On the downside, these content items are very noisy and highly informal, making it difficult to extract sense out of the stream. In this paper, we exploit various approaches to detect the named entities and significant micro-events from users' tweets during a live sports event. Here we describe how combining linguistic features with background knowledge and the use of Twitter-specific features can achieve high, precise detection results (f-measure = 87\%) in different datasets. A study was conducted on tweets from cricket matches in the ICC World Cup in order to augment the event-related non-textual media with collective intelligence.},
}

@InProceedings{cano.ea:2011,
  author    = {Cano, Amparo E. and Tucker, Simon and Ciravegna, Fabio},
  title     = {Follow Me: Capturing Entity-Based Semantics Emerging from Personal Awareness Streams},
  booktitle = {Making Sense of Microposts {(\#MSM2011)}},
  pages     = {33--44},
  year      = 2011,
  crossref  = {proc_msm2011@eswc2011},
  url       = {http://ceur-ws.org/Vol-718/paper_08.pdf},
  keywords  = {linked data streams; social awareness streams; microblogging; context},
  abstract  = {Social activity streams provide information both about the user's interests and about the way in which they engage with real world entities. Recent research has provided evidence of the presence of emergent semantics in such streams. In this work, we explore whether the online discourse of user's social activities can convey meaningful contextual information. We introduce a user-centric methodology based on tensor analysis for deriving personal vocabularies given an entity-based context. By extracting entities (e.g. location, organisation, people) from the user's stream content, we explore the data structures that emerge from the user's interrelationship with these entities. Our experimental results revealed that the simultaneous correlation of entities leads to the identification of concepts which are relevant to the user given a specific context. This methodology is relevant for mobile application designers (1) in fostering user entity-based ontologies for merging user context in pervasive environments, (2) for personalising entity-based recommendations.},
}

@InProceedings{gentile.ea:2011,
  author    = {Gentile, Anna Lisa and Cano Basave, Amparo Elizabeth and Dadzie, Aba-Sah and Lanfranchi, Vitaveska and Ireson, Neil},
  title     = {Does Size Matter? {When} Small is Good Enough},
  booktitle = {Making Sense of Microposts {(\#MSM2011)}},
  pages     = {45--56},
  year      = 2011,
  crossref  = {proc_msm2011@eswc2011},
  url       = {http://ceur-ws.org/Vol-718/paper_09.pdf},
  keywords  = {Short Messages; Email Processing; Text Processing; Document classification},
  abstract  = {This paper reports the observation of the influence of the size of documents on the accuracy of a defined text processing task. Our hypothesis is that based on a specific task (in this case, topic classification), results obtained using longer texts may be approximated by short texts, of micropost size, i.e., maximum length 140 characters. Using an email dataset as the main corpus, we generate several fixed-size corpora, consisting of truncated emails, from micropost size (140 characters), and successive multiples thereof, to the full size of each email. Our methodology consists of two steps: (1) corpus-driven topic extraction and (2) document topic classification. We build the topic representation model using the main corpus, through k-means clustering, with each k-derived topic represented as a weighted number of terms. We then perform document classification according to the k topics: first over the main corpus, then over each truncated corpus, and observe the variance in classification accuracy with document size. The results obtained show that the accuracy of topic classification for micropost-size texts is a suitable approximation of classification performed on longer texts.},
}

@InProceedings{milikic.ea:2011,
  author    = {Milikic, Nikola and Jovanovic, Jelena and Stankovic, Milan},
  title     = {Discovering the Dynamics of Terms' Semantic Relatedness through {Twitter}},
  booktitle = {Making Sense of Microposts {(\#MSM2011)}},
  pages     = {57--68},
  year      = 2011,
  crossref  = {proc_msm2011@eswc2011},
  url       = {http://ceur-ws.org/Vol-718/paper_02.pdf},
  keywords  = {Semantic relatedness; dynamic measure of semantic relatedness; microposts; Twitter},
  abstract  = {Determining the semantic relatedness (SR) of two terms has been an appealing topic in information retrieval for many years as such information is useful for various tasks ranging from tag recommendation, over search query refinement to suggesting new web resources for the user to discover. Most approaches consider the SR of terms as static over time, and disregard the eventual temporal changes as imperfections. However, detecting and tracing changes in SR of terms over time may help in understanding the nature of changes in public opinion, as well as the change in the usage of terms in common language and jargon. In this paper, we propose an approach that makes use of microposts data in order to establish a dynamic measure of SR of terms, i.e., a measure that accounts for the changes in SR over time. We propose different scenarios of use (in online advertising and organizational knowledge management) which demonstrate the applicability of our approach in real life situations. We also provide a demo application for visualizing the change in micropost-based SR of terms.},
}

@InProceedings{skilters.ea:2011,
  author    = {{\v{S}}{\c{k}}ilters, Jur{\c{g}}is and Kreile, Monika and Boj{\=a}rs, Uldis and Brik{\v{s}}e, Inta and Pencis, J{\=a}nis and Uzule, Laura},
  title     = {The Pragmatics of Political Messages in {Twitter} Communication},
  booktitle = {Making Sense of Microposts {(\#MSM2011)}},
  pages     = {69--80},
  year      = 2011,
  crossref  = {proc_msm2011@eswc2011},
  url       = {http://ceur-ws.org/Vol-718/paper_18.pdf},
  keywords  = {Twitter; virtual identity; social science; political messages},
  abstract  = {The aim of the current paper is to formulate a conception of pragmatic patterns characterizing the construction of individual and collective identities in virtual communities (in our case: the Twitter community). We have explored several theoretical approaches and frameworks and relevant empirical data to show that the agents building virtual communities are 'extended selves' grounded in a highly dynamic and compressed, linguistically mediated virtual network structure. Our empirical evidence consists of a study of discourse related to the Latvian parliamentary elections of 2010. We used a Twitter corpus (in Latvian) harvested and statistically evaluated using the Pointwise Mutual Information (PMI) algorithm and complemented with qualitative and quantitative content analysis.},
}

@InProceedings{maynard.ea:2011,
  author    = {Maynard, Diana and Funk, Adam},
  title     = {Automatic Detection of Political Opinions in Tweets},
  booktitle = {Making Sense of Microposts {(\#MSM2011)}},
  pages     = {81--92},
  year      = 2011,
  crossref  = {proc_msm2011@eswc2011},
  url       = {http://ceur-ws.org/Vol-718/paper_19.pdf},
  keywords  = {NLP; opinion mining; social media analysis},
  abstract  = {In this paper, we discuss a variety of issues related to opinion mining from microposts, and the challenges they impose on an NLP system, along with an example application we have developed to determine political leanings from a set of pre-election tweets. While there are a number of sentiment analysis tools available which summarise positive, negative and neutral tweets about a given keyword or topic, these tools generally produce poor results, and operate in a fairly simplistic way, using only the presence of certain positive and negative adjectives as indicators, or simple learning techniques which do not work well on short microposts. On the other hand, intelligent tools which work well on movie and customer reviews cannot be used on microposts due to their brevity and lack of context. Our methods make use of a variety of sophisticated NLP techniques in order to extract more meaningful and higher quality opinions, and incorporate extra-linguistic contextual information.},
}

@InProceedings{nielsen:2011,
  author    = {Nielsen, Finn {\AA}rup},
  title     = {A New {ANEW}: Evaluation of a Word List for Sentiment Analysis in Microblogs},
  booktitle = {Making Sense of Microposts {(\#MSM2011)}},
  pages     = {93--98},
  year      = 2011,
  crossref  = {proc_msm2011@eswc2011},
  url       = {http://ceur-ws.org/Vol-718/paper_16.pdf},
  abstract  = {Sentiment analysis of microblogs such as Twitter has recently gained a fair amount of attention. One of the simplest sentiment analysis approaches compares the words of a posting against a labeled word list, where each word has been scored for valence, -- a "sentiment lexicon" or "affective word lists". There exist several affective word lists, e.g., ANEW (Affective Norms for English Words) developed before the advent of microblogging and sentiment analysis. I wanted to examine how well ANEW and other word lists performs for the detection of sentiment strength in microblog posts in comparison with a new word list specifically constructed for microblogs. I used manually labeled postings from Twitter scored for sentiment. Using a simple word matching I show that the new word list may perform better than ANEW, though not as good as the more elaborate approach found in SentiStrength.},
}

% Parent entry for all papers above. It must stay after its children so that
% classic BibTeX can resolve their crossref fields. The booktitle field is
% repeated here because classic BibTeX copies fields verbatim and does not
% map the parent's title onto a child's booktitle.
@Proceedings{proc_msm2011@eswc2011,
  editor    = {Rowe, Matthew and Stankovic, Milan and Dadzie, Aba-Sah and Hardey, Mariann},
  title     = {Proceedings, 1st Workshop on Making Sense of Microposts {(\#MSM2011)}: Big things come in small packages, {Heraklion}, {Crete}, {Greece}, 30th {May} 2011},
  booktitle = {Making Sense of Microposts {(\#MSM2011)}},
  series    = {CEUR Workshop Proceedings},
  volume    = {718},
  year      = 2011,
  month     = may,
  url       = {http://ceur-ws.org/Vol-718},
}