@inbook {77, title = {Grounding a Sociable Robot{\textquoteright}s Movements in Multimodal, Situational Engagements}, booktitle = {New Frontiers in Artificial Intelligence}, year = {2014}, pages = {267-281}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, abstract = {
To deal with the question of what a sociable robot is, we describe how an educational robot is encountered by children, teachers and designers in a preschool. We consider the importance of the robot{\textquoteright}s body by focusing on how its movements are contingently embedded in interactional situations. We point out that the effects of agency that these movements generate are inseparable from their grounding in locally coordinated, multimodal actions and interactions. Sociable robots are benefiting from machine perception systems that automatically recognize social behavior (e.g., detect and recognize people, recognize their facial expressions and gestures).
}, author = {Movellan, J. and Malmir, M. and Forster, D.} } @conference {76, title = {Home Alone: Social Robots for Digital Ethnography of Toddler Behavior}, booktitle = {Proceedings of the IEEE International Conference on Computer Vision Workshops}, year = {2013}, publisher = {IEEE}, organization = {IEEE}, author = {Malmir, M. and Forster, D. and Youngstrom, K. and Morrison, L. and Movellan, J.} } @conference {75, title = {Design and early evaluation of the RUBI-5 sociable robots}, booktitle = {2012 IEEE International Conference on Development and Learning and Epigenetic Robotics (ICDL)}, year = {2012}, month = {11/2012}, publisher = {IEEE}, organization = {IEEE}, address = {San Diego, CA}, isbn = {978-1-4673-4964-2}, author = {Johnson, D. and Malmir, M. and Forster, D. and Ala{\v c}, M. and Movellan, J.} } @conference {72, title = {Automated facial affect analysis for one-on-one tutoring applications}, booktitle = {2011 IEEE International Conference on Automatic Face Gesture Recognition and Workshops (FG 2011)}, year = {2011}, month = {03/2011}, publisher = {IEEE}, organization = {IEEE}, address = {Santa Barbara, CA}, abstract = {In this paper, we explore the use of computer vision techniques to analyze students{\textquoteright} moods during one-on-one teaching interactions. The eventual goal is to create automated tutoring systems that are sensitive to the student{\textquoteright}s mood and affective state. We find that the problem of accurately determining a child{\textquoteright}s mood from a single video frame is surprisingly difficult, even for humans. However, when the system is allowed to make decisions based on information from 10 to 30 seconds of video, excellent performance may be obtained.
}, keywords = {automated facial affect analysis, automated tutoring system, behavioural sciences computing, computer vision technique, Context, decision making, education, Emotion recognition, face recognition, Human, human computer interaction, Labeling, Machine Learning, Mood, Histograms, one-on-one tutoring application, Intelligent tutoring systems, student mood analysis}, isbn = {978-1-4244-9140-7}, author = {Butko, N. and Theocharous, G. and Philipose, M. and Movellan, J.} } @conference {73, title = {The computer expression recognition toolbox (CERT)}, booktitle = {2011 IEEE International Conference on Automatic Face Gesture Recognition and Workshops (FG 2011)}, year = {2011}, month = {03/2011}, publisher = {IEEE}, organization = {IEEE}, address = {Santa Barbara, CA}, abstract = {We present the Computer Expression Recognition Toolbox (CERT), a software tool for fully automatic real-time facial expression recognition, and officially release it for free academic use. CERT can automatically code the intensity of 19 different facial actions from the Facial Action Unit Coding System (FACS) and 6 different prototypical facial expressions. It also estimates the locations of 10 facial features as well as the 3-D orientation (yaw, pitch, roll) of the head. On a database of posed facial expressions, Extended Cohn-Kanade (CK+ [1]), CERT achieves an average recognition performance (probability of correctness on a two-alternative forced choice (2AFC) task between one positive and one negative example) of 90.1\% when analyzing facial actions. On a spontaneous facial expression dataset, CERT achieves an accuracy of nearly 80\%. On a standard dual core laptop, CERT can process 320 {\texttimes} 240 video images in real time at approximately 10 frames per second.
}, keywords = {3D orientation, Accuracy, automatic real-time facial expression recognition, CERT, computer expression recognition toolbox, Detectors, dual core laptop, Emotion recognition, Encoding, extended Cohn-Kanade, Face, face recognition, facial action unit coding system, facial expression dataset, Facial features, FACS, Gold, Image coding, software tool, software tools, two-alternative forced choice task}, isbn = {978-1-4244-9140-7}, author = {Littlewort, G. and Whitehill, J. and Wu, T. and Fasel, I. and Frank, M. and Movellan, J. and Bartlett, M.} } @article {74, title = {When a robot is social: Spatial arrangements and multimodal semiotic engagement in the practice of social robotics}, journal = {Social Studies of Science}, volume = {41}, year = {2011}, month = {12/2011}, pages = {893-926}, chapter = {893}, abstract = {Social roboticists design their robots to function as social agents in interaction with humans and other robots. Although we do not deny that the robot{\textquoteright}s design features are crucial for attaining this aim, we point to the relevance of spatial organization and coordination between the robot and the humans who interact with it. We recover these interactions through an observational study of a social robotics laboratory and examine them by applying a multimodal interactional analysis to two moments of robotics practice. We describe the vital role of roboticists and of the group of preverbal infants, who are involved in a robot{\textquoteright}s design activity, and we argue that the robot{\textquoteright}s social character is intrinsically related to the subtleties of human interactional moves in laboratories of social robotics. This human involvement in the robot{\textquoteright}s social agency is not simply controlled by individual will. Instead, the human{\textendash}machine couplings are demanded by the situational dynamics in which the robot is lodged.
}, keywords = {body, design, gesture, human{\textendash}robot interaction, laboratory, social agency, social robotics, spatial organization}, issn = {0306-3127, 1460-3659}, author = {Ala{\v c}, M. and Movellan, J. and Tanaka, F.} } @article {70, title = {Infomax Control of Eye Movements}, journal = {IEEE Transactions on Autonomous Mental Development}, volume = {2}, year = {2010}, pages = {91-107}, chapter = {91}, abstract = {Recently, infomax methods of optimal control have begun to reshape how we think about active information gathering. We show how such methods can be used to formulate the problem of choosing where to look. We show how an optimal eye movement controller can be learned from subjective experiences of information gathering, and we explore in simulation properties of the optimal controller. This controller outperforms other eye movement strategies proposed in the literature. The learned eye movement strategies are tailored to the specific visual system of the learner; we show that agents with different kinds of eyes should follow different eye movement strategies. Then we use these insights to build an autonomous computer program that follows this approach and learns to search for faces in images faster than current state-of-the-art techniques. The context of these results is search in static scenes, but the approach extends easily, and gives further efficiency gains, to dynamic tracking tasks. A limitation of infomax methods is that they require probabilistic models of uncertainty of the sensory system, the motor system, and the external world. In the final section of this paper, we propose future avenues of research by which autonomous physical agents may use developmental experience to subjectively characterize the uncertainties they face.
}, keywords = {active information gathering, autonomous computer program, autonomous physical agent, Computer vision, dynamic tracking task, Eye movement, eye movement strategy, face detection, faces, Infomax control, motor system, object detection, optimal control, optimal eye movement controller, policy gradient, probabilistic model, sensory system, static scenes, Visual Perception, visual search, visual system}, issn = {1943-0604}, author = {Butko, N. and Movellan, J.} } @article {71, title = {Warning: The author of this document may have no mental states. Read at your own risk}, journal = {Interaction Studies}, volume = {11}, year = {2010}, pages = {238-245}, chapter = {238}, author = {Movellan, J.} } @article {67, title = {Foundations for a New Science of Learning}, journal = {Science}, volume = {325}, year = {2009}, pages = {284-288}, chapter = {284}, abstract = {Human learning is distinguished by the range and complexity of skills that can be learned and the degree of abstraction that can be achieved compared with those of other species. Homo sapiens is also the only species that has developed formal ways to enhance learning: teachers, schools, and curricula. Human infants have an intense interest in people and their behavior and possess powerful implicit learning mechanisms that are affected by social interaction. Neuroscientists are beginning to understand the brain mechanisms underlying learning and how shared brain systems for perception and action support social learning. Machine learning algorithms are being developed that allow robots and computers to learn autonomously. New insights from many different fields are converging to create a new science of learning that may transform educational practices.
}, author = {Meltzoff, A. and Kuhl, P. and Movellan, J. and Sejnowski, T.} } @conference {64, title = {Learning to Make Facial Expressions}, booktitle = {IEEE 8th International Conference on Development and Learning, 2009. ICDL 2009}, year = {2009}, month = {06/2009}, publisher = {IEEE}, organization = {IEEE}, address = {Shanghai}, abstract = {This paper explores the process of self-guided learning of realistic facial expression production by a robotic head with 31 degrees of freedom. Facial motor parameters were learned using feedback from real-time facial expression recognition from video. The experiments show that the mapping of servos to expressions was learned in under one hour of training time. We discuss how our work may help illuminate the computational study of how infants learn to make facial expressions.
}, keywords = {Actuators, Emotion recognition, face detection, face recognition, facial motor parameters, Feedback, Humans, learning (artificial intelligence), Machine Learning, Magnetic heads, Pediatrics, real-time facial expression recognition, Robot sensing systems, robotic head, Robots, self-guided learning, Servomechanisms, Servomotors}, isbn = {978-1-4244-4117-4}, author = {Wu, T. and Butko, N. and Ruvolo, P. and Bartlett, M. and Movellan, J.} } @conference {63, title = {Sociable robot improves toddler vocabulary skills}, booktitle = {2009 4th ACM/IEEE International Conference on Human-Robot Interaction (HRI)}, year = {2009}, month = {03/2009}, publisher = {IEEE}, organization = {IEEE}, address = {La Jolla, CA}, abstract = {We report results of a study in which a low cost sociable robot was immersed at an Early Childhood Education Center for a period of 2 weeks. The study was designed to investigate whether the robot, which operated fully autonomously during the intervention period, could improve target vocabulary skills of toddlers 18-24 months of age. The results showed a 27\% improvement in knowledge of the target words taught by the robot when compared to a matched set of control words. The results suggest that sociable robots may be an effective and low cost technology to enrich Early Childhood Education environments.
}, keywords = {Algorithms, autonomously operated robot, Early Childhood Education Center, Educational institutions, Educational robots, Games, human factors, Human-robot interaction, intervention period, Pediatrics, Robot sensing systems, robotics, sociable robot, social aspects of automation, time 2 week, toddler vocabulary skills, Ubiquitous computing, Vocabulary}, isbn = {978-1-60558-404-1}, author = {Movellan, J. and Eckhardt, M. and Virnes, M. and Rodriguez, A.} } @article {69, title = {Toward Practical Smile Detection}, journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, volume = {31}, year = {2009}, month = {11/2009}, pages = {2106-2111}, chapter = {2106}, abstract = {Machine learning approaches have produced some of the highest reported performances for facial expression recognition. However, to date, nearly all automatic facial expression recognition research has focused on optimizing performance on a few databases that were collected under controlled lighting conditions on a relatively small number of subjects. This paper explores whether current machine learning methods can be used to develop an expression recognition system that operates reliably in more realistic conditions. We explore the necessary characteristics of the training data set, image registration, feature representation, and machine learning algorithms. A new database, GENKI, is presented which contains pictures, photographed by the subjects themselves, from thousands of different people in many different real-world imaging conditions. Results suggest that human-level expression recognition accuracy in real-life illumination conditions is achievable with machine learning technology. However, the data sets currently used in the automatic expression recognition literature to evaluate progress may be overly constrained and could potentially lead research into locally optimal algorithmic solutions.
}, keywords = {Algorithms, Artificial intelligence, Automated, automatic facial expression recognition research, Biological Pattern Recognition, Biometry, Computer simulation, Computer vision, Computer-Assisted, Face, Face and gesture recognition, face recognition, feature representation, human-level expression recognition accuracy, illumination conditions, Image databases, Image Enhancement, Image Interpretation, image registration, image representation, learning (artificial intelligence), machine learning approaches, Machine Learning Models, Humans, object detection, practical smile detection, Reproducibility of Results, Sensitivity and Specificity, Smiling, Subtraction Technique, training data set, visual databases}, issn = {0162-8828}, author = {Whitehill, J. and Littlewort, G. and Fasel, I. and Bartlett, M. and Movellan, J.} } @conference {58, title = {Auditory mood detection for social and educational robots}, booktitle = {IEEE International Conference on Robotics and Automation, 2008. ICRA 2008}, year = {2008}, month = {05/2008}, publisher = {IEEE}, organization = {IEEE}, address = {Pasadena, CA}, abstract = {Social robots face the fundamental challenge of detecting and adapting their behavior to the current social mood. For example, robots that assist teachers in early education must choose different behaviors depending on whether the children are crying, laughing, sleeping, or singing songs. Interactive robotic applications require perceptual algorithms that both run in real time and are adaptable to the challenging conditions of daily life. This paper explores a novel approach to auditory mood detection which was born out of our experience immersing social robots in classroom environments. We propose a new set of low-level spectral contrast features that extends a class of features which have proven very successful for object recognition in the modern computer vision literature. Features are selected and combined using machine learning approaches so as to make decisions about the ongoing auditory mood. We demonstrate excellent performance on two standard emotional speech databases (the Berlin Emotional Speech [W. Burkhardt et al., 2005], and the ORATOR dataset [H. Quast, 2001]). In addition we establish strong baseline performance for mood detection on a database collected from a social robot immersed in a classroom of 18-24 month old children [J. Movellan et al., 2007]. This approach operates in real time at little computational cost. It has the potential to greatly enhance the effectiveness of social robots in daily life environments.
}, keywords = {auditory mood detection, Computer vision, educational robot, Educational robots, Emotion recognition, emotional speech database, face detection, hearing, interactive robotic application, learning (artificial intelligence), Machine Learning, Mood, Prototypes, object recognition, Robotics and Automation, Robots, social mood, social robot, Speech, USA Councils}, isbn = {978-1-4244-1646-2}, author = {Ruvolo, P. and Fasel, I. and Movellan, J.} } @conference {60, title = {Automatic cry detection in early childhood education settings}, booktitle = {7th IEEE International Conference on Development and Learning, 2008. ICDL 2008}, year = {2008}, month = {08/2008}, publisher = {IEEE}, organization = {IEEE}, address = {Monterey, CA}, abstract = {We present results on applying a novel machine learning approach for learning auditory moods in natural environments [1] to the problem of detecting crying episodes in preschool classrooms. The resulting system achieved levels of performance approaching that of human coders and also significantly outperformed previous approaches to this problem [2].
}, keywords = {Acoustic noise, auditory moods, automatic cry detection, behavioural sciences computing, Deafness, early childhood education settings, education, Educational robots, Emotion recognition, human coders, Humans, learning (artificial intelligence), Machine Learning, Mood, preschool classrooms, Prototypes, Robustness, Working environment noise}, isbn = {978-1-4244-2661-4}, author = {Ruvolo, P. and Movellan, J.} } @conference {61, title = {A barebones communicative robot based on social contingency and Infomax Control}, booktitle = {The 17th IEEE International Symposium on Robot and Human Interactive Communication, 2008. RO-MAN 2008}, year = {2008}, month = {08/2008}, publisher = {IEEE}, organization = {IEEE}, address = {Munich}, abstract = {In this paper, we present a barebones robot which is capable of interacting with humans based on social contingency. It expands previous work on a contingency detector to include both human-model updating (a developmental capability) and policy improvement (a learning capability) within the framework of Infomax control. The proposed controller interacts with humans in both active and responsive ways, handling the turn-taking between them.
}, keywords = {Actuators, barebones communicative robot, Communication system control, Delay, Detectors, Human robot interaction, human-model updating capability, humanoid robots, Hydrogen, Infomax control, man-machine systems, Pediatrics, policy improvement, Robot control, Robot sensing systems, Scheduling, social contingency}, isbn = {978-1-4244-2212-8}, author = {Tanaka, F. and Movellan, J.} } @conference {59, title = {Building a more effective teaching robot using apprenticeship learning}, booktitle = {7th IEEE International Conference on Development and Learning, 2008. ICDL 2008}, year = {2008}, month = {08/2008}, publisher = {IEEE}, organization = {IEEE}, address = {Monterey, CA}, abstract = {What defines good teaching? While attributes such as timing, responsiveness to social cues, and pacing of material clearly play a role, it is difficult to create a comprehensive specification of what it means to be a good teacher. On the other hand, it is relatively easy to obtain examples of expert teaching behavior by observing a real teacher. With this inspiration as our guide, we investigated apprenticeship learning methods [1] that use data recorded from expert teachers as a means of improving the teaching abilities of RUBI, a social robot immersed in a classroom of 18-24 month old children. While this approach has achieved considerable success in mechanical control, such as automated helicopter flight [2], until now there has been little work on applying it to the field of social robotics. This paper explores two particular approaches to apprenticeship learning, and analyzes the models of teaching that each approach learns from the data of the human teacher. Empirical results indicate that the apprenticeship learning paradigm, though still nascent in its use in the social robotics field, holds promise, and that our proposed methods can already extract meaningful teaching models from demonstrations of a human expert.
}, keywords = {apprenticeship learning, automated helicopter flight, Automatic control, Data mining, Delay, education, Educational robots, expert teaching, Helicopters, Human-robot interaction, humanoid robots, Humans, Learning systems, mechanical control, robot teaching, Robotics and Automation, RUBI social robot, time 18 month to 24 month, timing}, isbn = {978-1-4244-2661-4}, author = {Ruvolo, P. and Whitehill, J. and Virnes, M. and Movellan, J.} } @conference {57, title = {Visual saliency model for robot cameras}, booktitle = {IEEE International Conference on Robotics and Automation, 2008. ICRA 2008}, year = {2008}, month = {05/2008}, publisher = {IEEE}, organization = {IEEE}, address = {Pasadena, CA}, abstract = {Recent years have seen an explosion of research on the computational modeling of human visual attention in task free conditions, i.e., given an image predict where humans are likely to look. This area of research could potentially provide general purpose mechanisms for robots to orient their cameras. One difficulty is that most current models of visual saliency are computationally very expensive and not suited to real time implementations needed for robotic applications. Here we propose a fast approximation to a Bayesian model of visual saliency recently proposed in the literature. The approximation can run in real time on current computers at very little computational cost, leaving plenty of CPU cycles for other tasks. We empirically evaluate the saliency model in the domain of controlling saccades of a camera in social robotics situations. The goal was to orient a camera as quickly as possible toward human faces. We found that this simple general purpose saliency model doubled the success rate of the camera: it captured images of people 70\% of the time, compared to a 35\% success rate when the camera was controlled using an open-loop scheme. After 3 saccades (camera movements), the robot was 96\% likely to capture at least one person. The results suggest that visual saliency models may provide a useful front end for camera control in robotics applications.
}, keywords = {Application software, approximation theory, Bayes methods, Bayesian methods, Bayesian model, camera control, Cameras, Central Processing Unit, Computational efficiency, Computational modeling, Explosions, fast approximation, human visual attention, Humans, Open loop systems, robot cameras, robot vision, Robot vision systems, robotic application, task free conditions, visual saliency model}, isbn = {978-1-4244-1646-2}, author = {Butko, N. and Zhang, L. and Cottrell, G. and Movellan, J.} } @conference {62, title = {Learning to Learn}, booktitle = {IEEE International Conference on Development and Learning}, year = {2007}, author = {Butko, N. and Movellan, J.} } @proceedings {35, title = {The RUBI Project: A Progress Report}, year = {2007}, author = {Tanaka, F. and Movellan, J. and Taylor, C. and Ruvolo, P. and Eckhardt, M.} } @article {34, title = {Socialization between toddlers and robots at an early childhood education center}, journal = {Proceedings of the National Academy of Sciences}, volume = {104}, year = {2007}, author = {Tanaka, F. and Cicourel, A. and Movellan, J.} } @proceedings {38, title = {Behavior Analysis of Children{\textquoteright}s Touch on a Small Humanoid Robot: Long-term Observation at a Daily Classroom over Three Months}, year = {2006}, month = {09/2006}, address = {Hatfield, United Kingdom}, author = {Tanaka, F. and Movellan, J.} } @proceedings {39, title = {Daily HRI evaluation at a classroom environment: Reports from dance interaction experiments}, year = {2006}, address = {Salt Lake City}, author = {Tanaka, F. and Movellan, J. and Fortenberry, B. and Aisaka, K.} } @conference {37, title = {The RUBI Project: Designing Everyday Robots by Immersion}, booktitle = {Fifth International Conference on Development and Learning (ICDL)}, year = {2006}, month = {06/2006}, address = {Bloomington, U.S.A.}, author = {Tanaka, F. and Movellan, J.} } @proceedings {43, title = {Developing Dance Interaction between QRIO and Toddlers in a Classroom Environment: Plans for the First Steps}, year = {2005}, month = {08/2005}, pages = {223-228}, address = {Nashville, U.S.A.}, note = {Best Paper Award}, author = {Tanaka, F. and Fortenberry, B. and Aisaka, K. and Movellan, J.} } @article {40, title = {MPT: the Machine Perception Toolbox}, year = {2005}, author = {Fasel, I. and Fortenberry, B. and Movellan, J.} } @proceedings {42, title = {Plans for developing real-time dance interaction between QRIO and toddlers in a classroom environment}, year = {2005}, address = {Osaka, Japan}, author = {Tanaka, F. and Fortenberry, B. and Aisaka, K. and Movellan, J.} } @proceedings {41, title = {The RUBI project: Origins, principles and first steps}, year = {2005}, address = {Osaka, Japan}, author = {Movellan, J. and Tanaka, F. and Fortenberry, B. and Aisaka, K.} } @conference {45, title = {Analysis of machine learning methods for real-time recognition of facial expressions from video}, booktitle = {Computer Vision and Pattern Recognition: Face Processing Workshop}, year = {2004}, author = {Littlewort, G. and Bartlett, M. and Fasel, I. and Chenu, J. and Movellan, J.} } @article {44, title = {Towards social robots: Automatic evaluation of human-robot interaction by face detection and expression classification}, journal = {Advances in Neural Information Processing Systems}, volume = {16}, year = {2004}, pages = {1563-1570}, chapter = {1563}, author = {Littlewort, G. and Bartlett, M. and Chenu, J. and Fasel, I. and Kanda, T. and Ishiguro, H. and Movellan, J.} }