DROP TABLE IF EXISTS events;
DROP TABLE IF EXISTS events_temp;
DROP TABLE IF EXISTS subevents;
DROP TABLE IF EXISTS user_events;
-- MySQL dump 9.08
--
-- Host: localhost Database: sc2003
-- -------------------------------------------------------
-- Server version 4.0.13-nt
--
-- Table structure for table 'events'
--
CREATE TABLE events (
  eventID int(11) NOT NULL auto_increment,
  date date default '0000-00-00',
  startTime time default '00:00:00',
  endTime time default '00:00:00',
  room text,
  chair text,
  eventType text,
  sessionTitle text,
  PRIMARY KEY (eventID)
) ENGINE=MyISAM;
--
-- Dumping data for table 'events'
--
INSERT INTO events VALUES (10655,'0000-00-00','00:00:00','00:00:00','Room','Chair','Event Type','Session Title or Main Title (Mandatory)');
INSERT INTO events VALUES (10656,'2003-11-16','08:30:00','12:00:00','','Carl Kesselman (USC Information Sciences Institute )','Tutorial','S06: The Grid: Software Standards for Cyberinfrastructure');
INSERT INTO events VALUES (10657,'2003-11-16','08:30:00','12:00:00','','Blaise M. Barney (Lawrence Livermore National Laboratory)','Tutorial','S05: An Introduction to the TotalView Debugger');
INSERT INTO events VALUES (10658,'2003-11-16','08:30:00','12:00:00','','Eric A. Jones (Enthought, inc. )','Tutorial','S08: Introduction to Scientific Computing with Python');
INSERT INTO events VALUES (10659,'2003-11-16','08:30:00','12:00:00','','David Allan Pease (IBM Research )','Tutorial','S07: High-Performance Storage');
INSERT INTO events VALUES (10660,'2003-11-16','08:30:00','12:00:00','','Carl J. Williams (NIST)','Tutorial','S10: An Introduction to Quantum Information');
INSERT INTO events VALUES (10661,'2003-11-16','08:30:00','17:00:00','','Remy Evard (Argonne National Laboratory )','Tutorial','S01: Production Linux Clusters 2003 - Architecture and System Software for Serious Computing');
INSERT INTO events VALUES (10662,'2003-11-16','08:30:00','17:00:00','','Alice Evelyn Koniges (LLNL)','Tutorial','S04: Real World Techniques for Scientific Applications of Scale');
INSERT INTO events VALUES (10663,'2003-11-16','08:30:00','17:00:00','','Darren James Kerbyson (Los Alamos National Laboratory )','Tutorial','S03: A practical approach to performance analysis and modeling of large-scale systems');
INSERT INTO events VALUES (10664,'2003-11-16','08:30:00','17:00:00','','Robert L. Grossman (University of Illinois at Chicago )','Tutorial','S02: A Tutorial Introduction to High Performance Data Transport');
INSERT INTO events VALUES (10665,'2003-11-16','08:30:00','17:00:00','','Tarek El-Ghazawi (GWU)','Tutorial','S09: Programming with the Partitioned Global Address Space Model');
INSERT INTO events VALUES (10666,'2003-11-16','13:30:00','17:00:00','','Timothy Glen Mattson (Intel)','Tutorial','S12: Advanced Topics in OpenMP');
INSERT INTO events VALUES (10667,'2003-11-16','13:30:00','17:00:00','','Ann L. Chervenak (University of Southern California)','Tutorial','S11: Grid Services for Data Management and Virtual Data');
INSERT INTO events VALUES (10668,'2003-11-16','13:30:00','17:00:00','','Eric A. Jones (Enthought, inc.)','Tutorial','S13: Scientific Computing with Python -- Advanced Topics');
INSERT INTO events VALUES (10669,'2003-11-16','13:30:00','17:00:00','','Ken Brodlie (University of Leeds)','Tutorial','S15: Distributed and Collaborative Visualization');
INSERT INTO events VALUES (10670,'2003-11-16','13:30:00','17:00:00','','Craig A. Stewart (Indiana University)','Tutorial','S14: Computational Biology');
INSERT INTO events VALUES (10671,'2003-11-17','08:00:00','17:30:00','Wyndham Phoenix Hotel - Navajo Room','Craig Lee (Aerospace Corp.)','Workshop','4th International Workshop on Grid Computing (Grid2003)');
INSERT INTO events VALUES (10672,'2003-11-17','08:30:00','12:00:00','','Philip John Mucci (ICL, UTK, NERSC )','Tutorial','M07: PERC Tools for Performance Data Gathering and Analysis');
INSERT INTO events VALUES (10673,'2003-11-17','08:30:00','12:00:00','','Thomas Stephen Lehmann (Intel Corporation )','Tutorial','M09: Cluster Construction Utilizing Selected Packages');
INSERT INTO events VALUES (10674,'2003-11-17','08:30:00','12:00:00','','Ed Kornkven (ARSC)','Tutorial','M08: Vector Performance Programming');
INSERT INTO events VALUES (10675,'2003-11-17','08:30:00','12:00:00','','Stephen Lau (NERSC/LBNL)','Tutorial','M10: Computer Protection at Open Scientific Facilities');
INSERT INTO events VALUES (10676,'2003-11-17','08:30:00','17:00:00','','Lisa C. Childers (Argonne National Laboratory )','Tutorial','M01: How to Build a Grid Service Using the Globus Toolkit(R) 3');
INSERT INTO events VALUES (10677,'2003-11-17','08:30:00','17:00:00','','David Bernholdt (Oak Ridge National Laboratory)','Tutorial','M05: Component Software for High-Performance Computing: Using the Common Component Architecture');
INSERT INTO events VALUES (10678,'2003-11-17','08:30:00','17:00:00','','Radhika Vullikanti (Cluster File Systems, Inc. )','Tutorial','M03: Lustre: A Scalable, High-Performance Distributed File System');
INSERT INTO events VALUES (10679,'2003-11-17','08:30:00','17:00:00','','Ewing Lusk (Argonne National Laboratory )','Tutorial','M04: Using MPI-2: Advanced Features of the Message-Passing Interface');
INSERT INTO events VALUES (10680,'2003-11-17','08:30:00','17:00:00','','Tarek El-Ghazawi (GWU)','Tutorial','M06: Reconfigurable Supercomputing Systems');
INSERT INTO events VALUES (10681,'2003-11-17','08:30:00','17:00:00','','John Towns (NCSA/Univ of Illinois)','Tutorial','M02: Applications in the TeraGrid Environment');
INSERT INTO events VALUES (10682,'2003-11-17','13:30:00','17:00:00','','Luiz DeRose (IBM Research )','Tutorial','M11: Performance Tools 101: Principles of Experimental Performance Measurement and Analysis');
INSERT INTO events VALUES (10683,'2003-11-17','13:30:00','17:00:00','','Michael Kistler (IBM)','Tutorial','M12: Power and Energy Conservation for Servers');
INSERT INTO events VALUES (10684,'2003-11-17','13:30:00','17:00:00','','Lars E. Jonsson (Intel Americas, Inc)','Tutorial','M13: HPC and InfiniBand Architecture in Practice');
INSERT INTO events VALUES (10685,'2003-11-17','13:30:00','17:00:00','','William Kirk Nickless (Argonne National Lab )','Tutorial','M14: Customer Owned Networks: Strategy, Implementation, and Example');
INSERT INTO events VALUES (10686,'2003-11-18','10:30:00','12:00:00','','Ruth Aydt (NCSA)','Paper','Cluster-Based Servers');
INSERT INTO events VALUES (10687,'2003-11-18','10:30:00','12:00:00','','Allen Malony (University of Oregon)','Paper','Tool Infrastructure');
INSERT INTO events VALUES (10688,'2003-11-18','10:30:00','12:00:00','','Lee Margetts (University of Manchester, MRCCS)','SC Global, Showcase','Showcase I');
INSERT INTO events VALUES (10689,'2003-11-18','10:30:00','17:00:00','','','Workshop','Petaflops Programming: Parallelism, Pain, and Perverse Programming Paradigms');
INSERT INTO events VALUES (10690,'2003-11-18','13:30:00','15:00:00','','Xian-He Sun (Illinois Institute of Technology )','Paper','Grid Support');
INSERT INTO events VALUES (10691,'2003-11-18','13:30:00','15:00:00','','Eileen Kraemer (University of Georgia)','Paper','Software Systems');
INSERT INTO events VALUES (10692,'2003-11-18','13:30:00','15:00:00','','John Feo (Sun Microsystems)','Paper','Compilation Techniques');
INSERT INTO events VALUES (10693,'2003-11-18','13:30:00','15:00:00','','Stephenie McLean (NCSA)','SC Global, BOF','Igniting Innovation: Cultivating Diverse High End Users from Emerging Communities');
INSERT INTO events VALUES (10694,'2003-11-18','15:30:00','17:00:00','','John Gustafson (Sun Microsystems Inc.)','Paper','Supercomputing Applications');
INSERT INTO events VALUES (10695,'2003-11-18','15:30:00','17:00:00','','Roland Wismüller (LRR-TUM, Technische Universität München)','Paper','Tools and Services for Grids');
INSERT INTO events VALUES (10696,'2003-11-18','15:30:00','17:00:00','','Bernd Mohr (Forschungszentrum Juelich)','Paper','Performance and Reliability');
INSERT INTO events VALUES (10697,'2003-11-18','15:30:00','17:00:00','','Brian Corrie (New Media Innovation Centre - Immersive Media Lab, Canada)','SC Global, BOF','Quality of Experience in Advanced Collaborative Environments');
INSERT INTO events VALUES (10698,'2003-11-18','17:00:00','19:00:00','Lobby 2','Michelle Hribar (Pacific University)','Poster','Posters Reception');
INSERT INTO events VALUES (10699,'2003-11-19','08:30:00','09:15:00','Ballroom','Fran Berman (San Diego Supercomputer Center)','Plenary','State of the Field: Judy Estrin and Frieder Seible');
INSERT INTO events VALUES (10700,'2003-11-19','09:15:00','10:30:00','Ballroom','Fran Berman (San Diego Supercomputer Center)','Plenary','State of the Field: Judy Estrin and Frieder Seible');
INSERT INTO events VALUES (10701,'2003-11-19','10:30:00','12:00:00','','Christine E. Cuicchi (NAVOCEANO/DoD MSRC)','HPC Challenge','HPC Challenge Presentations');
INSERT INTO events VALUES (10702,'2003-11-19','10:30:00','12:00:00','','Dhabaleswar Panda (The Ohio State University)','Paper','Networking');
INSERT INTO events VALUES (10703,'2003-11-19','10:30:00','12:00:00','','Robert Lucas (USC/ISI)','Paper','Performance Programming');
INSERT INTO events VALUES (10704,'2003-11-19','10:30:00','12:00:00','','Natalia Maltsev (Argonne National Laboratory)','SC Global, Showcase','Showcase II');
INSERT INTO events VALUES (10705,'2003-11-19','10:30:00','17:00:00','','Thomas M. Ruwart (University of Minnesota)','Workshop','Storage on the Lunatic Fringe: Beyond Peta-Scale Storage Systems');
INSERT INTO events VALUES (10706,'2003-11-19','13:30:00','15:00:00','','Adolfy Hoisie (Los Alamos National Lab)','Paper','Performance Analysis and Modeling');
INSERT INTO events VALUES (10707,'2003-11-19','13:30:00','15:00:00','','William Gropp (Argonne National Laboratory)','Paper','Gordon Bell Computational Methods');
INSERT INTO events VALUES (10708,'2003-11-19','13:30:00','15:00:00','','Mary Thomas (University of Texas)','Paper','Runtime Systems');
INSERT INTO events VALUES (10709,'2003-11-19','13:30:00','15:00:00','','Jennifer Teig von Hoffman (Boston University)','SC Global, Panel','Where Should the Access Grid Go After Version 2.x?');
INSERT INTO events VALUES (10710,'2003-11-19','15:30:00','17:00:00','','Srinivas Aluru (Iowa State University)','Paper','Algorithms and Programming');
INSERT INTO events VALUES (10711,'2003-11-19','15:30:00','17:00:00','','David Bailey (LBNL)','Paper','Gordon Bell Performance Evaluation');
INSERT INTO events VALUES (10712,'2003-11-19','15:30:00','17:00:00','','Larry Davis (DoD High Performance Computing Modernization Program)','Panel','High Performance Computing System Performance Modeling');
INSERT INTO events VALUES (10713,'2003-11-19','15:30:00','17:00:00','','Kelli Robyn Dipple (University of Manchester and Queensland University of Technology)','SC Global, Showcase','Use of Collaborative Technologies: Artistic and Cultural Instincts');
INSERT INTO events VALUES (10714,'2003-11-20','08:30:00','09:15:00','Ballroom','Henri Casanova (University of California San Diego)','Plenary','State of the Field: Jill Mesirov and David Culler');
INSERT INTO events VALUES (10715,'2003-11-20','09:15:00','10:30:00','Ballroom','Henri Casanova (University of California San Diego)','Plenary','State of the Field: Jill Mesirov and David Culler');
INSERT INTO events VALUES (10716,'2003-11-20','10:30:00','12:00:00','','Allan Snavely (San Diego Supercomputer Center)','Paper','Scheduling and Communication');
INSERT INTO events VALUES (10717,'2003-11-20','10:30:00','12:00:00','','Jose Munoz (DOE/NNSA)','Paper','Advanced Architectures');
INSERT INTO events VALUES (10718,'2003-11-20','10:30:00','12:00:00','','Gregor von Laszewski (Argonne National Laboratory)','Paper','Data Management in Grids');
INSERT INTO events VALUES (10719,'2003-11-20','10:30:00','12:00:00','','John Brooke (University of Manchester)','SC Global, Showcase','Showcase III');
INSERT INTO events VALUES (10720,'2003-11-20','13:30:00','15:00:00','Ballroom','Fred Johnson (US Department of Energy)','Plenary','Awards Session');
INSERT INTO events VALUES (10721,'2003-11-20','15:30:00','17:00:00','','Jeffrey Vetter (LLNL)','Paper','Performance Measurement and Analysis');
INSERT INTO events VALUES (10722,'2003-11-20','15:30:00','17:00:00','','Josep Torrellas (University of Illinois)','Paper','High Performance Input/Output');
INSERT INTO events VALUES (10723,'2003-11-20','15:30:00','17:00:00','','Maxine D. Brown (University of Illinois at Chicago, USA)','Panel','Strategies for Application-Empowered Networks');
INSERT INTO events VALUES (10724,'2003-11-20','15:30:00','17:00:00','','Steven J. Wallach (Chiaro Networks)','Panel','SuperNetworking Transforming Supercomputing');
INSERT INTO events VALUES (10725,'2003-11-20','15:30:00','17:00:00','','Darran Edmundson (ANU Supercomputer Facility Vizlab)','SC Global, Showcase','Showcase IV');
INSERT INTO events VALUES (10726,'2003-11-21','08:30:00','10:00:00','','David B. Nelson (National Coordination Office for Information Technology Research and Development (NCO/ITR&D))','Panel','The High End Computing Revitalization Task Force');
INSERT INTO events VALUES (10727,'2003-11-21','08:30:00','10:00:00','','Jeremy Kepner (MIT Lincoln Laboratory)','Panel','HPC Productivity');
INSERT INTO events VALUES (10728,'2003-11-21','08:30:00','10:00:00','','Wu Feng (Los Alamos National Laboratory)','Panel','Battle of the Network Stars!');
INSERT INTO events VALUES (10729,'2003-11-21','10:30:00','12:00:00','','Rod Oldehoeft (Los Alamos National Laboratory)','Panel','Open Source Software Policy Issues for High Performance Computing');
INSERT INTO events VALUES (10730,'2003-11-21','10:30:00','12:00:00','','Bill Blake (SVP of Product Development, Netezza Corp.)','Panel','The Simplification of Supercomputing: Clustering, Appliances and Grid Computing');
INSERT INTO events VALUES (10731,'2003-11-21','10:30:00','12:00:00','','David Morton (MHPCC)','Panel','Goldilocks and the Three Bears Revisited: HPC Architecture Directions');
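--
-- Example query: list the Sunday (2003-11-16) tutorials in start-time order.
-- A minimal sketch, assuming the rows above have been loaded as-is; it uses
-- only the columns defined in the CREATE TABLE for 'events'.
--
SELECT startTime, endTime, sessionTitle
FROM events
WHERE date = '2003-11-16' AND eventType = 'Tutorial'
ORDER BY startTime, sessionTitle;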
--
-- Table structure for table 'events_temp'
--
CREATE TABLE events_temp (
  eventID int(11) NOT NULL auto_increment,
  date date default '0000-00-00',
  startTime time default '00:00:00',
  endTime time default '00:00:00',
  room text,
  sessionTitle text,
  subSessionTitle text,
  chair text,
  speaker text,
  eventType text,
  abstract text,
  paperFilename text,
  PRIMARY KEY (eventID)
) ENGINE=MyISAM;
--
-- Dumping data for table 'events_temp'
--
INSERT INTO events_temp VALUES (3580,'0000-00-00','00:00:00','00:00:00','Room','Session Title or Main Title (Mandatory)','Sub-session (Optional)','Chair','Speaker/Presenter/Author','Event Type','Abstract','Paper pdf');
INSERT INTO events_temp VALUES (3581,'2003-11-19','10:30:00','12:00:00','','HPC Challenge Presentations','','Christine E. Cuicchi (NAVOCEANO/DoD MSRC)','','HPC Challenge','The HPC Challenge honors participants for innovative uses of high performance computing resources. It provides opportunities for contestants to showcase applications and platforms. The categories are:\n\nMost Innovative Data-Intensive Application: With the increasing ability to create, store, and re-access larger and larger datasets, one thing remains constant: the importance of mining such data to glean useful pieces of knowledge. The award will be presented to the entry that uses the most novel and/or inventive approaches in mining data, visualizing data, or a combination of these tasks.\n\nMost Geographically Distributed Application: As the Grid continues to decrease the virtual distance between computers around the world, the ability to solve challenging computational problems with combinations of diverse system architectures is continuing to strengthen. The award will be presented to the team with the most geographically distributed application to solve a significantly complex problem.\n\nThis session will allow teams to present their projects to the SC03 audience and the HPC Challenge judges. The actual award will be made at the Awards plenary session on Thursday.','');
INSERT INTO events_temp VALUES (3582,'2003-11-19','08:30:00','09:15:00','Ballroom','State of the Field: Judy Estrin and Frieder Seible','State of the Field: Is There Anything New on the Networking Horizon?','Fran Berman (San Diego Supercomputer Center)','Judy Estrin (Packet Design LLC)','Plenary','Networking - first in the form of local-area networks and later the Internet and the World Wide Web - was the major driver behind technology industry growth during the past couple of decades. That the growth of networking has slowed dramatically is due not just to a weak economy, but also to the fact that we have over-capacity due to aggressive build out during the bubble and the overall IT infrastructure market has matured. But this does not mean that innovation in networking has ceased. Granted, the next several years will see slower growth driven by evolutionary - not revolutionary - innovation. But there is a great deal to be excited about in new markets such as mobile networking and voice over IP. It is time to innovate not just in speeds and feeds, but in ways to improve operational efficiency to better utilize and run the critical infrastructures that we have in place. Networking must also evolve to support new server trends such as clustering or SANs. Over the longer term we will see growth for networking as we move toward ubiquitous consumer connectivity, with always-on high bandwidth that will permit new forms of entertainment. Even further out, we will see the connectivity of embedded devices into ad hoc networks used in a variety of applications.','');
INSERT INTO events_temp VALUES (3583,'2003-11-19','09:15:00','10:30:00','Ballroom','State of the Field: Judy Estrin and Frieder Seible','State of the Field: Physical Infrastructure Assessment and Protection to Mitigate Natural and Man-made Disasters\nor\nThe Infrastructure Enabled Infrastructure','Fran Berman (San Diego Supercomputer Center)','Frieder Seible (University of California San Diego)','Plenary','The dependence on the physical infrastructure of roads, bridges, dams, water supply pipelines/aqueducts, ports and harbors, etc., is an integral part of and so ingrained in our culture that we are mostly unaware of its existence as long as functionality is provided. The aging of our structures, natural hazards such as earthquakes, floods, and fires, as well as man-made hazards such as terrorist attacks and accidents, threaten the functionality of our physical infrastructure and extraordinary expenditures are required to just maintain the status quo. Unless significant advances are made in monitoring, rehabilitating, and managing the existing physical infrastructure, our quality of life cannot be preserved. While some of these advances will come in the form of new materials and new structural concepts and systems, the major breakthrough will come from a ubiquitous cyberinfrastructure, consisting of distributed multi-use sensor nets, wireless and high speed networks, fully searchable data bases, and data mining tools to provide the operator and the end user with on-line information and knowledge, resulting in improved infrastructure systems management and use. The critical dependence of the Physical Infrastructure on the Cyberinfrastructure will be developed on numerous examples and changes on how we can better manage and use the physical infrastructure with cyberinfrastructure support will be speculated on from an engineering perspective.','');
INSERT INTO events_temp VALUES (3584,'2003-11-20','08:30:00','09:15:00','Ballroom','State of the Field: Jill Mesirov and David Culler','State of the Field: Computational Paradigms for Integrative Approaches to Genomic Medicine','Henri Casanova (University of California San Diego)','Jill P. Mesirov (Whitehead Institute/MIT Center for Genome Research)','Plenary','The completion of the human genome sequencing project, coupled with the ever increasing scale and throughput of biological experimentation, has the potential to greatly accelerate progress in biomedicine. Discoveries in this new realm of “high-dimensionality biology” are dependent both upon sophisticated computation and the ability to unify the analytical approaches of a variety of disciplines. However, the lack of an integrated computational environment that can provide both easy access to a set of universal analytic tools, and support the development and dissemination of novel algorithmic approaches, has resulted in the pace of data acquisition greatly outstripping that of meaningful data analysis.\n\nWe will describe some of the challenging computational problems in biomedicine, the techniques we use to address them, and a software infrastructure to support this highly interdisciplinary field of research.','');
INSERT INTO events_temp VALUES (3585,'2003-11-20','09:15:00','10:30:00','Ballroom','State of the Field: Jill Mesirov and David Culler','State of the Field: Networking the Physical World','Henri Casanova (University of California San Diego)','David E. Culler (University of California Berkeley)','Plenary','The SuperComputing conference ushered in computational simulation as the third pillar of science and, in its SCxy form, rode the wave of the killer micro. We are now seeing the emergence of a new interplay of science and computing technology. The sustained, dramatic advance of CMOS has made it possible to construct complete computing systems with processing, storage, sensing, and communication in a tiny volume at very low cost. Like the microscope, telescope, and computer, this technology will allow scientists and engineers to perceive what was previously invisible; in this case through deployment of dense networks of these sensors in the physical world. We describe the emerging technology of sensor networks at several levels, including platform architecture, operating system design, self-organizing networks, sensor data processing, and programming environments. We will explore how it opens new avenues for computer networking to advance the scientific endeavor.','');
INSERT INTO events_temp VALUES (3586,'2003-11-20','13:30:00','15:00:00','Ballroom','Awards Session','','Fred Johnson (US Department of Energy)','','Plenary','SC2003 presents a wide range of awards that recognize the innovative hard work of conference participants and leaders in the field. The Gordon Bell Prizes reward practical uses of high-performance computers, including best performance of an application and best achievement in cost-performance. The Seymour Cray Computer Science and Engineering Award recognizes innovative contributions to high-performance computing systems that best exemplify the creative spirit of Seymour Cray. The Sidney Fernbach Memorial Award honors innovative uses of high-performance computing in problem solving. In addition, the conference gives prizes for this year\'s HPC Challenge, Bandwidth Challenge, Best Paper, and Best Student Paper. These prestigious honors are presented during a special ceremony held as a capstone to the SC conference.','');
INSERT INTO events_temp VALUES (3587,'2003-11-17','08:00:00','17:30:00','Wyndham Phoenix Hotel - Navajo Room','4th International Workshop on Grid Computing (Grid2003)','','Craig Lee (Aerospace Corp.)','Steering Committee: Craig Lee (Aerospace Corp.), Heinz Stockinger (CERN), Mark Baker (University of Portsmouth, UK), Rajkumar Buyya (University of Melbourne, Australia), Manish Parashar (Rutgers University)','Workshop','In the last few years, the Grid community has been growing very rapidly and several new technologies have been proposed. This goes along with the growing popularity of the Internet and the availability of powerful computers and high-speed networks as low-cost commodity components, and is changing the way we do computing. Several proposals and publications have been generated, and several Grid projects with research and production oriented goals are ongoing.\n\nGrid 2003 is an international meeting that brings together the Grid community of researchers, developers, practitioners, and users. The objective of Grid 2003 is to serve as a forum to present current and future work as well as to exchange research ideas in this field. Grid 2003 partially follows the focus from last year but extends it to production Grids and international testbeds. Sessions of refereed presentations include:\n\nGrid Infrastructure and Services\nPerformance Evaluation and Optimization\nPolicy and Security Management\nData Management\nTestbeds and Applications\nInformation Systems\nWork-in-Progress Papers\n\nFor more detailed information about the workshop, see http://www.gridcomputing.org/grid2003/.','');
INSERT INTO events_temp VALUES (3588,'2003-11-18','10:30:00','12:00:00','','Petaflops Programming: Parallelism, Pain, and Perverse Programming Paradigms','Petaflops Architectures: Parallelism, Pain, and Perverse Programming Paradigms','','Moderator: Candace Culhane (National Security Agency), Panelists: Mootaz Elnozahy (IBM), 3 others to be named','Workshop','Petaflop architectures now on the drawing boards will enable a sequence of scientific breakthroughs and herald a new era in computational science. At the same time, many petaflops architectures are likely to challenge systems designers, language developers, and compiler writers in totally new ways. Pity the poor applications programmer at the end of this chain, who will have to live with the mistakes of architects, language developers and compiler writers alike. A certain amount of \"pain\" in use of petaflops architectures is surely unavoidable – machines with hundreds of thousands of processors and awkward memory models will not be \"user friendly.\" Moreover, some experts believe that the intrinsic unreliability of hardware at the scale envisioned will force adoption of complex checkpoint and recovery strategies. We firmly believe, however, that much of this pain can be ameliorated by clever architects and programming model designers, assuming they evince a depth of understanding and subtlety of approach not universally evident in the past.\n\nIn this first panel of the three-part workshop, computer architects will talk about architectural trends and the shape of probable architectures at the beginning of the petaflops era. This panel will focus on the emerging structure of likely petascale architectures, and what will be expected/needed from programming models, languages, and end users to handle such systems. Questions the panelists should address include:\n\nWhat architectural features will be easy to exploit and yield good performance?\nWhat anticipated features will be complex and problematic for language designers, compiler writers and applications programmers alike?\nWhat \"features\" will be so complex that only the run-time environment should see them, and could lead to fragile or downright awful performance?\nWill architectures have so much complexity, and require so many levels of parallelism that our programming models need to reflect this same complexity?\nWhat are the trends?\nWhat will be the key things that programming models need to address?','');
INSERT INTO events_temp VALUES (3589,'2003-11-18','13:30:00','15:00:00','','Petaflops Programming: Parallelism, Pain, and Perverse Programming Paradigms','Petaflops Programming Models: Ameliorating Architectural Issues or Exacerbating Them?','','Moderator: Burton Smith (Cray), Panelists: Kathy Yelick (UC-Berkeley), Hans Zima (University of Vienna, NASA Jet Propulsion Laboratory), Larry Snyder (University of Washington), 1 other to be named','Workshop','Petaflop architectures now on the drawing boards will enable a sequence of scientific breakthroughs and herald a new era in computational science. At the same time, many petaflops architectures are likely to challenge systems designers, language developers, and compiler writers in totally new ways. Pity the poor applications programmer at the end of this chain, who will have to live with the mistakes of architects, language developers and compiler writers alike. A certain amount of \"pain\" in use of petaflops architectures is surely unavoidable – machines with hundreds of thousands of processors and awkward memory models will not be \"user friendly.\" Moreover, some experts believe that the intrinsic unreliability of hardware at the scale envisioned will force adoption of complex checkpoint and recovery strategies. We firmly believe, however, that much of this pain can be ameliorated by clever architects and programming model designers, assuming they evince a depth of understanding and subtlety of approach not universally evident in the past.\n\nThis panel will look at evolving and expected programming models, and the way language and compiler developers hope to address the challenges posed by petaflops architectures. Given the range of issues such systems are raising, which issues should the programming model attempt to address, and which should be passed on to the end user? Hot-button issues include: how to handle fault tolerance, very deep memory hierarchies, interoperability, coping with tens or hundreds of thousands of threads, the semantics of intelligent memory, and so on. Questions the panelists should address include:\n\nWhat ideas look promising to solve some of these issues?\nWhat do future architectures need to incorporate to make it easier for programmers and compilers? \nIs it better to take an evolutionary or revolutionary approach?\nWhat about engineering/adoption issues? \nWill users adopt new languages?\nAre problem-solving environments the way to go? \nWhat hooks does the language community really need in future architectures? \nIs compiler technology up to the task? ','');
INSERT INTO events_temp VALUES (3590,'2003-11-18','15:30:00','17:00:00','','Petaflops Programming: Parallelism, Pain, and Perverse Programming Paradigms','Petaflops Applications: Pity the Programmer Trying to Do Actual Applications','','Moderator: Alan Laub (UC - Davis), Panelists: Mike Merrill (National Security Agency), Chris Johnson (University of Utah), 2 others to be named','Workshop','Petaflop architectures now on the drawing boards will enable a sequence of scientific breakthroughs and herald a new era in computational science. At the same time, many petaflops architectures are likely to challenge systems designers, language developers, and compiler writers in totally new ways. Pity the poor applications programmer at the end of this chain, who will have to live with the mistakes of architects, language developers and compiler writers alike. A certain amount of \"pain\" in use of petaflops architectures is surely unavoidable – machines with hundreds of thousands of processors and awkward memory models will not be \"user friendly.\" Moreover, some experts believe that the intrinsic unreliability of hardware at the scale envisioned will force adoption of complex checkpoint and recovery strategies. We firmly believe, however, that much of this pain can be ameliorated by clever architects and programming model designers, assuming they evince a depth of understanding and subtlety of approach not universally evident in the past.\n\nIn this third panel, a set of current users of high-end architectures will respond to the material presented during the other panels, and address the question of how well expected architectures and programming models will, in fact, serve the needs of the applications communities. This panel will also discuss the sociological issue of what level of maturity and usability a language needs in order to be adopted by the HPC community. Speakers in this panel will characterize their applications from the point of view of algorithms, architecture-factors (e.g. cross section bandwidth, cache issues) and programming model/language issues. They should identify the biggest challenges they foresee in exploiting promised petascale systems and their views on the most profitable directions to take in architecture, programming model, systems and compiler research. Questions the panelists should address include:\n\nWhat are the biggest challenges you foresee in using petaflops architectures?\nWhat do programming models need to provide you to allow you to exploit petaflops architectures productively?\nWhat constructs or abstractions would make it easier for you to map your application on petaflops architectures?\nAt what point in the maturity of a new programming language would you be willing to try a new promising model? ','');
INSERT INTO events_temp VALUES (3591,'2003-11-19','10:30:00','17:00:00','','Storage on the Lunatic Fringe: Beyond Peta-Scale Storage Systems','','Thomas M. Ruwart (University of Minnesota)','','Workshop','This workshop will help paint a picture of the current and future application requirements that significantly push the envelope of storage systems and the concepts, architectures, and technologies being developed to meet these requirements. This includes describing current work on Object-based Storage Devices (OSD) and associated file systems and alternative approaches. This leads into a discussion of adding more intelligence into the storage devices making them aware of the data objects they store. Each presentation is expected to address one of the following questions:\n\nHow to achieve beyond 1PB/sec bandwidth from a single source (i.e. a file)\nHow to achieve a trillion storage accesses per second\nHow to manage beyond a trillion objects (i.e. files)\nHow to find any one object within a trillion objects\nHow to find things inside any number of objects within a trillion objects\nSecurity of a trillion objects\nWhere existing concepts, architectures, and technologies break down\nHow to get storage devices to work \"for\" you and not \"against\" you\n\nSession 1: 10:30am-noon\nChair: Steve Louis (LLNL)\nCurrent and Future HPC Application Requirements for Storage Systems\n\n\nSession 2: 1:30pm-3:00pm\nChair: To Be Announced\nAdvanced Storage Concepts, Architectures, and Technologies\n\n\nSession 3: 3:30pm-5:00pm\nHow do we get there from here? Call to action, Roadmaps, Standards, ...etc.\nChair: Thomas Ruwart (University of Minnesota)','');
INSERT INTO events_temp VALUES (3592,'2003-11-18','17:00:00','19:00:00','Lobby 2','Posters Reception','','Michelle Hribar (Pacific University)','','Poster','Posters at SC2003 showcase the latest innovations and are prominently displayed throughout the conference. This evening Posters Reception, sponsored by AMD, features the posters and allows time for conference attendees to discuss the displays with the poster presenters. The poster session is a means of presenting timely research in a more casual setting, with the chance for greater in-depth, one-on-one dialogue that can proceed at its own pace.\n\nTopics of interest include:\n- scalable systems\n- performance evaluation and modeling\n- high performance networking\n- distributed computing systems\n- high performance I/O\n- programming environments and tools\n- novel computer architectures\n- visualization\n- distributed collaborations\n- parallel and distributed algorithms\n- architecture simulation\n- workload characterization\n- user experiences\n- optimization studies\n- parallel databases\n- data- and computation-intensive applications\n- large-scale databases and digital libraries\n- fault-tolerant architectures and system software','');
INSERT INTO events_temp VALUES (3593,'2003-11-16','13:30:00','17:00:00','','S12: Advanced Topics in OpenMP','S12: Advanced Topics in OpenMP','Timothy Glen Mattson (Intel)','Timothy Glen Mattson (Intel), Sanjiv Shah (Intel)','Tutorial','Content-Level: 5% Introductory 35% Intermediate 60% Advanced\n\nAbstract: OpenMP is an important standard for writing parallel applications for shared memory computers. Many HPC professionals have been exposed to OpenMP. Unfortunately, only a small number have mastered it to become OpenMP experts. \n\nThis tutorial will address that problem by covering the key topics a programmer must understand in order to become an OpenMP expert. We will address (1) how to take advantage of the performance oriented constructs in the OpenMP specifications, (2) compiler implementation issues and how they impact program performance, and (3) programming clusters of SMP nodes by mixing OpenMP and MPI. \n\nThroughout the tutorial, we will use examples to amplify each of the above points. We will collect these examples from real programs crafted by master OpenMP programmers.','S12_Out.pdf');
INSERT INTO events_temp VALUES (3594,'2003-11-16','13:30:00','17:00:00','','S11: Grid Services for Data Management and Virtual Data','S11: Grid Services for Data Management and Virtual Data','Ann L. Chervenak (University of Southern California)','Ann Chervenak (University of Southern California), Ewa Deelman (University of Southern California), Mike Wilde (Argonne National Laboratory)','Tutorial','Content-Level: 15% Introductory 70% Intermediate 15% Advanced\n\nAbstract: This tutorial will provide a detailed introduction to existing services for data management in grid computing environments. These services are essential for data-intensive applications that require the creation, replication, management and discovery of large numbers of files or data items. We will provide detailed descriptions and interactive demonstrations of six grid components: the GridFTP data transport protocol, the Reliable File Transfer (RFT) service, the Replica Location Service (RLS), the Metadata Catalog Service (MCS), the Chimera system for managing virtual data products, and the Pegasus system for planning and execution in grid environments. \n\nThis tutorial is intended for those who are interested in deploying and using a grid for data-intensive applications. The tutorial will focus on stable, existing grid components from Globus Toolkit versions 2.4 and 3.0. The tutorial will consist mainly of detailed examples of how to configure, deploy and use these grid services. The tutorial will include interactive demonstrations of the use of these tools.','S11_Out.pdf');
INSERT INTO events_temp VALUES (3595,'2003-11-16','13:30:00','17:00:00','','S13: Scientific Computing with Python -- Advanced Topics','S13: Scientific Computing with Python -- Advanced Topics','Eric A. Jones (Enthought, inc.)','Eric Jones (Enthought), Travis Oliphant (Brigham Young University), Pat Miller (Lawrence Livermore National Laboratory)','Tutorial','Content-Level: 0% Introductory 50% Intermediate 50% Advanced\n\nAbstract: Python has emerged as an excellent choice for scientific computing because of its simple syntax, ease of use, and elegant multi-dimensional array arithmetic. Its interpreted evaluation allows it to serve as both the development language and the command line environment in which to explore data. Python also excels as a \"glue\" language that joins multiple legacy codes written in different languages together -- a common need in the scientific arena.\n\nThis half-day tutorial covers advanced topics in scientific computing such as integrating Python with other languages and parallel programming. Wrapping Fortran, C, and C++ codes, either for optimized speed or for accessing legacy code bases is covered in the middle section. Tools such as SWIG, f2py, and Boost Python are all discussed along with common pitfalls and good design practices. The final session covers parallel programming with an emphasis on pyMPI. This tutorial is a companion class to a morning session that introduces Python to the scientific community. A Windows version of Python (Enthought Edition) will be available on CD for attendees to install and use during the tutorial. The installation includes Python, Numeric, SciPy, wxPython, and VTK as well as other packages useful for scientific computing.','S13_Out.pdf');
INSERT INTO events_temp VALUES (3596,'2003-11-16','13:30:00','17:00:00','','S15: Distributed and Collaborative Visualization','S15: Distributed and Collaborative Visualization','Ken Brodlie (University of Leeds)','Ken Brodlie (University of Leeds), David Duce (Oxford Brookes University), Jason Wood (University of Leeds)','Tutorial','Content-Level: 20% Introductory 80% Intermediate 0% Advanced\n\nAbstract: Visualization is a key component in understanding large datasets, including those generated as output from simulations on high performance computers. Computational steering allows the adjustment of parameters as a simulation is running, on the basis of the visualized results. This tutorial will give a brief introduction to visualization, and proceed to cover two key aspects: distributed visualization and collaborative visualization. Distributed visualization is of essential importance for computational steering, allowing the simulation to run on a remote high performance computer, but with the steering control and the visualization on the desktop. Collaborative visualization is of increasing importance, as more and more scientific research is done in teams, often geographically distributed. The tutorial will be illustrated throughout by live demonstrations of all the concepts, including a demonstration of Grid-enabled visualization where an existing visualization system, IRIS Explorer, is combined with Globus middleware, to provide both distributed and collaborative visualization in a single framework.','S15_Out.pdf');
INSERT INTO events_temp VALUES (3597,'2003-11-16','13:30:00','17:00:00','','S14: Computational Biology','S14: Computational Biology','Craig A. Stewart (Indiana University)','Craig A. Stewart (Indiana University)','Tutorial','Content-Level: 15% Introductory 70% Intermediate 15% Advanced\n\nAbstract: Computational biology, bioinformatics, genomics, systems biology and related areas stand to be very important to the high performance community. There are tremendous opportunities to advance knowledge in biological and biomedical research areas through the use of high performance computing. This tutorial will begin with a brief overview of the essential biological bases for the current revolution in life sciences computing. Topics to be covered in depth include: sequence alignment and pattern matching; protein structure prediction; phylogenetics; systems biology; grid computing applications; and thoughts about the future of computational biology. This tutorial is intended for people who are interested in a rapid and useful introduction to computational biology and high performance computing. Tutorial attendees can expect to have a basic understanding of the area of computational biology and have a real feel for the nature of the work in this area as a result of hands-on experience with key applications. There will be hands-on exercises as part of the tutorial. A limited number of laptops will be provided and assigned on a first-come, first-served basis. Attendees with laptops and wireless network adapters are encouraged to bring them to the tutorial. Attendee laptops must have ssh installed in order to participate but be aware that there will be no support available to debug problems with attendee laptops. Hands-on exercises may also be done throughout the week at the \"Research in Indiana\" exhibit.','S14_Out.pdf');
INSERT INTO events_temp VALUES (3598,'2003-11-16','08:30:00','12:00:00','','S06: The Grid: Software Standards for Cyberinfrastructure','S6: The Grid: Software Standards for Cyberinfrastructure','Carl Kesselman (USC Information Sciences Institute )','Carl Kesselman (USC Information Sciences Institute)','Tutorial','Content-Level: 75% Introductory 25% Intermediate 0% Advanced\n\nAbstract: Market pressures in both business and science are enabling the emergence of a common framework for distributed computing. The favored candidate at this time for this common framework is \"the Grid.\" The Grid community, organized around the Global Grid Forum (GGF), has made considerable progress toward establishing a sense of what the Grid is and how it should be developed. This tutorial will introduce attendees to the concepts and current status of the Grid computing movement in science and business. Attendees will leave with a roadmap of the concepts, available products and technologies, standards activities, and R&D associated with the Grid.','S6_Out.pdf');
INSERT INTO events_temp VALUES (3599,'2003-11-16','08:30:00','17:00:00','','S01: Production Linux Clusters 2003 - Architecture and System Software for Serious Computing','S1: Production Linux Clusters 2003 - Architecture and System Software for Serious Computing','Remy Evard (Argonne National Laboratory )','Remy Evard (Argonne National Laboratory), Susan Coghlan (Argonne National Laboratory), Peter Beckman (Argonne National Laboratory), William Saphir (none)','Tutorial','Content-Level: 40% Introductory 50% Intermediate 10% Advanced\n\nAbstract: Linux clusters have become the dominant computing platform for small and mid range computing, and have substantial penetration into the upper echelon of the top500 list. Clusters are available from dozens of vendors and there are even more ways to run them. However, due in large part to the huge range of hardware and software options for building clusters, clusters still require a great deal of expertise to plan, deploy, and support. Building a complete, robust, and easily-managed production cluster is still a significant challenge today. \n\nThis tutorial will explain how to design your next cluster, plan for it, buy it, install it, run it, manage it, evaluate performance, and keep users happy on it. We will consider current hardware, describe proven management techniques, and discuss several modern cluster software systems while attempting to remain distribution and package neutral. Our goal is not to talk about how to cobble cheap PCs into a fast computer or to advocate a specific package, but to focus on making your next production supercomputer a Linux cluster. \n\nThis tutorial is a full-day tutorial. The handouts include practical, current information that can be directly applied to cluster selection and management.','S1_Out.pdf');
INSERT INTO events_temp VALUES (3600,'2003-11-16','08:30:00','17:00:00','','S04: Real World Techniques for Scientific Applications of Scale','S4: Real World Techniques for Scientific Applications of Scale','Alice Evelyn Koniges (LLNL)','Alice Koniges (LLNL), Mark Seager (LLNL), Rolf Rabenseifner (High Performance Computing Center, University of Stuttgart), David Eder (LLNL)','Tutorial','Content-Level: 20% Introductory 45% Intermediate 35% Advanced\n\nAbstract: Teraflop performance is no longer a thing of the future as complex integrated 3D simulations drive supercomputer development. Today, most HPC systems are clusters of SMP nodes ranging from dual-CPU-PC clusters to the largest systems at the world\'s major computing centers. What are the major issues facing application code developers today? How do the challenges vary from cluster computing to the complex hybrid architectures with super scalar and vector processors? Finally, what is our path both architecturally and algorithmically to petaflop performance? What skills and tools are required, both of the application developer and the system itself? In this tutorial we address these questions and give tips, tricks, and tools of the trade for large-scale application development. A special emphasis is given to mixed-mode (combined MPI/OpenMP) programming. In the introduction, we provide an overview of terminology, hardware and performance. We describe the latest issues in implementing scalable parallel programming. We draw from a series of large application suites and discuss specific challenges and problems encountered in parallelizing these applications. Additional topics cover parallel I/O, scripting languages and code wrappers. We conclude with a road map for the possible paths to petaflop computing. More information can be obtained from http://www.hlrs.de/people/rabenseifner/publ/SC2003-tutorial.html.','S4_Out.pdf');
INSERT INTO events_temp VALUES (3601,'2003-11-16','08:30:00','17:00:00','','S03: A practical approach to performance analysis and modeling of large-scale systems','S3: A practical approach to performance analysis and modeling of large-scale systems','Darren James Kerbyson (Los Alamos National Laboratory )','Adolfy Hoisie (Los Alamos National Laboratory), Darren J. Kerbyson (Los Alamos National Laboratory)','Tutorial','Content-Level: 30% Introductory 50% Intermediate 20% Advanced\n\nAbstract: This tutorial presents a practical approach to the performance modeling of large-scale, scientific applications on high performance systems. The defining characteristic of our tutorial involves the description of a proven modeling approach, developed at Los Alamos, of full-blown scientific codes, ranging from a few thousand to over 100,000 lines, that has been validated on systems containing 1,000’s of processors. The goal is to impart a detailed understanding of factors contributing to the resulting performance of an application when mapped onto a given HPC platform. Performance modeling is the only technique that can quantitatively elucidate this understanding. We show how models are constructed and demonstrate how they are used to predict, explain, diagnose, and engineer application performance in existing or future codes and/or systems. Notably, our approach does not require the use of specific tools but rather is applicable across commonly used environments. Moreover, since our performance models are parametric in terms of machine and application characteristics, they imbue the user with the ability to “experiment ahead” with different system configurations or algorithms/coding strategies. Both will be demonstrated in studies emphasizing the application of these modeling techniques including: verifying system performance, comparison of large-scale systems, and examination of possible future systems.','S3_Out.pdf');
INSERT INTO events_temp VALUES (3602,'2003-11-16','08:30:00','12:00:00','','S05: An Introduction to the TotalView Debugger','S5: An Introduction to the TotalView Debugger','Blaise M. Barney (Lawrence Livermore National Laboratory)','Blaise M. Barney (Lawrence Livermore National Laboratory)','Tutorial','Content-Level: 50% Introductory 25% Intermediate 25% Advanced\n\nAbstract: The TotalView debugger has become the \"de facto standard\" within the High Performance Computing industry for debugging cross-platform, multi-model parallel applications. TotalView\'s easy-to-use GUI provides the means to explore what an application is \"really\" doing at the deepest level. TotalView has been selected by the U.S. Department of Energy as the debugger of choice for its Advanced Simulation and Computing (ASCI) program. TotalView has likewise been selected by a growing number of telco, petroleum, aerospace, university and HPC organizations as their debugger of choice. \n\nThis tutorial begins by covering all of the essentials for using TotalView in a general programming environment. After covering these essentials, an emphasis is placed upon debugging parallel programs, including threaded, MPI, OpenMP and hybrid programs. In addition to the many screen captures of debug sessions presented in the tutorial, hands-on exercises are included to maximize benefit to the attendee. There will be a limited number of workstations provided for the hands-on portion of the tutorial, assigned on a first-come, first-served basis. You may participate in the hands-on portion of the tutorial using your own laptop, but be aware that there will be no support available to debug problems with attendee laptops. Attendee laptops must be configured for wireless access and must have an X11 environment and an ssh client installed.','S5_Out.pdf');
INSERT INTO events_temp VALUES (3603,'2003-11-16','08:30:00','17:00:00','','S02: A Tutorial Introduction to High Performance Data Transport','S2: A Tutorial Introduction to High Performance Data Transport','Robert L. Grossman (University of Illinois at Chicago )','Bill Allcock (Argonne National Laboratory), Robert Grossman (University of Illinois at Chicago), Steven Wallace (Indiana University)','Tutorial','Content-Level: 40% Introductory 40% Intermediate 20% Advanced\n\nAbstract: Developing high performance data intensive applications requires not only high performance computing resources but just as importantly high performance data transport linking them. With emerging 1, 2.5 and 10 Gigabit per second links, there is unprecedented opportunity for creating distributed data intensive applications. In this tutorial, we give an overview of different protocols for high performance data transport and how to build applications using them.','S2_Out.pdf');
INSERT INTO events_temp VALUES (3604,'2003-11-16','08:30:00','17:00:00','','S09: Programming with the Partitioned Global Address Space Model','S9: Programming with the Partitioned Global Address Space Model','Tarek El-Ghazawi (GWU)','William Carlson (IDA/CCS), Tarek El-Ghazawi (GWU), Kathy Yelick (UCB), Robert Numrich (UMN)','Tutorial','Content-Level: 30% Introductory 50% Intermediate 20% Advanced\n\nAbstract: The partitioned global address space programming model, also known as the distributed shared address space model, has the potential to achieve a balance between ease-of-programming and performance. As in the shared-memory model, one thread may directly read and write memory allocated to another. At the same time, the model gives programmers control over features that are essential for performance, such as locality. \n\nThe model is receiving rising attention and there are now several compilers for languages based on this model. In this tutorial, we present the concepts associated with this model including execution, synchronization, workload distribution, and memory consistency models. We then introduce three parallel programming language instances. These are Unified Parallel C or UPC, Co-Array FORTRAN, and Titanium, a JAVA-based language. It will be shown through experimental studies that these paradigms can deliver performance comparable with message passing, while maintaining the ease of programming of the shared memory model. \n\nThrough hands-on exercises on parallel systems, the audience will get a first-hand exposure to these powerful paradigms. There will be a limited number of workstations provided for the hands-on portion of the tutorial, assigned on a first-come, first-served basis. You may participate in the hands-on portion of the tutorial using your own laptop, but be aware that there will be no support available to debug problems with attendee laptops. Attendee laptops must be configured for wireless access and must have ssh installed.','S9_Out.pdf');
INSERT INTO events_temp VALUES (3605,'2003-11-16','08:30:00','12:00:00','','S08: Introduction to Scientific Computing with Python','S8: Introduction to Scientific Computing with Python','Eric A. Jones (Enthought, inc. )','Eric A Jones (Enthought, inc), Travis Oliphant (Brigham Young University), Pat Miller (Lawrence Livermore National Laboratory)','Tutorial','Content-Level: 50% Introductory 50% Intermediate 0% Advanced\n\nAbstract: Python has emerged as an excellent choice for scientific computing because of its simple syntax, ease of use, and elegant multi-dimensional array arithmetic. Its interpreted evaluation allows it to serve as both the development language and the command line environment in which to explore data. Python also excels as a \"glue\" language that joins multiple legacy codes together that were written in different languages -- a common need in the scientific arena. \n\nThis tutorial introduces the Python programming language to scientists. The pace is fast and geared toward individuals already comfortable with a programming language such as Matlab, C, or Fortran. Attendees will learn the basic constructs of the language and how to do basic numerical analysis with Python. The 3rd section covers the SciPy library (www.scipy.org) that provides modules for linear algebra, signal processing, optimization, statistics, genetic algorithms, interpolation, ODE solvers, special functions, etc. The final section focuses on the Chaco graphics infrastructure for creating scientific plots. There is a companion tutorial in the afternoon that covers more advanced topics such as wrapping legacy Fortran and C/C++ codes as well as parallel programming with Python. A Windows version of Python (Enthought Edition) will be available on CD for attendees to install and use during the tutorial. The installation includes Python, Numeric, SciPy, wxPython, and VTK as well as other packages useful for scientific computing.','S8_Out.pdf');
INSERT INTO events_temp VALUES (3606,'2003-11-16','08:30:00','12:00:00','','S07: High-Performance Storage','S7: High-Performance Storage','David Allan Pease (IBM Research )','David Allan Pease (IBM Research), Darrell D.E. Long (U.C. Santa Cruz)','Tutorial','Content-Level: 25% Introductory 60% Intermediate 15% Advanced\n\nAbstract: The demand for storage in computing systems is growing at an unprecedented rate; this growth is fueled primarily by richer data content, the plummeting cost of raw storage, and the ability of almost everyone in our society to create and save data of every kind. This growth, in turn, is driving the need for higher-performance, easier-to-manage storage. Although storage device performance is constantly increasing, these increases are not keeping pace with the demand for storage performance. Traditional approaches to improving storage performance include caching, prefetching, and data striping and parallel I/O. New technologies include storage subsystems with advanced features like storage virtualization and remote replication, object-based storage, storage area networks (SANs), and SAN-based file systems. Current research is investigating features such as active disks and self-organizing, self-managing, self-optimizing storage systems. This tutorial introduces the components of modern storage systems and the factors and techniques involved in designing high-performance storage environments, and looks at current and future developments in the storage industry.','S7_Out.pdf');
INSERT INTO events_temp VALUES (3607,'2003-11-16','08:30:00','12:00:00','','S10: An Introduction to Quantum Information','S10: An Introduction to Quantum Information','Carl J. Williams (NIST)','Carl J. Williams (National Institute of Standards and Technology)','Tutorial','Content-Level: 60% Introductory 40% Intermediate 0% Advanced\n\nAbstract: Quantum information is a revolutionary paradigm that puts information on a fundamental physical level and is creating a cross-disciplinary interaction between physicists, mathematicians, and computer scientists. In the popular press, quantum computing is often presented as providing a massively parallel new computational paradigm for solving computationally intractable problems like factoring. The first half of the tutorial will introduce the audience to the elementary concepts of quantum information and describe how these general concepts and ideas give rise to quantum communication and quantum computation. The second half of the tutorial will then begin with an overview of the state-of-the-art in quantum key distribution and the technological breakthroughs and bottlenecks in this rapidly developing field. This will be followed by a similar overview of the much more speculative and longer-term problem of building a rudimentary quantum processor, including a discussion of the numerous technological problems that must be overcome to move from a rudimentary to a scalable quantum processor. The tutorial will then conclude with some remarks on the possible industrial implications of this technology in the hopes of providing a broader technological assessment of why quantum information is truly revolutionary and worth pursuing.','S10_Out.pdf');
INSERT INTO events_temp VALUES (3608,'2003-11-17','13:30:00','17:00:00','','M11: Performance Tools 101: Principles of Experimental Performance Measurement and Analysis','M11: Performance Tools 101: Principles of Experimental Performance Measurement and Analysis','Luiz DeRose (IBM Research )','Luiz DeRose (IBM Research), Bernd Mohr (Research Centre Juelich), Kevin London (ICL / University of Tennessee)','Tutorial','Content-Level: 20% Introductory 60% Intermediate 20% Advanced\n\nAbstract: Application developers are facing new and more intricate performance tuning and optimization problems as parallel architectures become more complex. Hence, there are a growing number of application developers who want to understand more about the performance characteristics of their applications. In this tutorial we will introduce the theory and practice of tools development, with an overview of the major issues, techniques, and resources for performance measurement of applications. Our goals are twofold: first, we will provide enough information that users who want to develop simple performance tools for instrumentation, measurement, and analysis of their own code can attempt in-house development to fulfill their needs. Second, we will discuss some of the open problems in the area of performance measurement and analysis for researchers and students interested in working in this field. Areas covered will include instrumentation, performance measurement, performance data representation, analysis, and visualization techniques.','M11_Out.pdf');
INSERT INTO events_temp VALUES (3609,'2003-11-17','13:30:00','17:00:00','','M12: Power and Energy Conservation for Servers','M12: Power and Energy Conservation for Servers','Michael Kistler (IBM)','Ricardo Bianchini (Rutgers University), Ram Rajamony (IBM), Michael Kistler (IBM)','Tutorial','Content-Level: 30% Introductory 50% Intermediate 20% Advanced\n\nAbstract: Power and energy consumption have recently become key concerns for high-performance servers, especially when they are deployed in large cluster configurations as in compute farms and data centers. In fact, it has been reported that power and energy account for a significant fraction of the operational cost of such clusters. Furthermore, computing nodes in densely packed cluster systems also often overheat, leading to intermittent failures. These problems are likely to worsen as newer server-class processors offer higher levels of performance at the expense of increased power consumption. \n\nThis tutorial will present an in-depth look at techniques for power and energy management in standalone servers, clustered servers, and storage servers. We will start by motivating the topic, then discuss various direct and indirect methods for measuring power/energy consumption, appropriate metrics to use, and where power and energy are expended in servers today. Next, we will address mechanisms for power management and the characteristics of server workloads from a power/energy conservation viewpoint. Components of power and energy conservation policies will then be discussed, followed by the actual policies to use. An important component of the tutorial will be a discussion on practical techniques. We will conclude with a discussion of the future challenges in this area.','M12_Out.pdf');
INSERT INTO events_temp VALUES (3610,'2003-11-17','13:30:00','17:00:00','','M13: HPC and InfiniBand Architecture in Practice','M13: HPC and InfiniBand Architecture in Practice','Lars E. Jonsson (Intel Americas, Inc)','Lars E Jonsson (Intel Americas, Inc.), Bill Magro (Intel Americas, Inc.)','Tutorial','Content-Level: 70% Introductory 30% Intermediate 0% Advanced\n\nAbstract: InfiniBand architecture (IBA) is an open standard interconnect architecture designed to meet the challenging I/O and inter-process messaging needs of clustered compute and data centers. IBA\'s native data rate of 1250 MByte/s in each direction leapfrogs most current interconnect technologies while still delivering the low latencies required by HPC applications. In the last year, a range of HPC-oriented InfiniBand products - host adapter cards, fat-tree topology switches, device drivers, and MPI implementations - have come to market, with initial deployments occurring in HPC centers worldwide. Now is the time for computer center managers and engineers to evaluate InfiniBand as an interconnect technology for future clusters. \n\nThis tutorial first introduces the InfiniBand architecture\'s key features and capabilities and then gives practical guidance about how to build and configure a cluster of (Linux) compute servers targeted to HPC users. In addition to an overview of InfiniBand architecture and products, we will cover a) hardware installation and configuration; b) switch topologies and associated routing and management software; c) user-level protocols and message passing interface (MPI) implementations; d) low-level and application-level performance results; and e) status of the open source InfiniBand Linux software infrastructure (infiniband.sourceforge.net).','M13_Out.pdf');
INSERT INTO events_temp VALUES (3611,'2003-11-17','13:30:00','17:00:00','','M14: Customer Owned Networks: Strategy, Implementation, and Example','M14: Customer Owned Networks: Strategy, Implementation, and Example','William Kirk Nickless (Argonne National Lab )','Bill Nickless (Argonne National Laboratory), Tony Rimovsky (NCSA)','Tutorial','Content-Level: 30% Introductory 70% Intermediate 0% Advanced\n\nAbstract: Within the past 5 years, the research, education, and technical computing community has deployed regional optical networks, which utilize Wavelength Division Multiplexing (WDM) technology to interconnect sites and resources. These networks can provide several advantages over circuits provided by the telecommunications industry, particularly related to direct costs and flexibility. \n\nThe first section of this tutorial (Strategy) will explore the strategic challenges involved in planning the deployment of an optical network. The second section (Implementation) will explore the technology and practices needed to create such an optical network. Much of the discussion will surround experiences and continuing challenges in the State of Illinois I-WIRE project.','M14_Out.pdf');
INSERT INTO events_temp VALUES (3612,'2003-11-17','08:30:00','17:00:00','','M01: How to Build a Grid Service Using the Globus Toolkit(R) 3','M1: How to Build a Grid Service Using the Globus Toolkit(R) 3','Lisa C. Childers (Argonne National Laboratory )','Lisa C Childers (The Globus Project), Charles A Bacon (The Globus Project), Ben Z Clifford (The Globus Project), Ravi K Madduri (The Globus Project)','Tutorial','Content-Level: 50% Introductory 50% Intermediate 0% Advanced\n\nAbstract: This full-day tutorial is designed to teach developers how to build a Grid Service that is compliant with the Open Grid Services Infrastructure (OGSI) Specification using the Globus Toolkit(R) 3 (GT3). The OGSI Specification is a community standard published by the Global Grid Forum that is expected to form the architectural foundation of next-generation Grid Computing. GT3 is the latest toolkit distribution from the Globus Project, which produces open source middleware used in building grids around the world. \n\nThe session begins with an introduction to the key concepts that define a Grid Service, including a tour of the OGSI Specification. The second section focuses on GT3, providing an overview of its architecture and functionality. The tutorial concludes with hands-on exercises in which attendees learn how to build, host and interact with an OGSI-Compliant Grid Service. \n\nThis tutorial is geared toward developers and technical managers who want to learn about the latest release of the Globus Toolkit and how to apply fundamental concepts outlined in the Open Grid Services Infrastructure Specification. In order to participate, attendees are required to bring their own wireless-enabled, SC2003-network-capable laptops pre-loaded with a small set of open-source software. There will be no support available to debug problems with attendee laptops. A list of prerequisites for the tutorial is published at: http://www.globus.org/ogsa/tutorials/SC03/. Attendees must be able to run all the software listed in the prerequisites in order to participate in the hands-on portion of the tutorial.','M1_Out.pdf');
INSERT INTO events_temp VALUES (3613,'2003-11-17','08:30:00','12:00:00','','M07: PERC Tools for Performance Data Gathering and Analysis','M7: PERC Tools for Performance Data Gathering and Analysis','Philip John Mucci (ICL, UTK, NERSC )','Philip John Mucci (LBNL, UTK), Bronis R. de Supinski (LLNL), Celso Mendes (UIUC)','Tutorial','Content-Level: 25% Introductory 50% Intermediate 25% Advanced\n\nAbstract: This tutorial introduces a practical approach to the collection and analysis of performance data from production HPC applications. Attendees will emerge with a set of tools and techniques to explore the performance of their applications on large parallel machines. This process consists of assessing the computational demands, capability and complexity of the application code, as well as understanding the efficiency of the mapping of an application to a specific architecture. To facilitate this, the tutorial will walk through the use of these tools on the Parallel Ocean Program or POP. POP was chosen because it is a widely used, compute-intensive production application and its performance has been extensively examined. The primary audience for this tutorial will be application developers needing to quantify the performance of their codes. The tools are also of interest to system designers, administrators and integrators looking to monitor and maximize throughput. Attendees should be familiar with at least one scientific application, parallel programming environment and HPC platform. In addition, they should have a rudimentary understanding of processor architectures, memory hierarchies and message passing. There will be a limited number of workstations provided for the hands-on portion of the tutorial, assigned on a first-come, first-served basis. You may participate in the hands-on portion of the tutorial using your own laptop, but be aware that there will be no support available to debug problems with attendee laptops. Attendee laptops must be configured for wireless access and must have an X11 environment and an ssh client installed.','M7_Out.pdf');
INSERT INTO events_temp VALUES (3614,'2003-11-17','08:30:00','12:00:00','','M09: Cluster Construction Utilizing Selected Packages','M9: Cluster Construction Utilizing Selected Packages','Thomas Stephen Lehmann (Intel Corporation )','Thomas Stephen Lehmann (Intel Corporation), Richard Mark Libby (Intel Corporation)','Tutorial','Content-Level: 40% Introductory 40% Intermediate 20% Advanced\n\nAbstract: There are many ways to initially create a High Performance Compute Cluster. These break down into two main categories, DIY (do it yourself) and the packaged approach. \n\nThe DIY approach is instructive in that you must install and configure all of the individual software components yourself. This gives you a better understanding of what’s “under the hood” and should be tried at least once on a small cluster to gain a good understanding of the details of clustering technology. \n\nHowever, for small to medium clusters with time and cost constraints, a packaged approach is usually more practical if you can work within the constraints that these packages bring. Once constructed, most of these “instant” clusters can be further tailored to match local needs. \n\nThis tutorial will show two such packages, one free (OSCAR) and one commercially available (Scyld). In hands-on laboratory sessions each attendee will have a chance to install and test a small cluster utilizing both packages.','M9_Out.pdf');
INSERT INTO events_temp VALUES (3615,'2003-11-17','08:30:00','17:00:00','','M05: Component Software for High-Performance Computing: Using the Common Component Architecture','M5: Component Software for High-Performance Computing: Using the Common Component Architecture','David Bernholdt (Oak Ridge National Laboratory)','Robert C Armstrong (Sandia National Laboratories), David E Bernholdt (Oak Ridge National Laboratory), Lori Freitag Diachin (Sandia National Laboratories), Wael R Elwasif (Oak Ridge National Laboratory), Daniel S Katz (Jet Propulsion Laboratory, California Institute of Technology), James A Kohl (Oak Ridge National Laboratory), Gary Kumfert (Lawrence Livermore National Laboratory), Lois Curfman McInnes (Argonne National Laboratory), Boyana Norris (Argonne National Laboratory), Craig E Rasmussen (Los Alamos National Laboratory), Jaideep Ray (Sandia National Laboratories), Sameer Shende (University of Oregon), Shujia Zhou (Northrop Grumman/TASC)','Tutorial','Content-Level: 25% Introductory 50% Intermediate 25% Advanced\n\nAbstract: This full-day tutorial will introduce participants to the Common Component Architecture (CCA) at both conceptual and practical levels. \n\nComponent-based approaches to software development increase software developer productivity by helping to manage the complexity of large-scale software applications and facilitating the reuse and interoperability of code. The CCA was designed specifically with the needs of high-performance scientific computing in mind. It takes a minimalist approach to support language-neutral component-based application development for both parallel and distributed computing without penalizing the underlying performance, and with a minimal cost to incorporate existing code into the component environment. The CCA environment is also well suited to the creation of domain-specific application frameworks, whereas traditional domain-specific frameworks lack the generality and extensibility of the component approach.\n\nWe will cover the concepts of components and the CCA in particular, the tools provided by the CCA environment, the creation of CCA-compatible components, and their use in scientific applications. We will use a combination of traditional presentation and live demonstration during the tutorial. The tools and example software will also be available for download. This presentation updates the SC2002 tutorial with advances in the CCA tools and technology (including Fortran 90 support) and more extensive user experience.','M5_Out.pdf');
INSERT INTO events_temp VALUES (3616,'2003-11-17','08:30:00','17:00:00','','M03: Lustre: A Scalable, High-Performance Distributed File System','M3: Lustre: A Scalable, High-Performance Distributed File System','Radhika Vullikanti (Cluster File Systems, Inc. )','Radhika Vullikanti (Cluster File Systems, Inc.), Thomas M. Ruwart (University of Minnesota), Robert Read (Cluster File Systems, Inc.)','Tutorial','Content-Level: 25% Introductory 50% Intermediate 25% Advanced\n\nAbstract: Lustre is a scalable and high-performance distributed file system. It is a highly modular next generation storage architecture that combines established, open standards, the Linux operating system, an open networking API and innovative protocols into a reliable, network-neutral data storage and retrieval solution. \n\nThis tutorial begins with a discussion of object-based and intelligent storage devices, followed by a high-level overview of the Lustre file system and an in-depth discussion of the design concepts behind this innovative file system. \n\nThe tutorial will be interesting to a broad audience – it will be useful for anyone just curious about Lustre, academics interested in the design concepts behind this high-performance file system, hardware manufacturers interested in the development of intelligent storage devices, and customers who might be exploring various storage solutions for their cluster installations.\n\nThe audience participating in this tutorial is expected to be familiar with basic file system concepts and comfortable with UNIX shell-driven work for the hands-on sessions. A laptop with wireless or ethernet networking and an SSH client will be required for the hands-on session of this tutorial.','M3_Out.pdf');
INSERT INTO events_temp VALUES (3617,'2003-11-17','08:30:00','17:00:00','','M04: Using MPI-2: Advanced Features of the Message-Passing Interface','M4: Using MPI-2: Advanced Features of the Message-Passing Interface','Ewing Lusk (Argonne National Laboratory )','William Gropp (Argonne National Laboratory), Ewing (Rusty) Lusk (Argonne National Laboratory), Rob Ross (Argonne National Laboratory), Rajeev Thakur (Argonne National Laboratory)','Tutorial','Content-Level: 20% Introductory 40% Intermediate 40% Advanced\n\nAbstract: This tutorial is about how to use MPI-2, the collection of advanced features that were added to MPI (Message-Passing Interface) by the second MPI Forum. These features include parallel I/O, one-sided communication, dynamic process management, language interoperability, and some miscellaneous features. Implementations of MPI-2 (or significant subsets thereof) are now available both from vendors and from open-source projects. For example, the one-sided communication functions of MPI-2 are being used successfully in applications running on the Earth Simulator. In other words, MPI-2 can now really be used in practice.\n\nThis tutorial explains how to use MPI-2 and, in particular, how to use it in a way that results in high performance. We present each feature of MPI-2 in the form of a series of examples (in C, Fortran, and C++), starting with simple programs and moving on to more complex ones. We also discuss how to combine MPI with OpenMP. We assume that attendees are familiar with the basic message-passing concepts of MPI-1. \n\nThe tutorial will feature a hands-on session in which attendees will be able to run MPI-2 programs on their own laptops with the latest version of MPICH2, which we will distribute on CDs.','M4_Out.pdf');
INSERT INTO events_temp VALUES (3618,'2003-11-17','08:30:00','17:00:00','','M06: Reconfigurable Supercomputing Systems','M6: Reconfigurable Supercomputing Systems','Tarek El-Ghazawi (GWU)','Tarek El-Ghazawi (GWU), Maya Gokhale (LANL), Duncan Buell (USC), Kris Gaj (GMU)','Tutorial','Content-Level: 40% Introductory 40% Intermediate 20% Advanced\n\nAbstract: The synergistic advances in high-performance computing and in reconfigurable computing based on field programmable gate arrays (FPGAs) have engendered a new class of supercomputing systems, namely reconfigurable supercomputing systems. Such systems inherently support both fine-grain and coarse-grain parallelism, and can dynamically tune their architecture to fit applications. Advances in this area have progressed at different levels. At the network level, researchers have extended job management systems to exploit networked reconfigurable resources in cluster and grid computing fashions. At the single system level, steps have been taken towards the development of massively parallel systems of microprocessors and reconfigurable computing capabilities. Programming such systems can be quite challenging as programming FPGA devices can essentially involve hardware design. This has been addressed by significant developments in compiler technologies and programming tools for these systems. This tutorial will introduce the field of reconfigurable supercomputing and the advances made so far in systems, programming tools, applications, and compiler technology. In addition to concepts and technology coverage, the tutorial offers first-hand exposure to this field through carefully crafted hands-on experience on two state-of-the-art reconfigurable machines. There will be a limited number of workstations provided for the hands-on portion of the tutorial, assigned on a first-come, first-served basis. You may participate in the hands-on portion of the tutorial using your own laptop, but be aware that there will be no support available to debug problems with attendee laptops. Attendee laptops must be configured for wireless access and must have an ssh client installed.','M6_Out.pdf');
INSERT INTO events_temp VALUES (3619,'2003-11-17','08:30:00','12:00:00','','M08: Vector Performance Programming','M8: Vector Performance Programming','Ed Kornkven (ARSC)','Edward Kornkven (ARSC), Andrew Johnson (AHPCRC)','Tutorial','Content-Level: 25% Introductory 50% Intermediate 25% Advanced\n\nAbstract: This tutorial will provide an overview of the current state of the art regarding vector computer systems and their application to science. It consists of three parts:\n\n - Overview: the origin and evolution of vector systems, basic principles of the hardware and software, and differences between currently available vector systems and vector-like features in commodity processors.\n\n - Basic examples: identification of code structures that do and don\'t vectorize, modification of non-vectorizing structures to allow vectorization, vector libraries, and vector-friendly programming guidelines. This section will enable programmers to develop vector-friendly code and adapt existing codes to vectorize. \n\n - Advanced examples: tuning for memory bandwidth/architectures, caches and hybrid programming, combining parallel and vector optimization. Specific features of available vector platform hardware and software and when/how to exploit these will be considered.\n\nIllustrations will be included from currently available vector systems such as the Cray SV1, Cray X1, NEC SX6 and others. Illustrations will include short examples and real world applications, both those developed in a vector environment and those ported to vector systems from a workstation environment. This tutorial will be of interest to those developing codes that might run on vector systems in the future.','M8_Out.pdf');
INSERT INTO events_temp VALUES (3620,'2003-11-17','08:30:00','12:00:00','','M10: Computer Protection at Open Scientific Facilities','M10: Computer Protection at Open Scientific Facilities','Stephen Lau (NERSC/LBNL)','William T Kramer (NERSC/LBNL), Stephen Lau (NERSC/LBNL), Dr. Vern Paxson (LBNL/ICSI), James Rothfuss (LBNL)','Tutorial','Content-Level: 10% Introductory 50% Intermediate 40% Advanced\n\nAbstract: The ability for scientists to collaborate unfettered over networks is critical in high performance computing (HPC) environments. This need, however, is tempered by the realities of today\'s interconnected computational environments where protection from unauthorized access and usage is a necessity. How does one strike an effective balance between the needs of an open scientific research facility and the need to protect the site from attacks? What challenges lie ahead in high performance security? \n\nThis tutorial addresses these questions by exploring various topics of computer security as it relates to an open, high-performance computer facility. Some of the topics we will address are: \n\n1) The unique nature and demands within an HPC environment \n\n2) Addressing the needs of computer protection in an HPC environment \n\n3) An overview of current trends in attacks and incidents \n\n4) Intrusion detection in an HPC environment \n\n5) The future of high performance computing protection \n\nSCinet, the SC conference network, resembles networks at open scientific facilities. Some of the tools deployed at open scientific facilities are also deployed at SC for computer protection. We will show real network attack statistics collected at SC03 and explain how the techniques described in the tutorial are in use at SC.','M10_Out.pdf');
INSERT INTO events_temp VALUES (3621,'2003-11-17','08:30:00','17:00:00','','M02: Applications in the TeraGrid Environment','M2: Applications in the TeraGrid Environment','John Towns (NCSA/Univ of Illinois)','John Towns (NCSA), Nancy Wilkins-Diehr (SDSC), Sharon Brunett (CACR), Sandra Bittner (ANL), Derek Simmel (PSC)','Tutorial','Content-Level: 0% Introductory 60% Intermediate 40% Advanced\n\nAbstract: The TeraGrid is the foundation of the NSF’s national cyberinfrastructure program and is positioned to ignite the imaginations of new grid communities while delivering the next level of innovation in grid computing. It will connect scientific instruments, data collections and other unique resources as well as offer significant amounts of compute power. The TeraGrid includes 20 teraflops of computing power, 1 petabyte of data storage, high-resolution visualization environments, and grid services. The TeraGrid is anchored with Intel-based Linux clusters at ANL, Caltech, NCSA and SDSC and an Alpha-based cluster at PSC that are connected by a 40 Gbps network.\n\nThis tutorial includes an overview of the TeraGrid environment and configuration and descriptions of available services. The software foundation is based on the NSF Middleware Initiative (NMI) and programming techniques learned in this tutorial will be applicable in many grid communities. Attendees can expect to learn to manage a grid identity and work through several usage scenarios by building and launching sample jobs. Several working applications will be used as examples to illustrate these capabilities. Attendees are expected to be familiar with Fortran or C programming, MPI, and basic Unix environments. \n\nNOTE: There will be a limited number of laptops provided for the hands-on portion of the tutorial, assigned on a first-come, first-served basis. You may participate in the hands-on portion of the tutorial using your own laptop, but be aware that there will be no support available to debug problems with attendee laptops. Information on laptop software and configuration requirements is available at: http://mithril.ncsa.uiuc.edu/SC03/Tutorial_M2/','M2_Out.pdf');
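--
-- Example query (a minimal sketch, not from the original dump): the
-- events_temp DDL appears elsewhere in this file, so the column names used
-- below are assumptions read off the positional VALUES lists above, i.e.
-- (eventID, date, startTime, endTime, room, mainTitle, shortTitle, chair,
-- participants, eventType, abstract, slidesFile). With those assumed names,
-- the Monday tutorial program can be pulled out like this:
SELECT date, startTime, endTime, shortTitle, chair
FROM events_temp
WHERE eventType = 'Tutorial' AND date = '2003-11-17'
ORDER BY startTime, shortTitle;
--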
INSERT INTO events_temp VALUES (3622,'2003-11-18','13:30:00','14:00:00','','Grid Support','Synthesizing Realistic Computational Grids','Xian-He Sun (Illinois Institute of Technology )','Dong Lu (Northwestern University), Peter August Dinda (Northwestern university)','Paper','Realistic workloads are essential in evaluating middleware for computational grids. One important component is the raw grid itself: a network topology graph annotated with the hardware and software available on each node and link. This paper defines our requirements for grid generation and presents GridG, our extensible generator. We describe GridG in two steps: topology generation and annotation. For topology generation, we have both model and mechanism. We extend Tiers, an existing tool from the networking community, to produce graphs that obey recently discovered power laws of Internet topology. We also contribute to network topology theory by illustrating a contradiction between two laws and proposing a new version of one of them. For annotation, GridG captures intra- and inter-host correlations between attributes using conditional probability rules. We construct a set of rules, including one based on empirical evidence of OS concentration in subnets, that produce sensible host annotations.','');
INSERT INTO events_temp VALUES (3623,'2003-11-18','13:30:00','14:00:00','','Software Systems','Enabling the Efficient Use of SMP Clusters: The GAMESS/DDI Model','Eileen Kraemer (University of Georgia)','Ryan M. Olson (Iowa State University), Michael W. Schmidt (Iowa State University), Mark S. Gordon (Iowa State University), Alistair P. Rendell (Australian National University)','Paper','An important advance in cluster computing is the evolution from single processor clusters to multi-processor SMP clusters. Due to the increased complexity in the memory model on SMP clusters, new approaches are needed for applications that make use of distributed-memory paradigms. This paper presents new communications software developments that are designed to take advantage of SMP cluster hardware. Although the specific focus is on the central field of computational chemistry and materials science, as embodied in the popular electronic structure package GAMESS (General Atomic and Molecular Electronic Structure System), the impact of these new developments will be far broader in scope. Following a summary of the essential features of the distributed data interface (DDI) in the current implementation of GAMESS, the new developments for SMP clusters are described. The advantages of these new features are illustrated using timing benchmarks on several hardware platforms, using a typical computational chemistry application.','');
INSERT INTO events_temp VALUES (3624,'2003-11-18','13:30:00','14:00:00','','Compilation Techniques','A Compiler Analysis of Interprocedural Data Communication','John Feo (Sun Microsystems)','Yonghua Ding (Purdue University), Zhiyuan Li (Purdue University)','Paper','This paper presents a compiler analysis of data communication for the purpose of transforming ordinary programs into ones that run on distributed systems. Such transformations have been used for process migration and computation offloading to improve the performance of mobile computing devices. In a client-server distributed environment, the efficiency of an application can be improved by careful partitioning of tasks between the server and the client. Optimal task partitioning depends on the tradeoff between the computation workload and the communication cost. Our compiler analysis, assisted by a minimum set of user assertions, estimates the amount of data communication between procedures. The paper also presents experimental results based on an implementation in the GCC compiler. The static estimates for several multimedia programs are compared against dynamic measurement performed using Shade, Sun Microsystems\' instruction-level simulator. The results show high precision of the static analysis for most pairs of procedures.','');
INSERT INTO events_temp VALUES (3625,'2003-11-18','13:30:00','15:00:00','','Igniting Innovation: Cultivating Diverse High End Users from Emerging Communities','','Stephenie McLean (NCSA)','Roscoe Giles (Boston University)','SC Global, BOF','It is important that high performance networking and computing (HPNC) serve the interests of broader communities, especially underserved communities like minority communities in the US. This BOF brings together participants in SC from minority serving institutions, groups interested in the \'digital divide\' at the level of the Grid, as well as other interested parties to discuss how to increase representation and participation of underserved peoples and communities in HPNC. The primary contact for this activity is Stephenie McLean, NCSA and Roscoe Giles, Boston University.','');
INSERT INTO events_temp VALUES (3626,'2003-11-18','10:30:00','11:00:00','','Cluster-Based Servers','An Efficient Data Location Protocol for Self-organizing Storage Clusters','Ruth Aydt (NCSA)','Hong Tang (University of California, Santa Barbara), Tao Yang (University of California, Santa Barbara)','Paper','Component additions and failures are common for large-scale storage clusters in production environments. To improve availability and manageability, we investigate and compare data location schemes for a large self-organizing storage cluster that can quickly adapt to the additions or departures of storage nodes. We further present an efficient location scheme that differentiates between small and large file blocks for reduced management overhead compared to uniform strategies. In our protocol, small blocks, which are typically in large quantities, are placed through consistent hashing. Large blocks, far fewer in practice, are placed through a usage-based policy, and their locations are tracked by Bloom filters. The proposed scheme results in improved space utilization even with non-uniform cluster nodes. To achieve high scalability and fault resilience, this protocol is fully distributed, relies only on soft states, and supports data replication. We demonstrate the effectiveness and efficiency of this protocol through trace-driven simulation.','');
INSERT INTO events_temp VALUES (3627,'2003-11-18','10:30:00','11:00:00','','Tool Infrastructure','MRNet: A Software-Based Multicast/Reduction Network for Scalable Tools','Allen Malony (University of Oregon)','Philip C. Roth (University of Wisconsin-Madison), Dorian C. Arnold (University of Wisconsin-Madison), Barton P. Miller (University of Wisconsin-Madison)','Paper','We present MRNet, a software-based multicast/reduction network for building scalable performance and system administration tools. MRNet supports multiple simultaneous, asynchronous collective communication operations. MRNet is flexible, allowing tool builders to tailor its process network topology to suit their tool’s requirements and the underlying system’s capabilities. MRNet is extensible, allowing tool builders to incorporate custom data reductions to augment its collection of built-in reductions. We evaluated MRNet in a simple test tool and also integrated it into an existing, real-world performance tool with up to 512 tool back-ends. In the test tool, MRNet’s performance was comparable to that of previous tool infrastructure. In the real-world tool, we used MRNet not only for multicast and simple data reductions but also with custom histogram and clock skew detection reductions. Although the tool’s start-up protocol was already highly tuned, our tests of MRNet with 512 tool back-ends show significant improvements in average start-up latency.','');
INSERT INTO events_temp VALUES (3628,'2003-11-18','10:30:00','11:20:00','','Showcase I','Collaborative Virtual Design in Engineering','Lee Margetts (University of Manchester, MRCCS)','Lee Margetts (University of Manchester, MRCCS), Simon Bee (University of Salford)','SC Global, Showcase','Engineering design, particularly in the aerospace and automotive industries often involves several iterations through a design cycle before the final project emerges. This is because many different professionals are involved at different stages and may make minor modifications to the design, forcing the process back through earlier design phases. By integrating the different applications used into a single problem solving environment and bringing the professionals together, design times can be greatly reduced leading to increased productivity, turnover and customer satisfaction. \n\nUsing high performance computing and interactive virtual reality visualisation, such an environment is being developed at the Advanced Virtual Prototyping Research Centre (AVPRC). The core of this environment, Hydra, is a software framework that can deploy and control many interconnected applications, such as Computer Aided Design (CAD) and Finite Element Analysis (FEA), at geographically remote locations. Collaboration and computational steering can be conducted simultaneously from different ‘consoles’ – CAVES, Immersabenches and desktop computers. By integrating this problem solving environment with the Access Grid, collaborators can also benefit from multicast audio and visual communication with their colleagues. \n\nThis SC Global Showcase will demonstrate the use of the Access Grid in an important new application area - Virtual Prototyping. Perhaps the most significant innovation is the integration of the following emerging technologies into a single problem solving environment: \n\n· Collaboration (Access Grid) \n· Virtual Reality visualisation \n· Grid Computing \n· Computational Steering \n· Parallel engineering simulations','');
INSERT INTO events_temp VALUES (3629,'2003-11-18','11:00:00','11:30:00','','Tool Infrastructure','The Tool Daemon Protocol (TDP)','Allen Malony (University of Oregon)','Barton Miller (University of Wisconsin), Ana Cortés (Autonomous University of Barcelona), Miquel Senar (Autonomous University of Barcelona), Miron Livny (University of Wisconsin)','Paper','Runtime tools are crucial to program development. In desktop environments, we take tools for granted. In the Grid, it is difficult to find tools because of the complex interactions between applications, operating system and layers of job scheduling/management software. Therefore each runtime tool must be ported to run under each job management system; for m tools and n environments, the problem becomes an m*n effort, rather than m+n. The consequence is a paucity of tools in distributed and Grid computing environments. \n\nIn response, we analyzed several scheduling environments and runtime tools to better understand their interactions. We isolated what we believe are the essential interactions between tools, schedulers, resource managers, and applications. We propose a new standard, called the Tool Daemon Protocol, that codifies these interactions and provides the necessary communication functions. We implemented a pilot library and experimented with Parador, a prototype using the Paradyn performance tools under Condor.','');
INSERT INTO events_temp VALUES (3630,'2003-11-18','11:00:00','11:30:00','','Cluster-Based Servers','Handling Heterogeneity in Shared-Disk File Systems','Ruth Aydt (NCSA)','Changxun Wu (Johns Hopkins University), Randal Burns (Johns Hopkins University)','Paper','We develop and evaluate a system for load management in shared-disk file systems built on clusters of heterogeneous computers. The system generalizes load balancing and server provisioning. It balances file metadata workload by moving file sets among cluster server nodes. It also responds to changing server resources that arise from failure and recovery and dynamically adding or removing servers. The system is adaptive and self-managing. It operates without any a priori knowledge of workload properties or the capabilities of the servers. Rather, it continuously tunes load placement using a technique called adaptive, non-uniform (ANU) randomization. ANU randomization realizes the scalability and metadata reduction benefits of hash-based, randomized placement techniques. It also avoids hashing\'s drawbacks: load skew, inability to cope with heterogeneity, and lack of tunability. Simulation results show that our load-management algorithm performs comparably to a prescient algorithm.','');
INSERT INTO events_temp VALUES (3631,'2003-11-18','11:20:00','12:00:00','','Showcase I','Communities Collaborating to Bridge the Digital Divide: the Tribal Virtual Network','Maria Williams (College of Fine Arts, University of New Mexico)','Maria Williams (College of Fine Arts, University of New Mexico), Ron Solimon (Indian Pueblo Cultural Center), Vernon Lujan (Pojoaque Poeh Arts Center), Kevin Shendo (Jemez Walatowa Visitors Center), Lorene Willis (Jicarilla Apache Cultural Center), Tom Kennedy (Zuni Tribe)','SC Global, Showcase','The Tribal Virtual Network is a consortium of Native American tribal museums and education centers. It sees the Access Grid as not only a research tool for scientists, but as a collaborative ignition switch between communities. The TVN will 1) briefly discuss their unique application of the Access Grid, and 2) demonstrate how collaborative technologies are being used to overcome the \"Digital Divide\" by accessing education and training.\n\nThe intent of the 30-minute showcase demonstration is to:\n1. Clarify to the scientific community what the digital divide means to Native Americans \n2. Demonstrate how collaborative technologies can unite diverse communities.\n\nTVN consortium members will connect from New Mexico through their low-bandwidth, custom configured inSORS Access Grid nodes. They will start out by briefly introducing their respective tribes and cultural centers to visually show why so few minorities become researchers. Next, they will demonstrate a condensed version of an interactive, collaborative class in professional artist development. Finally, they will invite scientists and organizations across the Access Grid to collaborate with the consortium.','');
INSERT INTO events_temp VALUES (3632,'2003-11-18','11:30:00','12:00:00','','Cluster-Based Servers','Quantifying and Improving the Availability of High-Performance Cluster-Based Internet Services','Ruth Aydt (NCSA)','Kiran Nagaraja (Computer Science, Rutgers University), Neeraj Krishnan (Computer Science, Rutgers University), Ricardo Bianchini (Computer Science, Rutgers University), Richard P. Martin (Computer Science, Rutgers University), Thu D. Nguyen (Computer Science, Rutgers University)','Paper','Cluster-based servers can substantially increase performance when nodes cooperate to globally manage resources. We apply a quantification methodology to show, however, that following a cooperative strategy without additional compensating mechanisms results in a substantial availability loss. Specifically, we show that a sophisticated cooperative cluster-based web server gains a factor of 3 in performance but increases service unavailability by a factor of 10 over a non-cooperative version. We then show how to augment this web server with software components embodying a small set of high-availability techniques to regain the lost availability. Among other interesting observations, we show that the application of multiple high-availability techniques, each implemented independently in its own subsystem, can lead to inconsistent recovery actions. We also show that a novel technique called Fault Model Enforcement can be used to resolve such inconsistencies. Augmenting the server with these techniques led to a final expected availability of close to 99.99%.','');
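--
-- Example query (same assumed column names as above, a sketch rather than
-- part of the original dump): for rows with eventType = 'Paper', the sixth
-- column holds the session track (e.g. 'Grid Support', 'Tool
-- Infrastructure') rather than an event title, so counting Tuesday's papers
-- per track is a simple GROUP BY:
SELECT mainTitle AS track, COUNT(*) AS papers
FROM events_temp
WHERE eventType = 'Paper' AND date = '2003-11-18'
GROUP BY mainTitle
ORDER BY papers DESC;
--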
INSERT INTO events_temp VALUES (3633,'2003-11-18','11:30:00','12:00:00','','Tool Infrastructure','Conservative Scheduling: Using Predicted Variance to Improve Scheduling Decisions in Dynamic Environments','Allen Malony (University of Oregon)','Lingyun Yang (Department of Computer Science, University of Chicago), Jennifer M. Schopf (Math & Computer Science Division, Argonne National Laboratory), Ian Foster (Math & Computer Science Division, Argonne National Laboratory)','Paper','In heterogeneous and dynamic environments, efficient execution of parallel computations can require mappings of tasks to processors with performance that is both irregular and time-varying. While adaptive domain decomposition techniques have been used to address heterogeneous resource capabilities, temporal variations in those capabilities have seldom been considered. We propose a conservative scheduling policy that uses information about expected future variance in resource capabilities to produce more efficient data mapping decisions. We first present techniques for predicting CPU load at some future time point, average CPU load for some future time interval, and variation of CPU load over some future time interval. We then present a family of stochastic scheduling algorithms that exploit such predictions when making data mapping decisions. Finally, we describe experiments in which we apply our techniques to an astrophysics application. The results demonstrate that conservative scheduling can produce execution times that are significantly faster and less variable than other techniques.','');
INSERT INTO events_temp VALUES (3634,'2003-11-18','14:00:00','14:30:00','','Grid Support','Traffic-based Load Balance for Scalable Network Emulation','Xian-He Sun (Illinois Institute of Technology )','Xin Liu (UCSD), Andrew A. Chien (UCSD)','Paper','Load balance is critical to achieving scalability for large network emulation studies, which are of compelling interest for emerging Grid, peer-to-peer, and other distributed applications and middleware. Achieving load balance in emulation is difficult because of irregular network structure and unpredictable network traffic. We formulate load balance as a graph partitioning problem and apply classical graph partitioning algorithms to it. Using a large-scale network emulation system called MaSSF, we explore three approaches for partitioning, based on purely static topology information, combining topology and application placement information, and combining topology and application profile data. These studies show that exploiting topology and application placement information can achieve reasonable load balance, but a profile-based approach further improves load balance even for large-scale network emulation. In our experiments, PROFILE improves load balance by 50% to 66% and emulation time is reduced by up to 50% compared to purely static topology-based approaches.','');
INSERT INTO events_temp VALUES (3635,'2003-11-18','14:00:00','14:30:00','','Software Systems','Remote Visualization by Browsing Image Based Databases with Logistical Networking','Eileen Kraemer (University of Georgia)','Jin Ding (University of Tennessee), Jian Huang (University of Tennessee), Micah Beck (University of Tennessee), Shaotao Liu (University of Tennessee), Terry Moore (University of Tennessee), Stephen Soltesz (University of Tennessee)','Paper','Although Image Based Rendering (IBR) techniques using plenoptic functions have some important advantages over other approaches to the visualization of large datasets, they depend on the interactive use of huge IBR databases, which creates corresponding problems with network latency and server load. Consequently, IBR techniques, such as Light Fields, have been largely ignored for remote visualization. In this paper we describe the application of Logistical Networking, a new approach to deploying storage as a shared communication resource, to create a remote visualization system based on Light Fields. Our system extends existing work by employing a modified method of parameterization and data organization that supports more efficient prefetching, caching and lossless compression. Using this approach, we have been able to interactively browse multi-gigabyte, high-resolution Light Field databases across the Internet with latencies observed by the user that are comparable to local area network access.','');
INSERT INTO events_temp VALUES (3636,'2003-11-18','14:00:00','14:30:00','','Compilation Techniques','Automatic Type-Driven Library Generation for Telescoping Languages','John Feo (Sun Microsystems)','Arun Chauhan (Rice University), Cheryl McCosh (Rice University), Ken Kennedy (Rice University), Richard Hanson (Rice University)','Paper','Telescoping languages is a strategy to automatically generate highly-optimized domain-specific libraries. The key idea is to create specialized variants of library procedures through extensive offline processing. This paper describes a telescoping system, called ARGen, which generates high-performance Fortran or C libraries from prototype Matlab code for the linear algebra library, ARPACK. ARGen uses variable types to guide procedure specializations on possible calling contexts. \n\nARGen needs to infer Matlab types in order to speculate on the possible variants of library procedures, as well as to generate code. This paper shows that our type-inference system is powerful enough to generate all the variants needed for ARPACK automatically from the Matlab development code. The ideas demonstrated here provide a basis for building a more general telescoping system for Matlab.','');
INSERT INTO events_temp VALUES (3637,'2003-11-18','14:30:00','15:00:00','','Grid Support','A Self-Organizing Flock of Condors','Xian-He Sun (Illinois Institute of Technology )','Ali Raza Butt (Purdue University), Rongmei Zhang (Purdue University), Y. Charlie Hu (Purdue University)','Paper','Condor provides high throughput computing by leveraging idle-cycles on off-the-shelf desktop machines. It also supports flocking, a mechanism for sharing resources among Condor pools. Since Condor pools distributed over a wide area can have dynamically changing availability and sharing preferences, the current flocking mechanism based on static configurations can limit the potential of sharing resources across Condor pools. This paper presents a technique for resource discovery in distributed Condor pools using peer-to-peer mechanisms that are self-organizing, fault-tolerant, scalable, and locality-aware. Locality-awareness guarantees that applications are not shipped across long distances when nearby resources are available. Measurements using a synthetic job trace show that self-organized flocking reduces the maximum job wait time in queue for a heavily loaded pool by a factor of 10 compared with no flocking. Simulations of 1000 Condor pools are also presented and the results confirm that our technique discovers and utilizes physically nearby resources.','');
INSERT INTO events_temp VALUES (3638,'2003-11-18','14:30:00','15:00:00','','Compilation Techniques','Compiler Support for Exploiting Coarse-Grained Pipelined Parallelism','John Feo (Sun Microsystems)','Wei Du (Ohio State University), Renato Ferreira (Brazil), Gagan Agrawal (Ohio State University )','Paper','The emergence of grid and a new class of data-driven applications is making a new form of parallelism desirable, which we refer to as coarse-grained pipelined parallelism. Here, the computations associated with an application are carried out in several stages, which are executed on a pipeline of computing units. This paper reports on a compilation system developed to exploit this form of parallelism. Our compiler is responsible for selecting a set of candidate filter boundaries, determining the volume of communication required if a particular boundary is chosen, performing the decomposition, and generating code in which each filter unpacks data from a received buffer, iterates over its elements, and packs and forwards a buffer to the next stage. The paper reports results from a detailed evaluation of our current compiler using four data-driven applications.','');
INSERT INTO events_temp VALUES (3639,'2003-11-18','14:30:00','15:00:00','','Software Systems','Visualizing Large-Scale Earthquake Simulations','Eileen Kraemer (University of Georgia)','Kwan-Liu Ma (University of California at Davis), Aleksander Stompel (University of California at Davis), Jacobo Bielak (Carnegie Mellon University ), Omar Ghattas (Carnegie Mellon University), Eui Joong Kim (Carnegie Mellon University)','Paper','This paper presents a new parallel rendering algorithm and its performance for the visualization of time-varying unstructured volume data generated from very large-scale earthquake simulations. The algorithm is used to visualize 3D seismic wave propagation generated from a 0.5 Hz simulation of the Northridge earthquake, which is the highest resolution volume visualization of an earthquake simulation performed to date. The scalable, high-fidelity visualization solution we provide allows scientists to explore the temporal, spatial, and visualization domains of their data at high resolution. This degree of explorability, likely not presently available to most computational science groups, should lead to many new insights.','');
INSERT INTO events_temp VALUES (3640,'2003-11-18','15:30:00','16:00:00','','Supercomputing Applications','Applications of Algebraic Multigrid to Large-Scale Finite Element Analysis of Whole Bone Micro-Mechanics on the IBM SP','John Gustafson (Sun Microsystems Inc.)','Mark F. Adams (Sandia National Laboratories), Harun H. Bayraktar (University of California, Berkeley), Tony M. Keaveny (University of California, Berkeley), Panayiotis Papadopoulos (University of California, Berkeley)','Paper','Accurate finite element analyses of whole bone require the solution of large sets of algebraic equations. Multigrid has proven to be an effective approach to the design of highly scalable linear solvers for solid mechanics problems. We present some of the first applications of scalable linear solvers, on massively parallel computers, to whole vertebral body structural analysis. We analyze the performance of our algebraic multigrid (AMG) methods on problems with over 537 million degrees of freedom on IBM SP (LLNL and SDSC) parallel computers. We demonstrate excellent parallel scalability, both in the algorithms and the implementations on IBM SPs, and analyze the nodal performance of the important AMG kernels on the IBM Power3 and Power4 architectures.','');
INSERT INTO events_temp VALUES (3641,'2003-11-18','15:30:00','16:00:00','','Tools and Services for Grids','Nondeterministic Queries in a Relational Grid Information Service','Roland Wismüller (LRR-TUM, Technische Universität München)','Peter Dinda (Northwestern University, Computer Science), Dong Lu (Northwestern University, Computer Science)','Paper','A Grid Information Service (GIS) stores information about the resources of a distributed computing environment and answers questions about it. We are developing RGIS, a GIS system based on the relational data model. RGIS users can write SQL queries that search for complex compositions of resources that meet collective requirements. Executing these queries can be very expensive, however. In response, we introduce the nondeterministic query, an extension to the SELECT statement, which allows the user (and RGIS) to trade off between the query\'s running time and the number of results. The results are a random sample of the deterministic results, which we argue is sufficient and appropriate. Herein we describe RGIS, the nondeterministic query extension, and its implementation. Our evaluation shows that a meaningful tradeoff between query time and results returned is achievable, and that the tradeoff can be used to keep query time largely independent of query complexity.','');
INSERT INTO events_temp VALUES (3642,'2003-11-18','15:30:00','16:00:00','','Performance and Reliability','Performance Comparison of MPI Implementations over InfiniBand, Myrinet and Quadrics','Bernd Mohr (Forschungszentrum Juelich)','Jiuxing Liu (The Ohio State University), Balasubramanian Chandrasekaran (The Ohio State University), Jiesheng Wu (The Ohio State University), Weihang Jiang (The Ohio State University), Sushmitha Kini (The Ohio State University), Weikuan Yu (The Ohio State University), Darius Buntinas (The Ohio State University), Pete Wyckoff (Ohio Supercomputer Center), D. K. Panda (The Ohio State University)','Paper','In this paper, we present a comprehensive performance comparison of MPI implementations over InfiniBand, Myrinet and Quadrics. Our performance evaluation consists of two major parts. The first part consists of a set of MPI level micro-benchmarks that characterize different aspects of MPI implementations. The second part of the performance evaluation consists of application level benchmarks. We have used the NAS Parallel Benchmarks and the sweep3D benchmark. We not only present the overall performance results, but also relate application communication characteristics to the information we acquired from the micro-benchmarks. Our results show that the three MPI implementations all have their advantages and disadvantages. For our 8-node cluster, InfiniBand can offer significant performance improvements for a number of applications compared with Myrinet and Quadrics when using the PCI-X bus. Even with just the PCI bus, InfiniBand can still perform better if the applications are bandwidth-bound.','');
INSERT INTO events_temp VALUES (3643,'2003-11-18','15:30:00','17:00:00','','Quality of Experience in Advanced Collaborative Environments','','Brian Corrie (New Media Innovation Centre - Immersive Media Lab, Canada)','Brian Corrie (New Media Innovation Centre - Immersive Media Lab, Canada), Andrew Patrick (IIT/NRC - Mutual Media Lab, Canada)','SC Global, BOF','Today’s digital media, interaction devices, and networking technologies have the potential to drastically alter the way people communicate and collaborate. Ubiquitous displays with advanced capabilities, combined with rich interaction methods, allow for extremely complex collaborative environments. In addition, modern networking allows us to have ad-hoc local communication between devices, wireless network connectivity, and very high-bandwidth, long-distance connectivity between remote sites. This combination of interaction and networking technologies allows us to connect interaction environments together in extremely complex ways. \n\nThe goal of an Advanced Collaborative Environment, however, is to bring together the right people and the right data at the right time in order to perform a complex task, solve problems, or simply discuss what is pressing at the time. It is not enough to provide tools to users and hope that they use them effectively. It is through focusing on the individual and social needs of the users in the context of the collaboration task that we will be able to adapt to the requirements of the situation and deliver the best Quality of Experience (QoE) to each user in the environment, regardless of task, technology, or individual. \n\nThis Birds of a Feather session will bring together individuals interested in human factors to explore the human and social needs of advanced collaborative environments. A round table discussion will be held with all participants having a brief opportunity to discuss their interests in this area. The goal of this BOF is to form the beginnings of a human factors group within the Access Grid collaboration community.','');
INSERT INTO events_temp VALUES (3644,'2003-11-18','16:00:00','16:30:00','','Supercomputing Applications','Parallel Multilevel Sparse Approximate Inverse Preconditioners in Large Sparse Matrix Computations','John Gustafson (Sun Microsystems Inc.)','Kai Wang (University of Kentucky), Jun Zhang (University of Kentucky), Chi Shen (University of Kentucky)','Paper','We investigate the use of the multistep successive preconditioning strategies (MSP) to construct a class of parallel multilevel sparse approximate inverse (SAI) preconditioners. We do not use independent set ordering, but a diagonal dominance based matrix permutation to build a multilevel structure. The purpose of introducing multilevel structure into SAI is to enhance the robustness of SAI for solving difficult problems. Forward and backward preconditioning iteration and two Schur complement preconditioning strategies are proposed to improve the performance and to reduce the storage cost of the multilevel preconditioners. One version of the parallel multilevel SAI preconditioner based on the MSP strategy is implemented. Numerical experiments for solving a few sparse matrices on a distributed memory parallel computer are reported.','');
INSERT INTO events_temp VALUES (3645,'2003-11-18','16:00:00','16:30:00','','Performance and Reliability','MPICH-V2: a Fault Tolerant MPI for Volatile Nodes based on Pessimistic Sender Based Message Logging','Bernd Mohr (Forschungszentrum Juelich)','Aurelien Bouteiller (CNRS-LRI), Franck Cappello (INRIA-LRI), Thomas Herault (CNRS-LRI), Geraud Krawezik (CNRS-LRI), Pierre Lemarinier (CNRS-LRI), Frederic Magniette (CNRS-LRI)','Paper','Execution of MPI applications on clusters and Grid deployments suffering from node and network failures motivates the use of fault tolerant MPI implementations. \n\nWe present MPICH-V2 (the second protocol of the MPICH-V project), an automatic fault tolerant MPI implementation using an innovative protocol that removes the most limiting factor of the pessimistic message logging approach: reliable logging of in-transit messages. MPICH-V2 relies on uncoordinated checkpointing, sender-based message logging and remote reliable logging of message logical clocks. \n\nThis paper presents the architecture of MPICH-V2, its theoretical foundation and the performance of the implementation. We compare MPICH-V2 to MPICH-V1 and MPICH-P4, evaluating a) its point-to-point performance, b) the performance for the NAS benchmarks, and c) the application performance when many faults occur during the execution. Experimental results demonstrate that MPICH-V2 provides performance close to MPICH-P4 for applications using large messages while dramatically reducing the number of reliable nodes required compared to MPICH-V1.','');
INSERT INTO events_temp VALUES (3646,'2003-11-18','16:00:00','16:30:00','','Tools and Services for Grids','Optimizing Reduction Computations In a Distributed Environment','Roland Wismüller (LRR-TUM, Technische Universität München)','Tahsin Kurc (Ohio State University), Feng Lee (Ohio State University), Gagan Agrawal (Ohio State University), Umit Catalyurek (Ohio State University), Renato Ferreira (Ohio State University), Joel Saltz (Ohio State University)','Paper','We investigate runtime strategies for data-intensive applications that involve generalized reductions on large, distributed datasets. Our set of strategies includes replicated filter state, partitioned filter state, and hybrid options between these two extremes. We evaluate these strategies using emulators of three real applications, different query and output sizes, and a number of configurations. We consider execution in a homogeneous cluster and in a distributed environment where only a subset of nodes host the data. Our results show that replicating the filter state scales well and outperforms other schemes if sufficient memory is available and sufficient computation is involved to offset the cost of the global merge step. In other cases, the hybrid strategy is usually the best. Moreover, in almost all cases, the performance of the hybrid strategy is quite close to that of the best strategy. Thus, we believe that hybrid is an attractive approach when the relative performance of different schemes cannot be predicted.','');
INSERT INTO events_temp VALUES (3647,'2003-11-18','16:30:00','17:00:00','','Tools and Services for Grids','Job Scheduler Architecture and Performance in Computational Grid Environments','Roland Wismüller (LRR-TUM, Technische Universität München)','Hongzhang Shan (Lawrence Berkeley National Laboratory), Leonid Oliker (Lawrence Berkeley National Laboratory), Rupak Biswas (NASA Ames Research Center)','Paper','Computational grids hold great promise in utilizing geographically separated heterogeneous resources to solve large-scale complex scientific problems. However, a number of major technical hurdles, including distributed resource management and effective job scheduling, stand in the way of realizing these gains. In this paper, we propose a novel grid superscheduler architecture and three distributed job migration algorithms. We also model the critical interaction between the superscheduler and autonomous local schedulers. Extensive performance comparisons with ideal, central, and local schemes using real workloads from leading computational centers are conducted in a simulation environment. Additionally, synthetic workloads are used to perform a detailed sensitivity analysis of our superscheduler. Several key metrics demonstrate that substantial performance gains can be achieved via smart superscheduling in distributed computational grids.','');
INSERT INTO events_temp VALUES (3648,'2003-11-18','16:30:00','17:00:00','','Performance and Reliability','Hierarchical Dynamics, Interarrival Times, and Performance','Bernd Mohr (Forschungszentrum Juelich)','Stephen D Kleban (Sandia National Laboratories), Scott H Clearwater (Sandia National Laboratories)','Paper','We report on a model of the distribution of job submission interarrival times in supercomputers. Interarrival times are modeled as a consequence of a complicated set of decisions between users, the queuing algorithm, and other policies. This cascading hierarchy of decision-making processes leads to a particular kind of heavy-tailed distribution. Specifically, hierarchically constrained systems suggest that fatter tails are due to more levels coming into play in the overall decision-making process. The key contribution of this paper is that heavier tails resulting from more complex decision-making processes, that is, more hierarchical levels, will lead to overall worse performance, even when the average interarrival time is the same. Finally, we offer some suggestions for how to overcome these issues and the tradeoffs involved.','');
INSERT INTO events_temp VALUES (3649,'2003-11-18','16:30:00','17:00:00','','Supercomputing Applications','Parallel Particle-In-Cell Simulation of Colliding Beams in High Energy Accelerators','John Gustafson (Sun Microsystems Inc.)','Ji Qiang (Lawrence Berkeley National Laboratory), Miguel A. Furman (Lawrence Berkeley National Laboratory), Robert D. Ryne (Lawrence Berkeley National Laboratory)','Paper','In this paper we present a self-consistent simulation model of colliding beams in high energy accelerators. The model, which is based on a particle-in-cell method, uses a newly developed shifted-Green function algorithm for the efficient calculation of the beam-beam interaction. In the parallel implementation we studied various strategies to deal with the particular nature of the colliding beam system -- a system in which there can be significant particle movement between beam-beam collisions. We chose a particle-field decomposition approach instead of the conventional domain decomposition or particle decomposition approach. The particle-field approach leads to good load balance, reduced communication cost, and shows the best scalability on an IBM SP3 among the three parallel implementations. A performance test of the beam-beam model on a Cray T3E, an IBM SP3, and a PC cluster is presented. As an application, we studied the effect of long-range collisions on antiproton lifetime in the Fermilab Tevatron.','');
INSERT INTO events_temp VALUES (3650,'2003-11-19','13:30:00','14:00:00','','Performance Analysis and Modeling','The Case of the Missing Supercomputer Performance: Achieving Optimal Performance on the 8,192 Processors of ASCI Q','Adolfy Hoisie (Los Alamos National Lab)','Fabrizio Petrini (Los Alamos National Laboratory), Darren J. Kerbyson (Los Alamos National Laboratory), Scott Pakin (Los Alamos National Laboratory)','Paper','In this paper we describe how we improved the effective performance of ASCI Q, the world\'s second-fastest supercomputer, to meet our expectations. Using an arsenal of performance-analysis techniques including analytical models, custom microbenchmarks, full applications, and simulators, we succeeded in observing a serious -- but previously undetectable -- performance problem. We identified the source of the problem, eliminated the problem, and \"closed the loop\" by demonstrating improved application performance. We present our methodology and provide insight into performance analysis that is immediately applicable to other large-scale cluster-based supercomputers.','');
INSERT INTO events_temp VALUES (3651,'2003-11-19','13:30:00','14:00:00','','Gordon Bell Computational Methods','High Resolution Forward and Inverse Earthquake Modeling on Terascale Computers','William Gropp (Argonne National Laboratory)','Volkan Akcelik (Carnegie Mellon University), Jacobo Bielak (Carnegie Mellon University), George Biros (Courant Institute, New York University), Ioannis Epanomeritakis (Carnegie Mellon University), Antonio Fernandez (Carnegie Mellon University), Omar Ghattas (Carnegie Mellon University), Eui Joong Kim (Carnegie Mellon University), David O\'Hallaron (Carnegie Mellon University), Tiankai Tu (Carnegie Mellon University)','Paper','For earthquake simulations to play an important role in the reduction of seismic risk, they must be capable of high resolution and high fidelity. We have developed algorithms and tools for earthquake simulation based on multiresolution hexahedral meshes. We have used this capability to carry out 1 Hz simulations of the 1994 Northridge earthquake in the LA Basin using 100 million grid points. Our code sustains 0.9 teraflop/s for 12 hours on 2048 AlphaServer processors at 87% parallel efficiency. Because of uncertainties in characterizing earthquake source and basin material properties, a critical remaining challenge is to invert for source and material parameter fields for complex 3D basins from records of past earthquakes. Towards this end, we present results for material and source inversion of high-resolution models of basins undergoing antiplane motion using parallel scalable inversion algorithms that overcome many of the difficulties particular to inverse heterogeneous wave propagation problems.','');
INSERT INTO events_temp VALUES (3652,'2003-11-19','13:30:00','14:00:00','','Runtime Systems','ParADE: An OpenMP Programming Environment for SMP Cluster Systems','Mary Thomas (University of Texas)','Yang-Suk Kee (Dr.), Jin-Soo Kim (Prof.), Soonhoi Ha (Prof.)','Paper','The demands for programming environments to exploit clusters of symmetric multiprocessors (SMPs) are increasing. In this paper, we present a new programming environment, called ParADE, to enable easy, portable, and high-performance programming on SMP clusters. It is an OpenMP programming environment on top of a multi-threaded software distributed shared memory (SDSM) system with a variant of home-based lazy release consistency (HLRC). To boost performance, the ParADE runtime system provides explicit message-passing primitives to make it a hybrid-programming environment. Collective communication primitives are used for the synchronization and work-sharing directives associated with small variables, lessening the synchronization overhead and avoiding the implicit barriers of work-sharing directives. The OpenMP translator bridges the gap between the OpenMP abstraction and the hybrid programming interfaces of the runtime system. The experiments with several NAS benchmarks and real applications on a Linux-based cluster show promising results, overcoming the poor performance of the conventional SDSM-based approaches.','');
INSERT INTO events_temp VALUES (3653,'2003-11-19','13:30:00','15:00:00','','Where Should the Access Grid Go After Version 2.x?','','Jennifer Teig von Hoffman (Boston University)','Joyce F. Williams-Green (Winston-Salem State University), Ian Foster (Argonne National Laboratory), Daniel A. Reed (NCSA/Alliance), Ulrich Lang (High Performance Computing Center Stuttgart (HLRS) )','SC Global, Panel','This panel gathers some of the top thinkers in grid technologies, all of whom have extensive experience with the Access Grid, to discuss their visions for this collaborative technology. Discussion will include (but not be limited to) questions about necessary technical enhancements to the core Access Grid Toolkit, applications which should be built based on this Toolkit, and new directions in which the AG community must expand to reach its potential.','');
INSERT INTO events_temp VALUES (3654,'2003-11-19','10:30:00','11:00:00','','Networking','A Configurable Network Protocol for Cluster Based Communications using Modular Hardware Primitives on an Intelligent NIC','Dhabaleswar Panda (The Ohio State University)','Ranjesh G. Jaganathan (Clemson University), Keith D. Underwood (Sandia National Laboratories), Ron R. Sass (Clemson University)','Paper','The high overhead of generic protocols like TCP/IP provides strong motivation for the development of a better protocol architecture for cluster-based parallel computers. Reconfigurable computing has a unique opportunity to contribute hardware level protocol acceleration while retaining the flexibility to adapt to changing needs. This paper focuses on work to create a set of parameterizable components that can be put together as needed to obtain a customized protocol for each application. To study the feasibility of such an architecture, hardware components were built that can be stitched together as needed to provide the required functionality. Feasibility is demonstrated using four different protocol configurations, namely: (1) unreliable packet transfer; (2) reliable, unordered message transfer without duplicate elimination; (3) reliable, unordered message transfer with duplicate elimination; and (4) reliable, ordered message transfer with duplicate elimination. The different configurations illustrate trade-offs between chip space and functionality while reducing processor overhead.','');
INSERT INTO events_temp VALUES (3655,'2003-11-19','10:30:00','11:00:00','','Performance Programming','SCALLOP: A Highly Scalable Parallel Poisson Solver in Three Dimensions','Robert Lucas (USC/ISI)','Gregory T. Balls (University of California, San Diego/SDSC), Scott B. Baden (University of California, San Diego), Phillip Colella (Lawrence Berkeley National Laboratory)','Paper','SCALLOP is a highly scalable solver and library for elliptic partial differential equations on regular block-structured domains. SCALLOP avoids high communication overheads algorithmically by taking advantage of the locality properties inherent to solutions to elliptic PDEs. Communication costs are small, on the order of a few percent of the total running time on up to 1024 processors of NPACI\'s and NERSC\'s IBM Power-3 SP systems. SCALLOP trades off numerical overheads against communication. These numerical overheads are independent of the number of processors for a wide range of problem sizes. SCALLOP is implicitly designed for infinite domain (free space) boundary conditions, but the algorithm can be reformulated to accommodate other boundary conditions. The SCALLOP library is built on top of the KeLP programming system and runs on a variety of platforms.','');
INSERT INTO events_temp VALUES (3656,'2003-11-19','10:30:00','11:15:00','','Showcase II','Bioinformatics Scientific Workspace of the Future','Natalia Maltsev (Argonne National Laboratory)','Natalia Maltsev (Argonne National Laboratory)','SC Global, Showcase','The Alliance Scientific Workspaces of the Future expedition is designed to create partnerships between technology developers and end users to deploy and further develop next-generation high-end collaborative and network-based scientific visualization tools and systems designed to meet the specific needs of distributed applications communities. The SWOF expedition uses the Access Grid 2.0 virtual venue as an organizing resource for the various technologies and applications data. Technologies include large-scale tiled displays, passive 3D displays, distributed rendering code, personal and large-scale Access Grid nodes, speech-to-text services, portals technology and the Access Grid 2.0 software toolkit. The AG 2.0 venue provides a services-based infrastructure for deploying, locating and using third-party supplied web services.\n\nThe bioinformatics SWOF expedition uses this technology to create a virtual collaboratory. A bioinformatics venue has been created and is used to store data, applications, workflows, documents, results, database pointers and node-specific services such as tiled displays and 3D displays. Collaborators from Argonne National Laboratory, Oak Ridge National Laboratory, the Medical College of Wisconsin and NCSA have contributed to and make use of the collaboratory. The bioinformatics group at Argonne National Laboratory is creating an Access Grid-enabled bioinformatics portal to front-end a high-throughput genomic server.\n\nWe propose an SC Global showcase demonstration of the bioinformatics SWOF technology. The demonstration will take the form of bioinformatics experts and biologists using their tools in a collaborative way to research a biology problem. We anticipate demonstrating the use of portal technology to run sequences through the high-throughput server, the use of high-end displays for visualization of results, the use of the AG 2.0 collaborative tools for analysis and documentation and, last, the demonstration of the effectiveness of using these technologies to facilitate remote collaboration.','');
INSERT INTO events_temp VALUES (3657,'2003-11-19','11:00:00','11:30:00','','Performance Programming','Parallel Iterative Solvers of GeoFEM with Selective Blocking Preconditioning for Nonlinear Contact Problems on the Earth Simulator','Robert Lucas (USC/ISI)','Kengo Nakajima (RIST)','Paper','An efficient parallel iterative method with selective blocking preconditioning has been developed for symmetric multiprocessor (SMP) cluster architectures with vector processors such as the Earth Simulator. This method is based on a three-level hybrid parallel programming model, which includes message passing for inter-SMP node communication, loop directives by OpenMP for intra-SMP node parallelization and vectorization for each processing element (PE). This method provides robust and smooth convergence and excellent vector and parallel performance in 3D geophysical simulations with contact conditions performed on the Earth Simulator. The selective blocking preconditioning is much more efficient than ILU(1) and ILU(2). Performance for the complicated Southwest Japan model with more than 23 million DOF on 10 SMP nodes (80 PEs) of the Earth Simulator was 161.7 GFLOPS (25.3% of the peak performance) for the hybrid programming model and 190.4 GFLOPS (29.8% of the peak performance) for flat MPI.','');
INSERT INTO events_temp VALUES (3658,'2003-11-19','11:00:00','11:30:00','','Networking','Optimizing 10-Gigabit Ethernet for Network of Workstations, Clusters, and Grids','Dhabaleswar Panda (The Ohio State University)','Wu-chun Feng (Los Alamos National Laboratory), Justin (Gus) Hurwitz (Los Alamos National Laboratory), Harvey B. Newman (California Institute of Technology), Sylvain Ravot (California Institute of Technology), Roger Les Cottrell (Stanford Linear Accelerator Center), Olivier Martin (CERN), Fabrizio Coccetti (Stanford Linear Accelerator Center), Cheng Jin (California Institute of Technology), David Weig (California Institute of Technology), Steven Low (California Institute of Technology)','Paper','This paper presents a case study of the 10-Gigabit Ethernet (10GigE) adapter from Intel. Specifically, with appropriate optimizations to the configurations of the 10GigE adapter and TCP, we demonstrate that the 10GigE adapter can perform well in local-area, storage-area, system-area, and wide-area networks.\n\nIn local-area, storage-area, and system-area networks, we achieved over 4-Gb/s end-to-end throughput and 20-us end-to-end latency between applications running on less capable, lower-end PCs. In the wide-area network, we broke the recently-set Internet2 Land Speed Record by 2.5 times by sustaining an end-to-end TCP/IP throughput of 2.38 Gb/s between Sunnyvale, California and Geneva, Switzerland (i.e., 10,037 kilometers). Thus, the above results indicate that 10GigE may be a cost-effective solution across a multitude of network environments.','');
INSERT INTO events_temp VALUES (3659,'2003-11-19','11:15:00','12:00:00','','Showcase II','Grid and Web services for problem solving environments - Towards collaborative visualisation','Lakshmi Sastry (CCLRC Rutherford Appleton Laboratory, UK)','Lakshmi Sastry (CCLRC Rutherford Appleton Laboratory, UK), John Brooke (University of Manchester)','SC Global, Showcase','Much research and development expertise has gone into developing generic as well as domain specific data analysis, computational steering and visualisation software tools. The use of such tools has become an integral part of modern scientific research. Consequently, many resources have been invested in the training and learning of such tools by individuals and teams of scientists, who have also developed highly customised, robust data analysis applications using these tools that form the core of their daily scientific activity. At the same time, advances in experimental techniques, instrumentation and high performance computing produce massive datasets from the simulation of physical processes. The scalability requirements to handle such massive datasets for data analysis tasks are beyond the capabilities of their existing data analysis and visualisation systems and algorithms. This is especially true of large multidisciplinary research support organisations such as the Central Laboratory of the Research Councils (CCLRC), which operates several large scale scientific facilities for the UK research and industrial communities including accelerators, lasers, telescopes, satellites and supercomputers alongside its active participation in scientific research in astronomy, biology, chemistry, environmental science and physics. The users and scientific partners of these large scale facilities are situated across the world.\n\nThe data analysis challenge is to cater to such a wide range of complex multidisciplinary high performance applications that use myriad methodologies and software packages that ambitiously aim to process, visualise and produce gigabytes of data at every finely discretised spatio-temporal and other parametric space of the problem domain. The motivation behind our Web and Grid services based applications and visualisation services is to provide a generic toolkit that can be used to harness the power of Grid computing and the dynamic flexibility of the Web/Grid services based architecture and make it available to familiar data analysis and problem solving environments. This Grid Applications Portals Toolkit (GAPtk – http://ws1.esc.rl.ac.uk/web/projects/gaptk ) will provide utilities, services, protocols and configurable high-level application programming interfaces. These can be accessed via customised self-contained application portals built using domain specific tools such as MATLAB, CDAT and Live Access Server. There is also an increasing requirement to explore ways to exploit emerging technologies such as the Access Grid (http://www.accessgrid.org) for collaborative working, increasing the productivity of geographically distributed project partnerships.','');
INSERT INTO events_temp VALUES (3660,'2003-11-19','11:30:00','12:00:00','','Performance Programming','Multi-Constraint Mesh Partitioning for Contact/Impact Computations','Robert Lucas (USC/ISI)','George Karypis (Department of Computer Science & Engineering, University of Minnesota)','Paper','We present a novel approach for decomposing contact/impact computations. Effective decomposition of these computations poses a number of challenges as it needs to both balance the computations and minimize the amount of communication that is performed during the finite element and the contact search phase. Our approach achieves the first goal by partitioning the underlying mesh such that it simultaneously balances both the work that is performed during the finite element phase and that performed during the contact search phase, while producing subdomains whose boundaries consist of piecewise axes-parallel lines or planes. The second goal is achieved by using a decision tree to decompose the space into rectangular or box-shaped regions that contain contact points from a single partition. Our experimental evaluation on a sequence of 100 meshes shows that this new approach can reduce the overall communication overhead compared to existing algorithms.','');
INSERT INTO events_temp VALUES (3661,'2003-11-19','11:30:00','12:00:00','','Networking','Scalable Hardware-Based Multicast Trees','Dhabaleswar Panda (The Ohio State University)','Salvador Coll (Technical University of Valencia), Jose Duato (Technical University of Valencia), Fabrizio Petrini (Los Alamos National Laboratory), Francisco J. Mora (Technical University of Valencia)','Paper','This paper presents an algorithm for implementing optimal hardware-based multicast trees on networks that provide hardware support for collective communication. Although the underlying methodology is general enough to be applied to other present and future technologies, the Quadrics network has been chosen as the state-of-the-art interconnect on which to apply hardware-based multicast trees. The proposed mechanism is intended to improve the performance of collective communication patterns in those cases where the hardware support cannot be directly used, for instance, due to some faulty nodes. This scheme provides a significant reduction in multicast latencies compared to the system primitives, which use multicast trees based on unicast communication. A backtracking algorithm to find the optimal solution to the problem is presented. In addition, a greedy algorithm is presented and shown to provide near-optimal solutions. Finally, our experimental results show the good performance and scalability of the proposed multicast tree in comparison to traditional unicast-based multicast trees.','');
INSERT INTO events_temp VALUES (3662,'2003-11-19','14:00:00','14:30:00','','Runtime Systems','Dyn-MPI: Supporting MPI on Non-Dedicated Clusters','Mary Thomas (University of Texas)','D. Brent Weatherly (University of Georgia), David K. Lowenthal (University of Georgia), Mario Nakazawa (University of Georgia), Franklin Lowenthal (California State University -- Hayward)','Paper','Distributing data is a fundamental problem in implementing efficient distributed-memory parallel programs. The problem becomes more difficult in environments where the participating nodes are not dedicated to a parallel application. We are investigating the data distribution problem in non-dedicated environments in the context of explicit message-passing programs. \n\nTo address this problem, we have designed and implemented an extension to MPI called Dynamic MPI (Dyn-MPI). The key component of Dyn-MPI is its run-time system, which efficiently and automatically redistributes data on the fly when there are changes in the application or the underlying environment. Dyn-MPI supports efficient memory allocation, precise measurement of system load and computation time, and node removal. Performance results show that programs that use Dyn-MPI execute efficiently in non-dedicated environments, including up to almost a three-fold improvement compared to programs that do not redistribute data and a 25% improvement over standard adaptive load balancing techniques.','');
INSERT INTO events_temp VALUES (3663,'2003-11-19','14:00:00','14:30:00','','Performance Analysis and Modeling','Early Evaluation of the Cray X1','Adolfy Hoisie (Los Alamos National Lab)','Thomas H. Dunigan, Jr. (Oak Ridge National Laboratory), Mark R. Fahey (Oak Ridge National Laboratory), James B. White III (Oak Ridge National Laboratory), Patrick H. Worley (Oak Ridge National Laboratory)','Paper','Oak Ridge National Laboratory installed a 32-processor Cray X1 in March 2003 and will have a 256-processor system installed by October 2003. In this paper, we describe our initial evaluation of the X1 architecture, focusing on microbenchmarks, kernels, and application codes that highlight the performance characteristics of the X1 architecture and indicate how to use the system most efficiently.','');
INSERT INTO events_temp VALUES (3664,'2003-11-19','14:00:00','14:30:00','','Gordon Bell Computational Methods','IPSAP: A High-performance Parallel Finite Element Code for Large-scale Structural Analysis Based on Domain-wise Multifrontal Technique','William Gropp (Argonne National Laboratory)','Seung Jo Kim (Department of Aerospace Engineering, Seoul National University, KOREA), Chang Sung Lee (Department of Aerospace Engineering, Seoul National University, KOREA), Jeong Ho Kim (High Performance Computing and Networking Supercomputing Center, Korea), Minsu Joh (High Performance Computing and Networking Supercomputing Center, Korea), Sangsan Lee (High Performance Computing and Networking Supercomputing Center, Korea)','Paper','Most research on large-scale parallel structural analysis has focused on iterative solution methods, since direct solution methods generally have many difficulties and disadvantages for large-scale problems. However, due to the numerical robustness of direct methods, which guarantees that the solution is obtained within an estimated time, direct methods are much more desirable for general application to large-scale structural analysis, if the difficulties and disadvantages can be overcome. In this research, we propose the domain-wise multifrontal solver as an efficient direct solver that can overcome most of these difficulties and disadvantages. By using our own structural analysis code IPSAP, which uses the proposed solver, we can solve the largest problem ever solved by direct solvers and can sustain 191 Gflop/s with 256 CPUs on our self-made cluster system, Pegasus. By implementing the block Lanczos algorithm using our solver, IPSAP can solve eigenproblems with 7 million DOFs within one hour.','');
INSERT INTO events_temp VALUES (3665,'2003-11-19','14:30:00','15:00:00','','Performance Analysis and Modeling','Evaluation of Cache-based Superscalar and Cacheless Vector Architectures for Scientific Computations','Adolfy Hoisie (Los Alamos National Lab)','Leonid Oliker (Lawrence Berkeley National Laboratory), Andrew Canning (Lawrence Berkeley National Laboratory), Jonathan Carter (Lawrence Berkeley National Laboratory), John Shalf (Lawrence Berkeley National Laboratory), David Skinner (Lawrence Berkeley National Laboratory), Stephane Ethier (Princeton University), Rupak Biswas (NASA Ames Research Center), Jahed Djomehri (Computer Sciences Corporation), Rob Van der Wijngaart (Computer Sciences Corporation)','Paper','The growing gap between sustained and peak performance for scientific applications is a well-known problem in high end computing. The recent development of parallel vector systems offers the potential to bridge this gap for many computational science codes and deliver a substantial increase in computing capabilities. This paper examines the intranode performance of the NEC SX-6 vector processor and the cache-based IBM Power3/4 superscalar architectures across a number of scientific computing areas. First, we present the performance of a microbenchmark suite that examines low-level machine characteristics. Next, we study the behavior of the NAS Parallel Benchmarks. Finally, we evaluate the performance of several scientific computing codes. Results demonstrate that the SX-6 achieves high performance on a large fraction of our applications and often significantly outperforms the cache-based architectures. However, certain applications are not easily amenable to vectorization and would require extensive algorithm and implementation reengineering to utilize the SX-6 effectively.','');
INSERT INTO events_temp VALUES (3666,'2003-11-19','14:30:00','15:00:00','','Runtime Systems','An Evaluation of a Framework for the Dynamic Load Balancing of Highly Adaptive and Irregular Parallel Applications','Mary Thomas (University of Texas)','Kevin J. Barker (College of William and Mary), Nikos P. Chrisochoides (College of William and Mary)','Paper','We present an evaluation of a flexible framework and runtime software system for the dynamic load balancing of asynchronous and highly adaptive and irregular applications. These applications, which include parallel unstructured and adaptive mesh refinement, serve as building blocks for a large class of scientific applications. Extensive study has led to the development of solutions to the dynamic load balancing problem for loosely synchronous and computation-intensive programs; however, these methods are not suitable for asynchronous and highly adaptive applications. We evaluate a new software framework which includes support for an Active Messages style communication mechanism, global name space, transparent object migration, and preemptive decision making. Our results from both a 3-dimensional parallel advancing front mesh generation program, as well as a synthetic micro-benchmark, indicate that this new framework outperforms two existing general-purpose, well-known, and widely used software systems for the dynamic load balancing of adaptive and irregular parallel applications.','');
INSERT INTO events_temp VALUES (3667,'2003-11-19','14:30:00','15:00:00','','Gordon Bell Computational Methods','A new parallel kernel-independent fast multipole method','William Gropp (Argonne National Laboratory)','Lexing Ying (New York University), George Biros (New York University), Denis Zorin (New York University), Harper Langston (New York University)','Paper','We present a new adaptive fast multipole algorithm and its parallel implementation. The algorithm is kernel-independent in the sense that the acceleration of the computation of the far field does not rely on any analytic expansions, but only uses kernel evaluations. The new method enables scalable simulations for many important problems in science and engineering. Examples include viscous flows, fracture mechanics and screened Coulombic interactions. Our MPI based parallel implementation logically separates the computation and communication phases to avoid synchronization in the upward and downward computation passes, and it enables us to fully exploit computation and communication overlapping. We measure isogranular and fixed-size scalability for a variety of kernels on the Pittsburgh Supercomputing Center\'s TCS-1 AlphaServer on up to 2048 processors. Our largest experiments reached 1.2 billion unknowns, for which we have achieved 1 Tflop/s peak performance and 0.7 Tflop/s sustained performance. Overall, our implementation achieves excellent parallel efficiency.','');
INSERT INTO events_temp VALUES (3668,'2003-11-19','15:30:00','16:00:00','','Algorithms and Programming','A Million-Fold Speed Improvement in Genomic Repeats Detection','Srinivas Aluru (Iowa State University)','John W. Romein (Vrije Universiteit, Amsterdam), Jaap Heringa (Vrije Universiteit, Amsterdam), Henri E. Bal (Vrije Universiteit, Amsterdam)','Paper','This paper presents a novel, parallel algorithm for generating top alignments. Top alignments are used for finding internal repeats in biological sequences like proteins and genes. Our algorithm replaces an older, sequential algorithm (Repro), which was prohibitively slow for sequence lengths higher than 2000. The new algorithm is an order of magnitude faster (O(n^3) rather than O(n^4)). \n\nThe paper presents a three-level parallel implementation of the algorithm: using SIMD multimedia extensions found on present-day processors (a novel technique that can be used to parallelize any application that performs many sequence alignments), using shared-memory parallelism, and using distributed-memory parallelism. It allows processing the longest known proteins (nearly 35000 amino acids). We show exceptionally high speed improvements: between 548 and 889 on a cluster of 64 dual-processor machines, compared to the new sequential algorithm. Especially for long sequences, extreme speed improvements over the old algorithm are obtained.','');
INSERT INTO events_temp VALUES (3669,'2003-11-19','15:30:00','16:00:00','','Gordon Bell Performance Evaluation','Performance evaluation and tuning of GRAPE-6 --- towards 40 \"real\" Tflops','David Bailey (LBNL)','Junichiro Makino (Department of Astronomy, School of Science, University of Tokyo), Eiichiro Kokubo (National Astronomical Observatory of Japan), Toshiyuki Fukushige (Department of General System Studies, College of Arts and Sciences, University of Tokyo), Hiroshi Daisaka (Department of Astronomy, School of Science, University of Tokyo)','Paper','In this paper, we describe the performance characteristics of GRAPE-6, the sixth-generation special-purpose computer for gravitational many-body problems. GRAPE-6 consists of 2048 custom pipeline chips, each of which integrates six pipeline processors specialized for the calculation of gravitational interaction between particles. The GRAPE hardware performs the evaluation of the interaction. The frontend processors perform all other operations, such as the time integration of the orbits of particles, I/O, on-the-fly analysis etc. The theoretical peak speed of GRAPE-6 is 63.4 Tflops. We present the result of benchmark runs, and discuss the performance characteristics. We also present the measured performance for a few real scientific applications. The best performance so far achieved with real applications is 35.3 Tflops.','');
INSERT INTO events_temp VALUES (3670,'2003-11-19','15:30:00','17:00:00','','High Performance Computing System Performance Modeling','','Larry Davis (DoD High Performance Computing Modernization Program)','Allan Snavely (SDSC), Jack Dongarra (University of Tennessee), Walt Brooks (NASA Ames), David Bailey (NERSC), Henry Newman (Instrumental), John McCalpin (IBM)','Panel','This panel consists of government and government-sponsored researchers in the area of high performance computing system performance modeling. Benchmarks are structured to address a range of target configurations. They provide the High Performance Computing Modernization Program (HPCMP) with accurate performance information on available high performance computing (HPC) capabilities. The intent is to provide a set of program source code listings, makefiles, runtime scripts, input files, and validated results files which represents the type of computational work performed on HPC resources. Questions addressed by the panelists include:\n\n1. Describe a benchmark structure.\nThe benchmark is divided into two parts: (1) hardware performance and system tests and (2) application tests.\n\n2. Describe a synthetic performance test.\nThe tests are to be run once with a standard scheduler with no changes to the default priorities. Special rules apply to I/O tests.\n\n3. Describe application tests.\nMultiple test cases using a suite of codes that are to be run using the standard system scheduler.\n\n4. Allowed changes.\nVendors are only allowed to change the source code to the extent needed to get the program to execute and provide correct output.','');
INSERT INTO events_temp VALUES (3671,'2003-11-19','15:30:00','17:00:00','','Use of Collaborative Technologies: Artistic and Cultural Instincts','Informed use and development of collaborative technologies','Kelli Robyn Dipple (University of Manchester and Queensland University of Technology)','Kelli Robyn Dipple (University of Manchester and Queensland University of Technology)','SC Global, Showcase','A distributed presentation / panel involving key speaker presentations and group discussion, held remotely from Manchester, UK; Brisbane, Australia; and Gainesville, Florida. Discussion will focus on research and workshop activities undertaken over Access Grid facilities in Brisbane, Sydney, Manchester, Gainesville and Amsterdam throughout 2002 - 2003. In Phoenix, audiences will have a screen-based experience of the discussion, with distributed video documentation and a website.\n\nDiscussion topics: Informed use and development of collaborative technologies \n1. The breadth of communication technologies available. Where does the Access Grid sit in relation to the larger context of available communication technologies? Parallel developments for realtime communication using hand-held devices and wireless connectivity. The convergent evolution and hybrid systems. \n2. Practice, development, form and presentation on the Access Grid. What are the limitations and expansions? \n3. The consequences of distribution in juxtaposition to the advantages. Its impact on socialization, relational expectations and effective communication. \n4. What does this mean for artists and for scientists? What kind of contribution can the arts make towards the development of scientific tools?\n\nDocumented research projects for presentation: \n1. Navigating Gravity - distributed performance, multiple site scripting and access grid aesthetics, research and development project (Gainesville, Manchester and Sydney). \n2. Telematics and networking performance workshops over the grid (Brisbane, Amsterdam, Sydney) \n3. SC Global 02 / remote panel session on multiple site live events and comparative methodologies for simultaneous distributions (University of Manchester and University of Sydney Viz Lab)','');
INSERT INTO events_temp VALUES (3672,'2003-11-19','16:00:00','16:30:00','','Gordon Bell Performance Evaluation','A 14.6 billion degrees of freedom, 5 teraflops, 2.5 terabyte earthquake simulation on the Earth Simulator','David Bailey (LBNL)','Dimitri Komatitsch (California Institute of Technology), Seiji Tsuboi (Institute for Frontier Research on Earth Evolution, JAMSTEC), Chen Ji (California Institute of Technology), Jeroen Tromp (California Institute of Technology)','Paper','We use 1944 processors of the Earth Simulator to model seismic wave propagation resulting from large earthquakes. Simulations are conducted based upon the spectral-element method, a high-degree finite-element technique with an exactly diagonal mass matrix. We use a very large mesh with 5.5 billion grid points (14.6 billion degrees of freedom). We include the full complexity of the Earth, i.e., a three-dimensional wave-speed and density structure, a 3-D crustal model, ellipticity as well as topography and bathymetry. A total of 2.5 terabytes of memory is needed. Our implementation is purely based upon MPI, with loop vectorization on each processor. We obtain an excellent vectorization ratio of 99.3%, and we reach a performance of 5 teraflops (30% of the peak performance) on 38% of the machine. The very high resolution of the mesh allows us to perform fully three-dimensional calculations at seismic periods as low as 5 seconds.','');
INSERT INTO events_temp VALUES (3673,'2003-11-19','16:00:00','16:30:00','','Algorithms and Programming','GridSAT: A Chaff-based Distributed SAT Solver for the Grid','Srinivas Aluru (Iowa State University)','Wahid Chrabakh (UC Santa Barbara), Rich Wolski (UC Santa Barbara)','Paper','We present GridSAT, a parallel and complete satisfiability solver designed to solve non-trivial SAT problem instances using a large number of widely distributed and heterogeneous resources. \n\nThe GridSAT parallel algorithm uses intelligent backtracking, distributed and carefully scheduled sharing of learned clauses, and clause reduction. Our implementation focuses on dynamic resource acquisition and release to optimize application execution. We show how the large number of computational resources that are available from a Grid can be managed effectively for the application by an automatic scheduler and effective implementation. GridSAT execution speed is compared against the best sequential solver as rated by the SAT2002 competition using a wide variety of problem instances. The results show that GridSAT delivers speed-up for all but one of the test problem instances that are of significant size. In addition, we describe how GridSAT has solved previously unsolved satisfiability problems and the domain science contribution these results make.','');
INSERT INTO events_temp VALUES (3674,'2003-11-19','16:30:00','17:00:00','','Gordon Bell Performance Evaluation','The Space Simulator: Modeling the Universe from Supernovae to Cosmology','David Bailey (LBNL)','Michael S. Warren (LANL), Chris L. Fryer (LANL), M. Patrick Goda (LANL)','Paper','The Space Simulator is a 294-processor Beowulf cluster with theoretical peak performance just below 1.5 Teraflop/s. It is based on the Shuttle XPC SS51G mini chassis. Each node consists of a 2.53 GHz Pentium 4 processor, 1 GB of 333 MHz DDR SDRAM, an 80 Gbyte Maxtor hard drive, and a 3Com 3C996B-T gigabit ethernet card. The network is made up of Foundry FastIron 1500 and 800 Gigabit Ethernet switches. Each individual node cost less than $1000, and the entire system cost under $500,000. The cluster achieved Linpack performance of 665.1 Gflop/s on 288 processors in October 2002, making it the 85th fastest computer in the world according to the 20th TOP500 list. Performance has since improved to 757.1 Linpack Gflop/s, ranking at #90 on the 21st TOP500 list. This is the first machine in the TOP100 to surpass Linpack price/performance of 1 dollar per Mflop/s.','');
INSERT INTO events_temp VALUES (3675,'2003-11-19','16:30:00','17:00:00','','Algorithms and Programming','HPC.NET - are CLI-based Virtual Machines Suitable for High Performance Computing?','Srinivas Aluru (Iowa State University)','Werner Vogels (Cornell University)','Paper','The Common Language Infrastructure is a new, standardized virtual machine that is likely to become popular on several platforms. In this paper we review whether this technology has any future in the high-performance computing community, for example by targeting the same application space as the Java-Grande Forum. We review the technology by benchmarking three implementations of the CLI and compare those with the results on Java virtual machines.','');
INSERT INTO events_temp VALUES (3676,'2003-11-20','10:30:00','11:00:00','','Scheduling and Communication','Improving the Scalability of Parallel Jobs by adding Parallel Awareness to the Operating System','Allan Snavely (San Diego Supercomputer Center)','Terry Jones (LLNL), William Tuel (IBM), Larry Brenner (IBM), Jeff Fier (IBM), Patrick Caffrey (IBM), Shawn Dawson (LLNL), Rob Neely (LLNL), Robert Blackmore (IBM), Brian Maskell (AWE), Paul Tomlinson (AWE), Mark Roberts (AWE)','Paper','A parallel application benefits from scheduling policies that include a global perspective of the application. As the interactions among cooperating processes increase, mechanisms to ameliorate waiting within one or more of the processes become more important. Collective operations such as barriers and reductions are extremely sensitive to even usually harmless events such as context switches. For the last 18 months, we have been researching the impact of random short-lived interruptions such as timer-decrement processing and periodic daemon activity, and developing strategies to minimize their impact on large processor-count SPMD bulk-synchronous programming styles. We present a novel co-scheduling scheme for improving performance of fine-grain collective activities such as barriers and reductions, describe an implementation consisting of operating system kernel modifications and run-time system, and present a set of results comparing the technique with traditional operating system scheduling. Our results indicate a speedup of over 300% on synchronizing collectives.','');
INSERT INTO events_temp VALUES (3677,'2003-11-20','10:30:00','11:00:00','','Advanced Architectures','Merrimac: Supercomputing with Streams','Jose Munoz (DOE/NNSA)','William J. Dally (Stanford University), Patrick Hanrahan (Stanford University), Mattan Erez (Stanford University), Timothy J. Knight (Stanford University), Francois Labonte (Stanford University), Jung-Ho Ahn (Stanford University), Nuwan Jayasena (Stanford University), Ujval J. Kapasi (Stanford University), Abhishek Das (Stanford University), Jayanth Gummaraju (Stanford University), Ian Buck (Stanford University)','Paper','Merrimac uses a stream architecture and advanced interconnection networks to give an order of magnitude more performance per unit cost than cluster-based scientific computers built from the same technology. Organizing the computation into streams and exploiting the resulting locality using a register hierarchy enables a stream architecture to reduce the memory bandwidth required by representative applications by an order of magnitude or more. Hence a processing node with a fixed bandwidth (expensive) can support an order of magnitude more arithmetic units (inexpensive). This in turn allows a given level of performance to be achieved with fewer nodes (a 1-PFLOPs machine, for example, with just 8,192 nodes) resulting in greater reliability and simpler system management. We sketch the design of Merrimac, a streaming scientific computer that can be scaled from a $20K 2 TFLOPS workstation to a $20M 2 PFLOPS supercomputer and present the results of some initial application experiments on this architecture.','');
INSERT INTO events_temp VALUES (3678,'2003-11-20','10:30:00','11:00:00','','Data Management in Grids','A Metadata Catalog Service for Data Intensive Applications','Gregor von Laszewski (Argonne National Laboratory)','Gurmet Singh (Information Sciences Institute, University of Southern California), Ann Chervenak (Information Sciences Institute, University of Southern California), Ewa Deelman (Information Sciences Institute, University of Southern California), Carl Kesselman (Information Sciences Institute, University of Southern California), Mary Manohar (Information Sciences Institute, University of Southern California), Sonal Patil (Information Sciences Institute, University of Southern California), Laura Pearlman (Information Sciences Institute, University of Southern California)','Paper','Today’s advances in computational, storage and network technologies, as well as middleware such as the Globus Toolkit, allow scientists to expand their scientific horizons and develop sophisticated data-intensive applications. These applications produce and analyze terabytes and petabytes of data that are distributed in millions of files or objects. In order to efficiently manage the large data sets, the metadata about the data needs to be managed. There are various types of metadata, and it is likely that a range of metadata services will exist in grid environments that are specialized for different types of metadata cataloguing and discovery. In this paper, we present a design of a Metadata Catalog Service (MCS) that provides a mechanism for storing and accessing descriptive metadata and allows users to query for data items based on desired attributes. We describe our experience in using the MCS with several applications and present a scalability study of the service.','');
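--
-- The attribute-based lookup described in the MCS abstract above, as a minimal
-- relational sketch. Table and column names are assumptions for illustration
-- (MCS itself is exposed as a grid service, not as this exact schema), and the
-- query is kept commented out so the dump still loads cleanly.
--
-- SELECT i.logical_name
--   FROM mcs_items i, mcs_attributes a
--  WHERE a.item_id = i.id
--    AND a.attr_name = 'instrument'   -- hypothetical descriptive attribute
--    AND a.attr_value = 'SDSS'
--  ORDER BY i.logical_name;
--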
INSERT INTO events_temp VALUES (3679,'2003-11-20','10:30:00','11:30:00','','Showcase III','Computational Steering in a Collaborative Environment','John Brooke (University of Manchester)','John Brooke (University of Manchester), Thomas Eickermann (Research Centre Juelich), Uwe Woessner (HLRS)','SC Global, Showcase','In this showcase we will present live running simulations which are integrated into the Access Grid in a variety of different ways. An example of this is the use of vnc to distribute a desktop on which the simulation is being displayed. Another example is the redirection of the visualization into vic to make 3D animations available over the Access Grid. Other examples that will be explored are the use of OpenGL Vizserver to direct the output of a graphics supercomputer located on the Grid to the AG locations. We will also utilize the ability of the next generation AG software to directly link with visualization toolkits such as vtk, AVS/Express, or COVISE as an integrated part of the Virtual Venue if this functionality is available at the time of SC2003.','');
INSERT INTO events_temp VALUES (3680,'2003-11-20','11:00:00','11:30:00','','Advanced Architectures','Protein Explorer: A Petaflops Special-Purpose Computer System for Molecular Dynamics Simulations','Jose Munoz (DOE/NNSA)','Makoto Taiji (RIKEN), Tetsu Narumi (RIKEN), Yousuke Ohno (RIKEN), Noriyuki Futatsugi (RIKEN), Atsushi Suenaga (RIKEN), Naoki Takada (RIKEN), Akihiko Konagaya (RIKEN)','Paper','We are developing the `Protein Explorer\' system, a petaflops special-purpose computer system for molecular dynamics simulations. The Protein Explorer is a PC cluster equipped with special-purpose engines that calculate nonbonded interactions between atoms, which is the most time-consuming part of the simulations. A dedicated LSI `MDGRAPE-3 chip\' performs these force calculations at a speed of 165 gigaflops or higher. The system will have 6,144 MDGRAPE-3 chips to achieve a nominal peak performance of one petaflop. The system will be completed in 2006. In this paper, we describe the project plans and the architecture of the Protein Explorer.','');
INSERT INTO events_temp VALUES (3681,'2003-11-20','11:00:00','11:30:00','','Data Management in Grids','Grid-Based Galaxy Morphology Analysis for the National Virtual Observatory','Gregor von Laszewski (Argonne National Laboratory)','Ewa Deelman (ISI), Raymond Plante (NCSA), Carl Kesselman (USC/ISI), Gurmeet Singh (USC/ISI), Mei Su (USC/ISI), Gretchen Greene (Space Telescope Science Institute), Robert Hanisch (Space Telescope Science Institute), Niall Gaffney (Space Telescope Science Institute), Antonio Volpicelli (Space Telescope Science Institute), James Annis (Fermi Lab), Vijay Sekhri (Fermi Lab), Tamas Budavari (Johns Hopkins University), Maria Nieto-Santisteban (Johns Hopkins University), William O\'Mullane (Johns Hopkins University), David Bohlender (Canadian Astrophysical Data Center), Tom McGlynn (NASA), Arnold Rots (Smithsonian Astrophysical Observatory), Olga Pevunova (NASA)','Paper','As part of the development of the National Virtual Observatory (NVO), a data grid for astronomy, we have developed a prototype science application to explore the dynamical history of galaxy clusters by analyzing the galaxies’ morphologies. The purpose of the prototype is to explore how grid-based technologies can be used to provide specialized computational services within the NVO environment. Although we describe the scientific goals of the application, this paper focuses on the key technology components, particularly Chimera and Pegasus which are used to create and manage the computational workflow. We illustrate how the components were connected and driven from the application’s portal.','');
INSERT INTO events_temp VALUES (3682,'2003-11-20','11:00:00','11:30:00','','Scheduling and Communication','BCS MPI: a New Approach in the System Software Design for Large-Scale Parallel Computers','Allan Snavely (San Diego Supercomputer Center)','Juan Fernandez (LANL), Fabrizio Petrini (LANL), Eitan Frachtenberg (LANL)','Paper','Buffered CoScheduled (BCS) MPI proposes a new approach to designing communication libraries for large-scale parallel machines. The emphasis of BCS MPI is on the global coordination of a large number of processes rather than on the traditional optimization of the local performance of a pair of communicating processes. BCS MPI delays interprocessor communication in order to schedule the communication pattern globally, and it is designed on top of a minimal set of collective communication primitives. In this paper we describe a prototype implementation of BCS MPI and its communication protocols. The experimental results, obtained on a set of scientific applications representative of the ASCI workload, show that BCS MPI is only marginally slower than the production-level MPI, but much simpler to implement, debug, and analyze.','');
INSERT INTO events_temp VALUES (3683,'2003-11-20','11:30:00','12:00:00','','Advanced Architectures','Early Experience with Scientific Programs on the Cray MTA-2','Jose Munoz (DOE/NNSA)','Wendell Anderson (Naval Research Laboratory), Preston Briggs (Cray, Inc.), C. Stephen Hellberg (Naval Research Laboratory), Daryl W. Hess (Naval Research Laboratory), Alexei Khokhlov (University of Chicago), Marco Lanzagorta (Scientific and Engineering Solutions), Robert Rosenberg (Naval Research Laboratory)','Paper','We describe our experiences porting and tuning three scientific programs to the Cray MTA-2, paying particular attention to the problems posed by I/O. We have measured the performance of each program over many different machine configurations and we report on the scalability of each program. In addition, we compare the performance of the MTA with that of an SGI Origin running all three programs.','');
INSERT INTO events_temp VALUES (3684,'2003-11-20','11:30:00','12:00:00','','Scheduling and Communication','Scalable NIC-based Reduction on Large-scale Clusters','Allan Snavely (San Diego Supercomputer Center)','Adam Moody (Ohio State University), Juan Fernandez (LANL), Fabrizio Petrini (LANL), Dhabaleswar K. Panda (Ohio State University)','Paper','Over the last few decades, researchers have developed many efficient reduction algorithms. However, all these algorithms assume that the reduction processing takes place on the host CPU. Modern Network Interface Cards (NICs) sport programmable processors and thus introduce a fresh variable into the equation. This raises the following interesting challenge: Can we take advantage of modern NICs to implement fast reduction operations? In this paper, we take on this challenge in the context of large-scale clusters. Through experiments on a 960-node, 1920-processor cluster we show that NIC-based reductions indeed perform with reduced latency and improved consistency and scalability over host-based algorithms for the common case. In the largest configuration tested (1,812 processors), our NIC-based algorithm can sum a single-element vector in 73 microseconds with 32-bit integers and in 118 microseconds with 64-bit floating-point numbers, an improvement, respectively, of 121% and 39% with respect to the production-level MPI library.','');
INSERT INTO events_temp VALUES (3685,'2003-11-20','11:30:00','12:00:00','','Data Management in Grids','The Livny and Plank-Beck Problems: Studies in Data Movement on the Computational Grid','Gregor von Laszewski (Argonne National Laboratory)','Matthew S. Allen (University of California, Santa Barbara), Rich Wolski (University of California, Santa Barbara)','Paper','Over the last few years, the Grid Computing research community has become interested in developing data-intensive applications for the Grid. These applications face significant challenges because their widely distributed nature makes it difficult to access data with reasonable speed. In order to address this problem, we feel that the Grid community needs to develop and explore data movement challenges that represent problems encountered in these applications. In this paper, we will identify two such problems, which we have dubbed the Livny Problem and the Plank-Beck Problem. We will also present data movement scheduling techniques that we have developed to address these problems.','');
INSERT INTO events_temp VALUES (3686,'2003-11-20','11:30:00','12:00:00','','Showcase III','Splat Theremin','Joe Reitzer (TRECC-NCSA-UIUC)','Joe Reitzer (TRECC-NCSA-UIUC)','SC Global, Showcase','Splat Theremin will be a collaborative art piece developed by Joe Reitzer (TRECC/NCSA), Marcus Thiebaux (ISI/USC), Tom Coffin (ACCESS DC/NCSA), and Alexander Horn. Splat Theremin will utilize the AG 2.0 software, a Linux graphics cluster with tile display, Geowall, and Immersadesk. TRECC, ACCESS, and EVL will be the main sites involved. \n\nSplat Theremin will provide an interactive means to manipulate volumetric data. The data will be represented visually through the use of splatting rendering techniques utilizing cluster visualization on TRECC\'s Linux graphics cluster with tile display. The participants at the Phoenix Civic Plaza will be able to manipulate the volume data interactively by means of a custom-built short-range tracking system. Behavioral software that reacts to the input of the tracking system will control parameters of how the data is manipulated and rendered. The Access Grid 2.0 software will be used for additional manipulation of the volumetric data. Participants at other Access Grid locations can interact with the volume rendering appearing on the tile display by extracting luminosity from the AG video streams. The number of AG nodes connected to the SC Global Showcase site may also influence other parameters of how the data is manipulated. All data will be selectively available for display on the Geowall at TRECC, ACCESS, and any other site that participates. The Immersadesk at ACCESS DC would also be available for possible remote interaction from participants. Remote sites would be able to choose a specific tile of the tiled display; the chosen tile(s) will be distributed as a stream, like that of DPPT.\n\nCombining these diverse technologies into a homogeneous system for the immediate and remote creation of artwork offers numerous contributions. The experience would provide collaborative insight through the use of this multi-system integration in the generation of visually interesting animations. The live and digital interaction would be similar to how a Theremin creates sound and music, except with multiple people and machines interacting in a live jam session. Each user\'s experience would be different every time.','');
INSERT INTO events_temp VALUES (3687,'2003-11-20','15:30:00','16:00:00','','Performance Measurement and Analysis','Efficient, Unified, and Scalable Performance Monitoring for Multiprocessor Operating Systems','Jeffrey Vetter (LLNL)','Robert W. Wisniewski (IBM T.J. Watson Research), Bryan Rosenburg (IBM T.J. Watson Research)','Paper','Programming, understanding, and tuning the performance of large multiprocessor operating systems is challenging. Crucial to achieving good performance is understanding the system\'s behavior. \n\nWe have developed an efficient, unified, and scalable tracing infrastructure that allows for correctness debugging, performance debugging, and performance monitoring of an operating system. The infrastructure allows variable-length events to be logged without locking and provides random access to the event stream. The infrastructure allows cheap and parallel logging of events by applications, libraries, servers, and the kernel. The infrastructure was designed for K42, a new open-source research kernel designed to scale near perfectly on large cache-coherent 64-bit multiprocessor systems. The techniques are generally applicable, and have been integrated into LTT (Linux Trace Toolkit). We describe the implementation of the infrastructure, how we used the facility, e.g., analyzing lock contention, to understand and achieve K42\'s scalable performance, and the lessons we learned. The infrastructure has been invaluable.','');
INSERT INTO events_temp VALUES (3688,'2003-11-20','15:30:00','16:00:00','','High Performance Input/Output','Fast Parallel Non-Contiguous File Access','Josep Torrellas (University of Illinois)','Joachim Worringen (NEC C&C Research Lab), Jesper Larsson Traff (NEC C&C Research Lab), Hubert Ritzdorf (NEC C&C Research Lab)','Paper','Many applications of parallel I/O perform non-contiguous file accesses, but only a few file system interfaces support non-contiguous access. In contrast, the most commonly used parallel programming interface, MPI, supports parallel I/O through its MPI-IO interface. Within this interface, non-contiguous accesses are supported by the use of derived MPI datatypes. Unfortunately, current MPI-IO implementations suffer from low performance of such non-contiguous accesses when compared to the performance of the storage system for contiguous accesses, although a considerable amount of work has been done in this area. In this paper we analyze an important bottleneck in current implementations of MPI-IO, and present a new technique termed listless i/o to perform non-contiguous access with MPI-IO. On the NEC SX-series of parallel vector computers, listless i/o is able to increase the bandwidth for non-contiguous file access by sometimes more than a factor of 500 when compared to the traditional approach.','');
INSERT INTO events_temp VALUES (3689,'2003-11-20','15:30:00','17:00:00','','Strategies for Application-Empowered Networks','','Maxine D. Brown (University of Illinois at Chicago, USA)','Thomas A. DeFanti (University of Illinois at Chicago, USA), Larry Landweber (National Science Foundation, USA), Kees Neggers (SURFnet, The Netherlands), Harvey B. Newman (Caltech, USA), Bill St. Arnaud (CANARIE, Canada)','Panel','E-Science faces unprecedented challenges in the coming decade, in terms of: (1) the data-intensiveness of the work (as the data being processed, distributed and analyzed moves from terabytes to petabytes to exabytes), (2) the complexity of the data (extracting detail from overwhelming datasets generated by instruments), (3) the timeliness of data transfers (whether bulk transfers for remote storage, smaller transfers for distributed computing and analysis, or real-time transfers for collaboration), and (4) the global extent and multi-level peer group structure of the collaborations, leading to the need for international teams to collaborate and share data-intensive work in fundamentally new ways. This panel discusses the key roles and issues facing new networking infrastructures taking shape worldwide to tackle the data tsunami coming this decade.','');
INSERT INTO events_temp VALUES (3690,'2003-11-20','15:30:00','17:00:00','','SuperNetworking Transforming Supercomputing','','Steven J. Wallach (Chiaro Networks)','Daniel J. Blumenthal (University of California, Santa Barbara), Andrew A. Chien (University of California, San Diego), Jason Leigh (University of Illinois at Chicago), Larry Smarr (University of California, San Diego), Rick L. Stevens (Argonne National Laboratory/University of Chicago)','Panel','For the last decade, Moore\'s Law has dominated supercomputing architecture, since it was on a steeper exponential than either bandwidth or storage. Furthermore, during the 1990s, the commoditization of processors allowed for super-exponential growth in computing power through the parallelization of processors -- that is, we were able to multiply Moore\'s Law for the growth of individual processor speeds by the 500-fold increase in the number of processors in a single parallel computer (from 4 processors in the early 90s to 2000 processors today). \n\nA fundamental architectural shift has occurred in this decade, in that storage and particularly networking bandwidth are growing much faster than Moore\'s Law. The super-exponential in bandwidth is caused by parallelization in the number of Lambdas, independent light paths down a single optical fiber, multiplied by the increase of the bandwidth of the individual light paths (DWDM). The TeraGrid was the first example of a national-scale supercomputer with dedicated optical paths -- 4x10Gbps. The Panel reviews the basic engineering trends in processors, storage and optics, and then examines a number of federally funded projects which are exploring the vision laid out by Steve Wallach at Supercomputing 2000 in which a petaflop computer by 2010 will be an optical switch with compute and storage peripherals. In addition, the Panel examines how dedicated multi-Lambda optical circuits could radically change the architecture of distributed cyberinfrastructure and the ability of application end-users to use that infrastructure to carry out 21st-century scientific research.','');
INSERT INTO events_temp VALUES (3691,'2003-11-20','15:30:00','15:45:00','','Showcase IV','Contextual Backgrounds: AG on the Beach','Darran Edmundson (ANU Supercomputer Facility Vizlab)','Darran Edmundson (ANU Supercomputer Facility Vizlab)','SC Global, Showcase','Green and blue screen compositing is widely used in television and film to merge foreground actors with background scenes. A classic example is the television weather report in which the otherwise information-less pixels surrounding the reporter\'s \"talking head\" are replaced by real data in the form of weather maps and images. The Access Grid, with wall display real-estate being a precious commodity, is particularly well suited to this technique. Users can be combined with their data in meaningful ways to aid understanding for remote viewers. Additionally, with multiple cameras in play, replacing each camera\'s background with a perspective-correct view of the virtual background helps to unify the otherwise disjoint streams coming from a node. \n\nFair enough, so why not paint one\'s Access Grid green and be done with it? Because while the end result is compelling, green screen studios are not at all pleasant environments. They require large sets to minimize color spill from the screen onto the actors plus highly-controlled lighting to ensure even screen color for automated keying. In contrast, the Access Grid - where the distinction between participant and viewer is highly blurred - needs to be a comfortable space conducive to multi-way communication. (Regardless, our small 6m x 4.5m retrofitted room precluded any thought of traditional green screening.) \n\nBy covering the walls of our AG Node with a 3M retroreflective material and placing rings of illuminating green LEDs around our camera lenses, viewers at other nodes perceive brilliant green backgrounds in all outgoing video streams. (Users in the node itself perceive the walls to have a shimmering blue color.) However, prior to transmitting, we use software and hardware to remove the chroma green and, as we know the current position and orientation of the camera lens, composite camera-correct background images into the outgoing video stream. The result, at least for viewers at other nodes, is that our node participants appear embedded in a virtual world of our choosing. Allowing remote operation of our pan/tilt cameras helps to complete the illusion. In this showcase we demonstrate the technology with both scientific user background data and the more contrived example alluded to in the showcase title - namely, our node attendees in landlocked Canberra placed on a beautiful Australian beach.','');
INSERT INTO events_temp VALUES (3692,'2003-11-20','15:45:00','16:30:00','','Showcase IV','Transpacific Synergistic Entertainment','Kazuyuki Shudo (National Institute of Advanced Industrial Science and Technology (AIST), Japan)','Kazuyuki Shudo (National Institute of Advanced Industrial Science and Technology (AIST), Japan)','SC Global, Showcase','Entertainment has not yet found a place on the Access Grid. We explore an informal entertainment use of the technology, through which future requirements for such use will become clear.\n\nIn this event, participants from countries around the Pacific Ocean share an exciting experience and a synergistic feeling in a particular mode of entertainment. Entertainment companies, AIST, and Waseda University have started testing our prototype, and we are now building up our experience toward SC Global and our business.\n\nChallenges: These countries have very different cultures from each other, and their attitudes toward entertainment are also divergent. It is a social and cultural challenge to share a mode of entertainment. Of course, other participants are welcome. The bandwidth of transpacific Internet lines is relatively narrow compared with U.S. domestic lines, and lines between Asia-Pacific countries are not as good as domestic lines. SC Global can be one of the precious opportunities to evaluate the current state of the Internet in this region. The real obstacle to entertainment uses will be latency, due to the long distance between Asia and the U.S.','');
INSERT INTO events_temp VALUES (3693,'2003-11-20','16:00:00','16:30:00','','High Performance Input/Output','Parallel netCDF: A High-Performance Scientific I/O Interface','Josep Torrellas (University of Illinois)','Jianwei Li (ECE Department, Northwestern University), Wei-keng Liao (ECE Department, Northwestern University), Alok Choudhary (ECE Department, Northwestern University), Robert Ross (MCS Division, Argonne National Laboratory), Rajeev Thakur (MCS Division, Argonne National Laboratory), William Gropp (MCS Division, Argonne National Laboratory), Rob Latham (MCS Division, Argonne National Laboratory), Andrew Siegel (MCS Division, Argonne National Laboratory), Brad Gallagher (ASCI Flash Center, University of Chicago), Michael Zingale (UCO/Lick Observatory, University of California, Santa Cruz)','Paper','Dataset storage, exchange, and access play a critical role in scientific applications. For such purposes netCDF serves as a portable, efficient file format and programming interface, which is popular in numerous scientific application domains. However, the original interface does not provide an efficient mechanism for parallel data storage and access. \n\nIn this work, we present a new parallel interface for writing and reading netCDF datasets. This interface is derived with minimal changes from the serial netCDF interface but defines semantics for parallel access and is tailored for high performance. The underlying parallel I/O is achieved through MPI-IO, allowing for substantial performance gains through the use of collective I/O optimizations. We compare the implementation strategies and performance with HDF5. Our tests indicate programming convenience and significant I/O performance improvement with this parallel netCDF (PnetCDF) interface.','');
INSERT INTO events_temp VALUES (3694,'2003-11-20','16:00:00','16:30:00','','Performance Measurement and Analysis','Memory Profiling using Hardware Counters','Jeffrey Vetter (LLNL)','Marty Itzkowitz (Sun Microsystems), Brian J.N. Wylie (Sun Microsystems), Christopher Aoki (Sun Microsystems), Nicolai Kosche (Sun Microsystems)','Paper','Although memory performance is often a limiting factor in application performance, most tools only show performance data relating to the instructions in the program, not to its data. In this paper, we describe a technique for directly measuring the memory profile of an application. We describe the tools and their user model, and then discuss a particular code, the MCF benchmark from SPEC CPU 2000. We show performance data for the data structures and elements, and discuss the use of the data to improve program performance. Finally, we discuss extensions to the work to provide feedback to the compiler for prefetching and to generate additional reports from the data.','');
INSERT INTO events_temp VALUES (3695,'2003-11-20','16:30:00','17:00:00','','High Performance Input/Output','Grid-Based Parallel Data Streaming Implemented for the Gyrokinetic Toroidal Code','Josep Torrellas (University of Illinois)','Scott Alan Klasky (PPPL), Stephane Ethier (PPPL), Zhihong Lin (UC Irvine), Kevin Martins (PPPL), Doug McCune (PPPL), Ravi Samtaney (PPPL)','Paper','We have developed a threaded parallel data streaming approach using Globus to transfer multi-terabyte simulation data from a remote supercomputer to the scientist\'s home analysis/visualization cluster, as the simulation executes, with negligible overhead. Data transfer experiments show that this concurrent data transfer approach is more favorable than writing to local disk and then transferring the data to be post-processed. The present approach is conducive to using the grid to pipeline the simulation with post-processing and visualization. We have applied this method to the Gyrokinetic Toroidal Code (GTC), a 3-dimensional particle-in-cell code used to study micro-turbulence in magnetic confinement fusion from first-principles plasma theory.','');
INSERT INTO events_temp VALUES (3696,'2003-11-20','16:30:00','17:00:00','','Performance Measurement and Analysis','Identifying and Exploiting Spatial Regularity in Data Memory References','Jeffrey Vetter (LLNL)','Tushar Mohan (Lawrence Berkeley National Lab), Bronis R. de Supinski (LLNL), Sally A. McKee (CSL, Cornell), Frank Mueller (NCSU), Andy Yoo (LLNL), Martin Schulz (CSL, Cornell)','Paper','The growing processor/memory performance gap causes the performance of many codes to be limited by memory accesses. Strided memory accesses forming streams can be targeted by optimizations such as prefetching, relocation, remapping, and vector loads. Undetected, they can be a significant source of memory stalls in loops. The concept of locality fails to capture the existence of streams in a program\'s memory accesses. \n\nFirst, we define spatial regularity as a means to discuss the presence and effects of streams. Second, we develop measures to quantify spatial regularity, and we design and implement an on-line, parallel algorithm to detect streams in running applications. Third, we use examples from real codes and common benchmarks to illustrate how derived stream statistics can be used to guide the application of profile-driven optimizations. Overall, we demonstrate the benefits of our novel regularity metric as an instrument to detect potential for optimizations affecting memory performance.','');
INSERT INTO events_temp VALUES (3697,'2003-11-20','16:30:00','17:00:00','','Showcase IV','Closing Comments, SC Global Chair','Jennifer Teig von Hoffman (Boston University)','Jennifer Teig von Hoffman (Boston University)','SC Global, Showcase','A summary of this year\'s SC Global conference, including information about participating sites, and the technical and production infrastructure that supported them.','');
INSERT INTO events_temp VALUES (3698,'2003-11-21','10:30:00','12:00:00','','Open Source Software Policy Issues for High Performance Computing','','Rod Oldehoeft (Los Alamos National Laboratory)','Paul Gottlieb (DOE), Terry Bollinger (MITRE), Tony Stanco (The Center of Open Source and Government), Tim Witham (Open Source Development Lab)','Panel','Panelists have been selected based on their recent activities in OSS. They will describe their activities and any resulting conclusions or recommendations. Observations about \"best practices\" or \"lessons learned\" will be elicited from them.','');
INSERT INTO events_temp VALUES (3699,'2003-11-21','10:30:00','12:00:00','','The Simplification of Supercomputing: Clustering, Appliances and Grid Computing','','Bill Blake (SVP of Product Development, Netezza Corp.)','Marshall Peterson (CTO of the J. Craig Venter Science Foundation), Steve Oberlin (Founder and CEO of Unlimited Scale), Dr. Andrew Grimshaw (Founder and CTO of Avaki), Mark Seager (Assistant Department Head for Terascale Systems, Lawrence Livermore National Laboratory), Russ Miller (Director for the Center of Computational Research at SUNY-Buffalo)','Panel','In the beginning, supercomputing involved utilizing gigantic machines housed in huge rooms at only the top locations. Today, however, the supercomputing world is experiencing a change: powerful yet lower-cost forms of supercomputing are becoming commonplace thanks to the emergence of Linux clustering, grid computing, and the adoption of appliances. Future computing sites, especially in the life sciences, will combine these components into massive data factories that pipeline automated instrumentation, supercomputers and analytic databases and then integrate the results with geographically separated compute and data grids.','');
INSERT INTO events_temp VALUES (3700,'2003-11-21','10:30:00','12:00:00','','Goldilocks and the Three Bears Revisited: HPC Architecture Directions','','David Morton (MHPCC)','Steve Scott (Cray Inc.), Rob Pennington (NCSA), Bill Pulleyblank (IBM)','Panel','This panel follows in the steps of an HPC architectural discussion called Goldilocks and the Three Bears chaired by Bob Borchers at SC95. While the names and some of the topics have changed, HPC architectural directions continue to be a hot area of discussion. During this session, three experts from widely divergent schools of HPC thought will discuss where the industry is headed and why their particular architecture will be an important part of that future. The questions that will be posed to this panel include: \n\n1) Why do you think that the architecture you are most closely associated with is important and relevant? \n\n2) Where do you see it evolving over time? \n\n3) Do you see any other current HPC architectures becoming less important over time? \n\n4) Do you think that customized HPC hardware can compete on a price/delivered performance basis long term with COTS microprocessors and systems?','');
INSERT INTO events_temp VALUES (3701,'2003-11-21','08:30:00','10:00:00','','The High End Computing Revitalization Task Force','','David B. Nelson (National Coordination Office for Information Technology Research and Development (NCO/ITR&D))','John Grosh (General Engineer, Office of the Under Secretary of Defense for Science and Technology), Alan Laub (SciDAC Director, DOE Office of Science), Daniel A. Reed (NCSA and the University of Illinois at Urbana-Champaign)','Panel','The President\'s FY 2004 budget included a directive to develop a plan to guide future Federal investments in high end computing. Established in March 2003, the High End Computing Revitalization Task Force (HECRTF), coordinated through the National Science and Technology Council, has been preparing this plan, which is scheduled to be delivered in the fall of 2003. The plan will set forth a roadmap for fiscal year (FY) 2005 through FY 2009 Federal investments in three areas:\n\n* Research and development in core HEC technologies, including identification of key technologies for new generations of HEC systems, alternative coordinated multiagency plans, and approaches to enable both revolutionary and evolutionary advances and diffusion into industry\n\n* Provision of HEC resources to Federal and Federally funded researchers, including alternative plans to help reduce capability and capacity gaps, design specifications and performance targets linked to application domain requirements and user needs, minimizing time to solution, and access by potential Federal and non-Federal user communities beyond those using HEC systems funded or hosted by Federal agencies\n\n* Recommendations about Federal HEC procurement practices, such as practical performance measures, ways to derive system performance targets, ways to measure total cost of ownership, and ways to improve HEC acquisitions processes\n\nIn April 2003 the HECRTF solicited white papers that provide technical facts and information about these three areas. In response to the solicitation, which was extended to a wide spectrum of stakeholders, almost 80 papers were received.\n\nOn June 16 through 18, 2003, the Computing Research Association (CRA) sponsored a Workshop on the Road Map for the Revitalization of High End Computing that brought together 200 academic, industry, and government researchers and managers to address HEC revitalization issues and publish a report, which is scheduled to be publicly released in the fall of 2003. The workshop had working groups that addressed HEC topics including (1) enabling technologies, (2) COTS-based architectures, (3) custom-based architectures, (4) run times and operating systems, (5) programming environments and tools, (6) performance modeling, metrics, and specifications, (7) application-driven system requirements, and (8) procurement, accessibility, and cost of ownership.\n\nThe panelists will draw on these activities to address the following questions:\n\nWhat are the purposes, plans, schedule, and results to date of the Federal government\'s High End Computing Revitalization Task Force (HECRTF)?\n\nWhat input to the HECRTF was provided by academia and industry in white papers solicited by the HECRTF in the spring of 2003 and at the June 2003 Workshop on the Road Map for the Revitalization of High End Computing?\n\nWhat is the current status of the Federal government\'s plans to revitalize high end computing in the U.S.?\n\nWhat are the views of academia and industry about HECRTF efforts?','');
INSERT INTO events_temp VALUES (3702,'2003-11-21','08:30:00','10:00:00','','HPC Productivity','','Jeremy Kepner (MIT Lincoln Laboratory)','Ken Kennedy (Rice University), David Kuck (Intel), Marc Snir (University of Illinois), Thomas Sterling (Caltech/NASA JPL), Bob Numrich (University of Minnesota), John Gustafson (Sun)','Panel','The value of an HPC system to a user includes many factors, such as execution time on a particular problem, software development time, and direct and indirect costs. The DARPA High Productivity Computing Systems program is focused on providing a new generation of economically viable high productivity computing systems for the national security and industrial user community in the 2007-2010 timeframe. The goal is to provide systems that double in productivity (or value) every 18 months.\n\nThis program has initiated a fundamental reassessment of how we define and measure performance, programmability, portability, robustness, and ultimately productivity in the HPC domain. The panelists will present their (new) views on two fundamental questions:\n\n Q1: How should we define and measure productivity in HPC?\n Q2: What are the implications for HPC designers and users?','');
INSERT INTO events_temp VALUES (3703,'2003-11-21','08:30:00','10:00:00','','Battle of the Network Stars!','','Wu Feng (Los Alamos National Laboratory)','Fabrizio Petrini (Los Alamos National Laboratory), Dhabaleswar K. Panda (The Ohio State University), Jeffrey S. Chase (Duke University), Bradley Booth (Intel Corporation), Allyn Romanow (Cisco Systems), Anthony Skjellum (MPI Software Technology)','Panel','In the tradition of the ABC TV show that pitted Hollywood stars from different networks against each other, this panel reprises the show, with the network stars in this case being InfiniBand, Myrinet, Quadrics, SCI, and 10-Gigabit Ethernet. \n\nThe panel will address two major sets of questions: [1] Which interconnect is the best for high-performance computing and why? [2] What are the future trends in high-performance networking, and what are the implications of these trends?\n\nQuestions for the \"head-to-head\" battle of network interconnects: \n[1] Each panelist was invited onto the panel due to their expertise with specific network interconnects. Why is \"your\" solution the better one? \n[2] Given that we, as a community, focus on the typical quantitative measures of latency and throughput for network interconnects, what other ways should we be evaluating interconnects? \n[3] Will the \"status quo\" in networking continue? That is, Ethernet as a commodity interconnect that often doubles as a cheap commodity solution for clusters, with InfiniBand, Quadrics, and Myrinet \"relegated\" to high-end and more costly clusters. \n[4] With 10-Gigabit Ethernet processors on the horizon, will RDMA/TCP/10GigE be sufficient in matching the performance of Quadrics, InfiniBand, and Myrinet? And if so, what does this foretell of the future of these latter interconnects?\n[5] What assumptions must interconnects make about the underlying architecture (or what assumptions would they like to make)? PCI-X? PCI Express? Intel\'s \"Communications Streaming Architecture\" or network co-processor? InfiniBand? \n\nQuestions for \"Future Trends and Implications\": \n[1] Moore\'s Law forecasts the doubling of processor speeds every 18 months. Arguably network speeds have been doubling every 12 months on average. Will there come a time when we focus more on \"supernetworking\"? \n[2] \"Sockets or Bust?\": Does the interface to the network have to be a sockets interface? Or will application programmers be willing to rely on \"inefficient\" MPI software to hide such details? \n[3] In five years, how will today\'s interconnects evolve and/or compete in high-performance computing? \n[4] InfiniBand started out as a high-performance I/O technology but has evolved into a general network interconnect for high-performance clusters. Will it replace Myrinet or Quadrics as the costlier high-performance interconnect for high-end clusters? \n[5] What implications, if any, are there for direct-access file systems (DAFS)?','');
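--
-- Illustrative example (not part of the original dump): a sketch of how one
-- day's program could be pulled out of events_temp once the restore is done.
-- The CREATE TABLE for events_temp appears earlier in this file; the column
-- names used below (date, startTime, endTime, sessionTitle, eventType) are
-- assumptions inferred from the INSERT statements above and should be
-- checked against that statement before use.
--
-- SELECT startTime, endTime, sessionTitle, eventType
-- FROM events_temp
-- WHERE date = '2003-11-20'
-- ORDER BY startTime, endTime;
--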
--
-- Table structure for table 'subevents'
--
CREATE TABLE subevents (
subeventID int(11) NOT NULL auto_increment,
date date default '0000-00-00',
startTime time default '00:00:00',
endTime time default '00:00:00',
room text,
subSessionTitle text,
speaker text,
abstract text,
paperFilename text,
eventID int(11) default '0',
PRIMARY KEY (subeventID)
) TYPE=MyISAM;
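--
-- Illustrative example (not part of the original dump): each subevents row
-- points back to its parent session through eventID, which mirrors
-- events.eventID. A minimal sketch of the intended join, using only column
-- names taken from the CREATE TABLE statements in this dump; run it after
-- the restore completes.
--
-- SELECT e.sessionTitle, s.subSessionTitle, s.speaker
-- FROM events e
-- JOIN subevents s ON s.eventID = e.eventID
-- WHERE e.date = '2003-11-16'
-- ORDER BY e.startTime, s.startTime;
--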
--
-- Dumping data for table 'subevents'
--
INSERT INTO subevents VALUES (1071,'0000-00-00','00:00:00','00:00:00','Room','Sub-session (Optional)','Speaker/Presenter/Author','Abstract','Paper pdf',10655);
INSERT INTO subevents VALUES (1072,'2003-11-16','08:30:00','12:00:00','','S6: The Grid: Software Standards for Cyberinfrastructure','Carl Kesselman (USC Information Sciences Institute)','Content-Level: 75% Introductory 25% Intermediate 0% Advanced\n\nAbstract: Market pressures in both business and science are enabling the emergence of a common framework for distributed computing. The favored candidate at this time for this common framework is \"the Grid.\" The Grid community, organized around the Global Grid Forum (GGF), has made considerable progress toward establishing a sense of what the Grid is and how it should be developed. This tutorial will introduce attendees to the concepts and current status of the Grid computing movement in science and business. Attendees will leave with a roadmap of the concepts, available products and technologies, standards activities, and R&D associated with the Grid.','S6_Out.pdf',10656);
INSERT INTO subevents VALUES (1073,'2003-11-16','08:30:00','12:00:00','','S5: An Introduction to the TotalView Debugger','Blaise M. Barney (Lawrence Livermore National Laboratory)','Content-Level: 50% Introductory 25% Intermediate 25% Advanced\n\nAbstract: The TotalView debugger has become the \"de facto standard\" within the High Performance Computing industry for debugging cross-platform, multi-model parallel applications. TotalView\'s easy-to-use GUI provides the means to explore what an application is \"really\" doing at the deepest level. TotalView has been selected by the U.S. Department of Energy as the debugger of choice for its Advanced Simulation and Computing (ASCI) program. TotalView has likewise been selected by a growing number of telco, petroleum, aerospace, university and HPC organizations as their debugger of choice. \n\nThis tutorial begins by covering all of the essentials for using TotalView in a general programming environment. After covering these essentials, an emphasis is placed upon debugging parallel programs, including threaded, MPI, OpenMP and hybrid programs. In addition to the many screen captures of debug sessions presented in the tutorial, hands-on exercises are included to maximize benefit to the attendee. There will be a limited number of workstations provided for the hands-on portion of the tutorial, assigned on a first-come, first-served basis. You may participate in the hands-on portion of the tutorial using your own laptop, but be aware that there will be no support available to debug problems with attendee laptops. Attendee laptops must be configured for wireless access and must have an X11 environment and an ssh client installed.','S5_Out.pdf',10657);
INSERT INTO subevents VALUES (1074,'2003-11-16','08:30:00','12:00:00','','S8: Introduction to Scientific Computing with Python','Eric A. Jones (Enthought, Inc.), Travis Oliphant (Brigham Young University), Pat Miller (Lawrence Livermore National Laboratory)','Content-Level: 50% Introductory 50% Intermediate 0% Advanced\n\nAbstract: Python has emerged as an excellent choice for scientific computing because of its simple syntax, ease of use, and elegant multi-dimensional array arithmetic. Its interpreted evaluation allows it to serve as both the development language and the command line environment in which to explore data. Python also excels as a \"glue\" language that joins together multiple legacy codes written in different languages -- a common need in the scientific arena. \n\nThis tutorial introduces the Python programming language to scientists. The pace is fast and geared toward individuals already comfortable with a programming language such as Matlab, C, or Fortran. Attendees will learn the basic constructs of the language and how to do basic numerical analysis with Python. The third section covers the SciPy library (www.scipy.org) that provides modules for linear algebra, signal processing, optimization, statistics, genetic algorithms, interpolation, ODE solvers, special functions, etc. The final section focuses on the Chaco graphics infrastructure for creating scientific plots. There is a companion tutorial in the afternoon that covers more advanced topics such as wrapping legacy Fortran and C/C++ codes as well as parallel programming with Python. A Windows version of Python (Enthought Edition) will be available on CD for attendees to install and use during the tutorial. The installation includes Python, Numeric, SciPy, wxPython, and VTK as well as other packages useful for scientific computing.','S8_Out.pdf',10658);
INSERT INTO subevents VALUES (1075,'2003-11-16','08:30:00','12:00:00','','S7: High-Performance Storage','David Allan Pease (IBM Research), Darrell D.E. Long (U.C. Santa Cruz)','Content-Level: 25% Introductory 60% Intermediate 15% Advanced\n\nAbstract: The demand for storage in computing systems is growing at an unprecedented rate; this growth is fueled primarily by richer data content, the plummeting cost of raw storage, and the ability of almost everyone in our society to create and save data of every kind. This growth, in turn, is driving the need for higher-performance, easier-to-manage storage. Although storage device performance is constantly increasing, these increases are not keeping pace with the demand for storage performance. Traditional approaches to improving storage performance include caching, prefetching, and data striping and parallel I/O. New technologies include storage subsystems with advanced features like storage virtualization and remote replication, object-based storage, storage area networks (SANs), and SAN-based file systems. Current research is investigating features such as active disks and self-organizing, self-managing, self-optimizing storage systems. This tutorial introduces the components of modern storage systems and the factors and techniques involved in designing high-performance storage environments, and looks at current and future developments in the storage industry.','S7_Out.pdf',10659);
INSERT INTO subevents VALUES (1076,'2003-11-16','08:30:00','12:00:00','','S10: An Introduction to Quantum Information','Carl J. Williams (National Institute of Standards and Technology)','Content-Level: 60% Introductory 40% Intermediate 0% Advanced\n\nAbstract: Quantum information is a revolutionary paradigm that puts information on a fundamental physical level and is creating a cross-disciplinary interaction between physicists, mathematicians, and computer scientists. In the popular press, quantum computing is often presented as providing a massively parallel new computational paradigm for solving computationally intractable problems like factoring. The first half of the tutorial will introduce the audience to the elementary concepts of quantum information and describe how these general concepts and ideas give rise to quantum communication and quantum computation. The second half of the tutorial will then begin with an overview of the state-of-the-art in quantum key distribution and the technological breakthroughs and bottlenecks in this rapidly developing field. This will be followed by a similar overview of the much more speculative and longer-term problem of building a rudimentary quantum processor, including a discussion of the numerous technological problems that must be overcome to move from a rudimentary to a scalable quantum processor. The tutorial will then conclude with some remarks on the possible industrial implications of this technology, in the hopes of providing a broader technological assessment of why quantum information is truly revolutionary and worth pursuing.','S10_Out.pdf',10660);
INSERT INTO subevents VALUES (1077,'2003-11-16','08:30:00','17:00:00','','S1: Production Linux Clusters 2003 - Architecture and System Software for Serious Computing','Remy Evard (Argonne National Laboratory), Susan Coghlan (Argonne National Laboratory), Peter Beckman (Argonne National Laboratory), William Saphir (none)','Content-Level: 40% Introductory 50% Intermediate 10% Advanced\n\nAbstract: Linux clusters have become the dominant computing platform for small and mid range computing, and have substantial penetration into the upper echelon of the top500 list. Clusters are available from dozens of vendors and there are even more ways to run them. However, due in large part to the huge range of hardware and software options for building clusters, clusters still require a great deal of expertise to plan, deploy, and support. Building a complete, robust, and easily-managed production cluster is still a significant challenge today. \n\nThis tutorial will explain how to design your next cluster, plan for it, buy it, install it, run it, manage it, evaluate performance, and keep users happy on it. We will consider current hardware, describe proven management techniques, and discuss several modern cluster software systems while attempting to remain distribution and package neutral. Our goal is not to talk about how to cobble cheap PCs into a fast computer or to advocate a specific package, but to focus on making your next production supercomputer a Linux cluster. \n\nThis tutorial is a full-day tutorial. The handouts include practical, current information that can be directly applied to cluster selection and management.','S1_Out.pdf',10661);
INSERT INTO subevents VALUES (1078,'2003-11-16','08:30:00','17:00:00','','S4: Real World Techniques for Scientific Applications of Scale','Alice Koniges (LLNL), Mark Seager (LLNL), Rolf Rabenseifner (High Performance Computing Center, University of Stuttgart), David Eder (LLNL)','Content-Level: 20% Introductory 45% Intermediate 35% Advanced\n\nAbstract: Teraflop performance is no longer a thing of the future as complex integrated 3D simulations drive supercomputer development. Today, most HPC systems are clusters of SMP nodes ranging from dual-CPU-PC clusters to the largest systems at the world\'s major computing centers. What are the major issues facing application code developers today? How do the challenges vary from cluster computing to the complex hybrid architectures with super scalar and vector processors? Finally, what is our path both architecturally and algorithmically to petaflop performance? What skills and tools are required, both of the application developer and the system itself? In this tutorial we address these questions and give tips, tricks, and tools of the trade for large-scale application development. A special emphasis is given to mixed-mode (combined MPI/OpenMP) programming. In the introduction, we provide an overview of terminology, hardware and performance. We describe the latest issues in implementing scalable parallel programming. We draw from a series of large application suites and discuss specific challenges and problems encountered in parallelizing these applications. Additional topics cover parallel I/O, scripting languages and code wrappers. We conclude with a road map for the possible paths to petaflop computing. More information can be obtained from http://www.hlrs.de/people/rabenseifner/publ/SC2003-tutorial.html.','S4_Out.pdf',10662);
INSERT INTO subevents VALUES (1079,'2003-11-16','08:30:00','17:00:00','','S3: A practical approach to performance analysis and modeling of large-scale systems','Adolfy Hoisie (Los Alamos National Laboratory), Darren J. Kerbyson (Los Alamos National Laboratory)','Content-Level: 30% Introductory 50% Intermediate 20% Advanced\n\nAbstract: This tutorial presents a practical approach to the performance modeling of large-scale, scientific applications on high performance systems. The defining characteristic of our tutorial involves the description of a proven modeling approach, developed at Los Alamos, of full-blown scientific codes, ranging from a few thousand to over 100,000 lines, that has been validated on systems containing thousands of processors. The goal is to impart a detailed understanding of factors contributing to the resulting performance of an application when mapped onto a given HPC platform. Performance modeling is the only technique that can quantitatively elucidate this understanding. We show how models are constructed and demonstrate how they are used to predict, explain, diagnose, and engineer application performance in existing or future codes and/or systems. Notably, our approach does not require the use of specific tools but rather is applicable across commonly used environments. Moreover, since our performance models are parametric in terms of machine and application characteristics, they imbue the user with the ability to \"experiment ahead\" with different system configurations or algorithms/coding strategies. Both will be demonstrated in studies emphasizing the application of these modeling techniques including: verifying system performance, comparison of large-scale systems, and examination of possible future systems.','S3_Out.pdf',10663);
INSERT INTO subevents VALUES (1080,'2003-11-16','08:30:00','17:00:00','','S2: A Tutorial Introduction to High Performance Data Transport','Bill Allcock (Argonne National Laboratory), Robert Grossman (University of Illinois at Chicago), Steven Wallace (Indiana University)','Content-Level: 40% Introductory 40% Intermediate 20% Advanced\n\nAbstract: Developing high performance data-intensive applications requires not only high performance computing resources but, just as importantly, high performance data transport linking them. With emerging 1, 2.5 and 10 Gigabit per second links, there is unprecedented opportunity for creating distributed data-intensive applications. In this tutorial, we give an overview of different protocols for high performance data transport and how to build applications using them.','S2_Out.pdf',10664);
INSERT INTO subevents VALUES (1081,'2003-11-16','08:30:00','17:00:00','','S9: Programming with the Partitioned Global Address Space Model','William Carlson (IDA/CCS), Tarek El-Ghazawi (GWU), Kathy Yelick (UCB), Robert Numrich (UMN)','Content-Level: 30% Introductory 50% Intermediate 20% Advanced\n\nAbstract: The partitioned global address space programming model, also known as the distributed shared address space model, has the potential to achieve a balance between ease-of-programming and performance. As in the shared-memory model, one thread may directly read and write memory allocated to another. At the same time, the model gives programmers control over features that are essential for performance, such as locality. \n\nThe model is receiving rising attention and there are now several compilers for languages based on this model. In this tutorial, we present the concepts associated with this model, including execution, synchronization, workload distribution, and memory consistency models. We then introduce three parallel programming language instances: Unified Parallel C (UPC); Co-Array Fortran; and Titanium, a Java-based language. It will be shown through experimental studies that these paradigms can deliver performance comparable with message passing, while maintaining the ease of programming of the shared memory model. \n\nThrough hands-on exercises on parallel systems, the audience will get first-hand exposure to these powerful paradigms. There will be a limited number of workstations provided for the hands-on portion of the tutorial, assigned on a first-come, first-served basis. You may participate in the hands-on portion of the tutorial using your own laptop, but be aware that there will be no support available to debug problems with attendee laptops. Attendee laptops must be configured for wireless access and must have ssh installed.','S9_Out.pdf',10665);
INSERT INTO subevents VALUES (1082,'2003-11-16','13:30:00','17:00:00','','S12: Advanced Topics in OpenMP','Timothy Glen Mattson (Intel), Sanjiv Shah (Intel)','Content-Level: 5% Introductory 35% Intermediate 60% Advanced\n\nAbstract: OpenMP is an important standard for writing parallel applications for shared memory computers. Many HPC professionals have been exposed to OpenMP. Unfortunately, only a small number have mastered it to become OpenMP experts. \n\nThis tutorial will address that problem by covering the key topics a programmer must understand in order to become an OpenMP expert. We will address (1) how to take advantage of the performance oriented constructs in the OpenMP specifications, (2) compiler implementation issues and how they impact program performance, and (3) programming clusters of SMP nodes by mixing OpenMP and MPI. \n\nThroughout the tutorial, we will use examples to amplify each of the above points. We will collect these examples from real programs crafted by master OpenMP programmers.','S12_Out.pdf',10666);
INSERT INTO subevents VALUES (1083,'2003-11-16','13:30:00','17:00:00','','S11: Grid Services for Data Management and Virtual Data','Ann Chervenak (University of Southern California), Ewa Deelman (University of Southern California), Mike Wilde (Argonne National Laboratory)','Content-Level: 15% Introductory 70% Intermediate 15% Advanced\n\nAbstract: This tutorial will provide a detailed introduction to existing services for data management in grid computing environments. These services are essential for data-intensive applications that require the creation, replication, management and discovery of large numbers of files or data items. We will provide detailed descriptions and interactive demonstrations of six grid components: the GridFTP data transport protocol, the Reliable File Transfer (RFT) service, the Replica Location Service (RLS), the Metadata Catalog Service (MCS), the Chimera system for managing virtual data products, and the Pegasus system for planning and execution in grid environments. \n\nThis tutorial is intended for those who are interested in deploying and using a grid for data-intensive applications. The tutorial will focus on stable, existing grid components from Globus Toolkit versions 2.4 and 3.0. The tutorial will consist mainly of detailed examples of how to configure, deploy and use these grid services. The tutorial will include interactive demonstrations of the use of these tools.','S11_Out.pdf',10667);
INSERT INTO subevents VALUES (1084,'2003-11-16','13:30:00','17:00:00','','S13: Scientific Computing with Python -- Advanced Topics','Eric Jones (Enthought), Travis Oliphant (Brigham Young University), Pat Miller (Lawrence Livermore National Laboratory)','Content-Level: 0% Introductory 50% Intermediate 50% Advanced\n\nAbstract: Python has emerged as an excellent choice for scientific computing because of its simple syntax, ease of use, and elegant multi-dimensional array arithmetic. Its interpreted evaluation allows it to serve as both the development language and the command line environment in which to explore data. Python also excels as a \"glue\" language that joins together multiple legacy codes written in different languages -- a common need in the scientific arena.\n\nThis half-day tutorial covers advanced topics in scientific computing such as integrating Python with other languages and parallel programming. Wrapping Fortran, C, and C++ codes, either for optimized speed or for accessing legacy code bases, is covered in the middle section. Tools such as SWIG, f2py, and Boost Python are all discussed along with common pitfalls and good design practices. The final session covers parallel programming with an emphasis on pyMPI. This tutorial is a companion class to a morning session that introduces Python to the scientific community. A Windows version of Python (Enthought Edition) will be available on CD for attendees to install and use during the tutorial. The installation includes Python, Numeric, SciPy, wxPython, and VTK as well as other packages useful for scientific computing.','S13_Out.pdf',10668);
INSERT INTO subevents VALUES (1085,'2003-11-16','13:30:00','17:00:00','','S15: Distributed and Collaborative Visualization','Ken Brodlie (University of Leeds), David Duce (Oxford Brookes University), Jason Wood (University of Leeds)','Content-Level: 20% Introductory 80% Intermediate 0% Advanced\n\nAbstract: Visualization is a key component in understanding large datasets, including those generated as output from simulations on high performance computers. Computational steering allows the adjustment of parameters as a simulation is running, on the basis of the visualized results. This tutorial will give a brief introduction to visualization, and proceed to cover two key aspects: distributed visualization and collaborative visualization. Distributed visualization is of essential importance for computational steering, allowing the simulation to run on a remote high performance computer, but with the steering control and the visualization on the desktop. Collaborative visualization is of increasing importance, as more and more scientific research is done in teams, often geographically distributed. The tutorial will be illustrated throughout by live demonstrations of all the concepts, including a demonstration of Grid-enabled visualization where an existing visualization system, IRIS Explorer, is combined with Globus middleware, to provide both distributed and collaborative visualization in a single framework.','S15_Out.pdf',10669);
INSERT INTO subevents VALUES (1086,'2003-11-16','13:30:00','17:00:00','','S14: Computational Biology','Craig A. Stewart (Indiana University)','Content-Level: 15% Introductory 70% Intermediate 15% Advanced\n\nAbstract: Computational biology, bioinformatics, genomics, systems biology and related areas stand to be very important to the high performance community. There are tremendous opportunities to advance knowledge in biological and biomedical research areas through the use of high performance computing. This tutorial will begin with a brief overview of the essential biological bases for the current revolution in life sciences computing. Topics to be covered in depth include: sequence alignment and pattern matching; protein structure prediction; phylogenetics; systems biology; grid computing applications; and thoughts about the future of computational biology. This tutorial is intended for people who are interested in a rapid and useful introduction to computational biology and high performance computing. Tutorial attendees can expect to have a basic understanding of the area of computational biology and have a real feel for the nature of the work in this area as a result of hands-on experience with key applications. There will be hands-on exercises as part of the tutorial. A limited number of laptops will be provided and assigned on a first-come, first-served basis. Attendees with laptops and wireless network adapters are encouraged to bring them to the tutorial. Attendee laptops must have ssh installed in order to participate but be aware that there will be no support available to debug problems with attendee laptops. Hands-on exercises may also be done throughout the week at the \"Research in Indiana\" exhibit.','S14_Out.pdf',10670);
INSERT INTO subevents VALUES (1087,'2003-11-17','08:00:00','17:30:00','Wyndham Phoenix Hotel - Navajo Room','','Steering Committee: Craig Lee (Aerospace Corp.), Heinz Stockinger (CERN), Mark Baker (University of Portsmouth, UK), Rajkumar Buyya (University of Melbourne, Australia), Manish Parashar (Rutgers University)','In the last few years, the Grid community has been growing very rapidly and several new technologies have been proposed. This goes along with the growing popularity of the Internet and the availability of powerful computers and high-speed networks as low-cost commodity components, and is changing the way we do computing. Several proposals and publications have been generated, and several Grid projects with research- and production-oriented goals are ongoing.\n\nGrid 2003 is an international meeting that brings together the Grid community of researchers, developers, practitioners, and users. The objective of Grid 2003 is to serve as a forum to present current and future work as well as to exchange research ideas in this field. Grid 2003 partially follows the focus from last year but extends it to production Grids and international testbeds. Sessions of refereed presentations include:\n\nGrid Infrastructure and Services\nPerformance Evaluation and Optimization\nPolicy and Security Management\nData Management\nTestbeds and Applications\nInformation Systems\nWork-in-Progress Papers\n\nFor more detailed information about the workshop, see http://www.gridcomputing.org/grid2003/.','',10671);
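--
-- The final value in each subevents row (10668, 10671, ...) appears to key
-- back to the parent events row; the paper rows further down share parents
-- such as 10686 and 10687. A minimal join sketch for reading the program
-- back out of the dump. The subevents column names used here (title,
-- startTime, endTime, eventID) are assumptions inferred from the VALUES
-- order above, not names taken from the dump itself:
--
SELECT e.sessionTitle, s.title, s.startTime, s.endTime
FROM events e INNER JOIN subevents s ON s.eventID = e.eventID
WHERE e.eventID = 10671;
--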
INSERT INTO subevents VALUES (1088,'2003-11-17','08:30:00','12:00:00','','M7: PERC Tools for Performance Data Gathering and Analysis','Philip John Mucci (LBNL, UTK), Bronis R. de Supinski (LLNL), Celso Mendes (UIUC)','Content-Level: 25% Introductory 50% Intermediate 25% Advanced\n\nAbstract: This tutorial introduces a practical approach to the collection and analysis of performance data from production HPC applications. Attendees will emerge with a set of tools and techniques to explore the performance of their applications on large parallel machines. This process consists of assessing the computational demands, capability and complexity of the application code, as well as understanding the efficiency of the mapping of an application to a specific architecture. To facilitate this, the tutorial will walk through the use of these tools on the Parallel Ocean Program or POP. POP was chosen because it is a widely used, compute-intensive production application and its performance has been extensively examined. The primary audience for this tutorial will be application developers needing to quantify the performance of their codes. The tools are also of interest to system designers, administrators and integrators looking to monitor and maximize throughput. Attendees should be familiar with at least one scientific application, parallel programming environment and HPC platform. In addition, they should have a rudimentary understanding of processor architectures, memory hierarchies and message passing. There will be a limited number of workstations provided for the hands-on portion of the tutorial, assigned on a first-come, first-served basis. You may participate in the hands-on portion of the tutorial using your own laptop, but be aware that there will be no support available to debug problems with attendee laptops. Attendee laptops must be configured for wireless access and must have an X11 environment and an ssh client installed.','M7_Out.pdf',10672);
INSERT INTO subevents VALUES (1089,'2003-11-17','08:30:00','12:00:00','','M9: Cluster Construction Utilizing Selected Packages','Thomas Stephen Lehmann (Intel Corporation), Richard Mark Libby (Intel Corporation)','Content-Level: 40% Introductory 40% Intermediate 20% Advanced\n\nAbstract: There are many ways to initially create a High Performance Compute Cluster. These break down into two main categories, DIY (do it yourself) and the packaged approach. \n\nThe DIY approach is instructive in that you must install and configure all of the individual software components yourself. This gives you a better understanding of what’s “under the hood” and should be tried at least once on a small cluster to gain a good understanding of the details of clustering technology. \n\nHowever, for small to medium clusters with time and cost constraints, a packaged approach is usually more practical if you can work within the constraints that these packages can bring. Once constructed, most of these “instant” clusters can be further tailored to match local needs. \n\nThis tutorial will show two such packages, one free (OSCAR) and one commercially available (Scyld). In hands-on laboratory sessions each attendee will have a chance to install and test a small cluster utilizing both packages.','M9_Out.pdf',10673);
INSERT INTO subevents VALUES (1090,'2003-11-17','08:30:00','12:00:00','','M8: Vector Performance Programming','Edward Kornkven (ARSC), Andrew Johnson (AHPCRC)','Content-Level: 25% Introductory 50% Intermediate 25% Advanced\n\nAbstract: This tutorial will provide an overview of the current state of the art regarding vector computer systems and their application to science. It consists of three parts:\n\n - Overview: the origin and evolution of vector systems, basic principles of the hardware and software, and differences between currently available vector systems and vector-like features in commodity processors.\n\n - Basic examples: identification of code structures that do and don\'t vectorize, modification of non-vectorizing structures to allow vectorization, vector libraries, and vector-friendly programming guidelines. This section will enable programmers to develop vector-friendly code and adapt existing codes to vectorize. \n\n - Advanced examples: tuning for memory bandwidth/architectures, caches and hybrid programming, combining parallel and vector optimization. Specific features of available vector platform hardware and software and when/how to exploit these will be considered.\n\nIllustrations will be included from currently available vector systems such as the Cray SV1, Cray X1, NEC SX6 and others. Illustrations will include short examples and real world applications, both those developed in a vector environment and those ported to vector systems from a workstation environment. This tutorial will be of interest to those developing codes that might run on vector systems in the future.','M8_Out.pdf',10674);
INSERT INTO subevents VALUES (1091,'2003-11-17','08:30:00','12:00:00','','M10: Computer Protection at Open Scientific Facilities','William T Kramer (NERSC/LBNL), Stephen Lau (NERSC/LBNL), Dr. Vern Paxson (LBNL/ICSI), James Rothfuss (LBNL)','Content-Level: 10% Introductory 50% Intermediate 40% Advanced\n\nAbstract: The ability for scientists to collaborate unfettered over networks is critical for high-performance computing (HPC) environments. This need, however, is tempered by the realities of today\'s interconnected computational environments where protection from unauthorized access and usage is a necessity. How does one find an effective balance between the needs of an open scientific research facility and simultaneously protecting the site from attacks? What challenges lie ahead in high performance security? \n\nThis tutorial addresses these questions by exploring various topics of computer security as it relates to an open, high-performance computer facility. Some of the topics we will address are: \n\n1) The unique nature and demands within an HPC environment \n\n2) Addressing the needs of computer protection in an HPC environment \n\n3) An overview of current trends in attacks and incidents \n\n4) Intrusion detection in an HPC environment \n\n5) The future of high performance computing protection \n\nSCinet, the SC conference network, resembles networks at open scientific facilities. Some of the tools deployed at open scientific facilities are also deployed at SC for computer protection. We will show real network attack statistics collected at SC03 and explain how the techniques described in the tutorial are in use at SC.','M10_Out.pdf',10675);
INSERT INTO subevents VALUES (1092,'2003-11-17','08:30:00','17:00:00','','M1: How to Build a Grid Service Using the Globus Toolkit(R) 3','Lisa C Childers (The Globus Project), Charles A Bacon (The Globus Project), Ben Z Clifford (The Globus Project), Ravi K Madduri (The Globus Project)','Content-Level: 50% Introductory 50% Intermediate 0% Advanced\n\nAbstract: This full-day tutorial is designed to teach developers how to build a Grid Service that is compliant with the Open Grid Services Infrastructure (OGSI) Specification using the Globus Toolkit(R) 3 (GT3). The OGSI Specification is a community standard published by the Global Grid Forum that is expected to form the architectural foundation of next-generation Grid Computing. GT3 is the latest toolkit distribution from the Globus Project, which produces open source middleware used in building grids around the world. \n\nThe session begins with an introduction to the key concepts that define a Grid Service, including a tour of the OGSI Specification. The second section focuses on GT3, providing an overview of its architecture and functionality. The tutorial concludes with hands-on exercises in which attendees learn how to build, host and interact with an OGSI-Compliant Grid Service. \n\nThis tutorial is geared toward developers and technical managers who want to learn about the latest release of the Globus Toolkit and how to apply fundamental concepts outlined in the Open Grid Services Infrastructure Specification. In order to participate, attendees are required to bring an SC2003-network-enabled laptop to the tutorial. Tutorial participants will be expected to bring their own wireless-enabled laptops pre-loaded with a small set of open-source software. There will be no support available to debug problems with attendee laptops. A list of prerequisites for the tutorial is published at: http://www.globus.org/ogsa/tutorials/SC03/. Attendees must be able to run all the software listed in the prerequisites in order to participate in the hands-on portion of the tutorial.','M1_Out.pdf',10676);
INSERT INTO subevents VALUES (1093,'2003-11-17','08:30:00','17:00:00','','M5: Component Software for High-Performance Computing: Using the Common Component Architecture','Robert C Armstrong (Sandia National Laboratories), David E Bernholdt (Oak Ridge National Laboratory), Lori Freitag Diachin (Sandia National Laboratories), Wael R Elwasif (Oak Ridge National Laboratory), Daniel S Katz (Jet Propulsion Laboratory, California Institute of Technology), James A Kohl (Oak Ridge National Laboratory), Gary Kumfert (Lawrence Livermore National Laboratory), Lois Curfman McInnes (Argonne National Laboratory), Boyana Norris (Argonne National Laboratory), Craig E Rasmussen (Los Alamos National Laboratory), Jaideep Ray (Sandia National Laboratories), Sameer Shende (University of Oregon), Shujia Zhou (Northrop Grumman/TASC)','Content-Level: 25% Introductory 50% Intermediate 25% Advanced\n\nAbstract: This full-day tutorial will introduce participants to the Common Component Architecture (CCA) at both conceptual and practical levels. \n\nComponent-based approaches to software development increase software developer productivity by helping to manage the complexity of large-scale software applications and facilitating the reuse and interoperability of code. The CCA was designed specifically with the needs of high-performance scientific computing in mind. It takes a minimalist approach to support language-neutral component-based application development for both parallel and distributed computing without penalizing the underlying performance, and with a minimal cost to incorporate existing code into the component environment. The CCA environment is also well suited to the creation of domain-specific application frameworks, whereas traditional domain-specific frameworks lack the generality and extensibility of the component approach.\n\nWe will cover the concepts of components and the CCA in particular, the tools provided by the CCA environment, the creation of CCA-compatible components, and their use in scientific applications. We will use a combination of traditional presentation and live demonstration during the tutorial. The tools and example software will also be available for download. This presentation updates the SC2002 tutorial with advances in the CCA tools and technology (including Fortran 90 support) and more extensive user experience.','M5_Out.pdf',10677);
INSERT INTO subevents VALUES (1094,'2003-11-17','08:30:00','17:00:00','','M3: Lustre: A Scalable, High-Performance Distributed File System','Radhika Vullikanti (Cluster File Systems, Inc.), Thomas M. Ruwart (University of Minnesota), Robert Read (Cluster File Systems, Inc.)','Content-Level: 25% Introductory 50% Intermediate 25% Advanced\n\nAbstract: Lustre is a scalable and high-performance distributed file system. It is a highly modular next generation storage architecture that combines established, open standards, the Linux operating system, an open networking API and innovative protocols into a reliable, network-neutral data storage and retrieval solution. \n\nThis tutorial is aimed at providing a discussion of object-based storage devices and intelligent storage devices, followed by a high-level overview of the Lustre file system accompanied by an in-depth discussion of some of the design concepts that make up this innovative file system. \n\nThe tutorial will be interesting to a broad audience – it will be useful for anyone just curious about Lustre, academics interested in the design concepts behind this high-performance file system, hardware manufacturers interested in the development of intelligent storage devices, and customers who might be exploring various storage solutions for their cluster installations.\n\nThe audience participating in this tutorial is expected to be familiar with basic file system concepts and UNIX shell-driven work for the hands-on sessions. A laptop with wireless or ethernet networking and an SSH client will be required for the hands-on session of this tutorial.','M3_Out.pdf',10678);
INSERT INTO subevents VALUES (1095,'2003-11-17','08:30:00','17:00:00','','M4: Using MPI-2: Advanced Features of the Message-Passing Interface','William Gropp (Argonne National Laboratory), Ewing (Rusty) Lusk (Argonne National Laboratory), Rob Ross (Argonne National Laboratory), Rajeev Thakur (Argonne National Laboratory)','Content-Level: 20% Introductory 40% Intermediate 40% Advanced\n\nAbstract: This tutorial is about how to use MPI-2, the collection of advanced features that were added to MPI (Message-Passing Interface) by the second MPI Forum. These features include parallel I/O, one-sided communication, dynamic-process management, language interoperability, and some miscellaneous features. Implementations of MPI-2 (or significant subsets thereof) are now available both from vendors and from open-source projects. For example, the one-sided communication functions of MPI-2 are being used successfully in applications running on the Earth Simulator. In other words, MPI-2 can now really be used in practice.\n\nThis tutorial explains how to use MPI-2, particularly, how to use it in a way that results in high performance. We present each feature of MPI-2 in the form of a series of examples (in C, Fortran, and C++), starting with simple programs and moving on to more complex ones. We also discuss how to combine MPI with OpenMP. We assume that attendees are familiar with the basic message-passing concepts of MPI-1. \n\nThe tutorial will feature a hands-on session in which attendees will be able to run MPI-2 programs on their own laptops with the latest version of MPICH2, which we will distribute on CDs.','M4_Out.pdf',10679);
INSERT INTO subevents VALUES (1096,'2003-11-17','08:30:00','17:00:00','','M6: Reconfigurable Supercomputing Systems','Tarek El-Ghazawi (GWU), Maya Gokhale (LANL), Duncan Buell (USC), Kris Gaj (GMU)','Content-Level: 40% Introductory 40% Intermediate 20% Advanced\n\nAbstract: The synergistic advances in high-performance computing and in reconfigurable computing, based on field programmable gate arrays (FPGAs), have engendered a new class of supercomputing systems, namely reconfigurable supercomputing systems. Such systems inherently support both fine-grain and coarse-grain parallelism, and can dynamically tune their architecture to fit applications. Advances in this area have progressed at different levels. At the network level, researchers have extended job management systems to exploit networked reconfigurable resources in cluster and grid computing fashions. At the single system level, steps have been taken towards the development of massively parallel systems of microprocessors and reconfigurable computing capabilities. Programming such systems can be quite challenging as programming FPGA devices can essentially involve hardware design. This has been addressed by significant developments in compiler technologies and programming tools for these systems. This tutorial will introduce the field of reconfigurable supercomputing and the advances made so far in systems, programming tools, applications, and compiler technology. In addition to concepts and technology coverage, the tutorial offers a first-hand exposure to this field through carefully crafted hands-on experience on two state-of-the-art reconfigurable machines. There will be a limited number of workstations provided for the hands-on portion of the tutorial, assigned on a first-come, first-served basis. You may participate in the hands-on portion of the tutorial using your own laptop, but be aware that there will be no support available to debug problems with attendee laptops. Attendee laptops must be configured for wireless access and must have an ssh client installed.','M6_Out.pdf',10680);
INSERT INTO subevents VALUES (1097,'2003-11-17','08:30:00','17:00:00','','M2: Applications in the TeraGrid Environment','John Towns (NCSA), Nancy Wilkins-Diehr (SDSC), Sharon Brunett (CACR), Sandra Bittner (ANL), Derek Simmel (PSC)','Content-Level: 0% Introductory 60% Intermediate 40% Advanced\n\nAbstract: The TeraGrid is the foundation of the NSF’s national cyberinfrastructure program and is positioned to ignite the imaginations of new grid communities while delivering the next level of innovation in grid computing. It will connect scientific instruments, data collections and other unique resources as well as offer significant amounts of compute power. The TeraGrid includes 20 teraflops of computing power, 1 petabyte of data storage, high-resolution visualization environments, and grid services. The TeraGrid is anchored with Intel-based Linux clusters at ANL, Caltech, NCSA and SDSC, and an Alpha-based cluster at PSC, connected by a 40 Gbps network.\n\nThis tutorial includes an overview of the TeraGrid environment and configuration and descriptions of available services. The software foundation is based on the NSF Middleware Initiative (NMI) and programming techniques learned in this tutorial will be applicable in many grid communities. Attendees can expect to learn to manage a grid identity and work through several usage scenarios by building and launching sample jobs. Several working applications will be used as examples to illustrate these capabilities. Attendees are expected to be familiar with Fortran or C programming, MPI and basic Unix environments.\n\nNOTE: There will be a limited number of laptops provided for the hands-on portion of the tutorial, assigned on a first-come, first-served basis. You may participate in the hands-on portion of the tutorial using your own laptop, but be aware that there will be no support available to debug problems with attendee laptops. Information on laptop software and configuration requirements is available at: http://mithril.ncsa.uiuc.edu/SC03/Tutorial_M2/','M2_Out.pdf',10681);
INSERT INTO subevents VALUES (1098,'2003-11-17','13:30:00','17:00:00','','M11: Performance Tools 101: Principles of Experimental Performance Measurement and Analysis','Luiz DeRose (IBM Research), Bernd Mohr (Research Centre Juelich), Kevin London (ICL / University of Tennessee)','Content-Level: 20% Introductory 60% Intermediate 20% Advanced\n\nAbstract: Application developers are facing new and more intricate performance tuning and optimization problems as parallel architectures become more complex. Hence, there are a growing number of application developers who want to understand more about the performance characteristics of their applications. In this tutorial we will introduce the theory and practice of tools development, with an overview of the major issues, techniques, and resources for performance measurement of applications. Our goals are twofold: first, we will provide enough information that users who want to develop simple performance tools for instrumentation, measurement, and analysis of their own code can attempt in-house development to fulfill their needs. Second, we will discuss some of the open problems in the area of performance measurement and analysis for researchers and students interested in working in this field. Areas covered will include instrumentation, performance measurement, performance data representation, analysis, and visualization techniques.','M11_Out.pdf',10682);
INSERT INTO subevents VALUES (1099,'2003-11-17','13:30:00','17:00:00','','M12: Power and Energy Conservation for Servers','Ricardo Bianchini (Rutgers University), Ram Rajamony (IBM), Michael Kistler (IBM)','Content-Level: 30% Introductory 50% Intermediate 20% Advanced\n\nAbstract: Power and energy consumption have recently become key concerns for high-performance servers, especially when they are deployed in large cluster configurations as in compute farms and data centers. In fact, it has been reported that power and energy account for a significant fraction of the operational cost of such clusters. Furthermore, computing nodes in densely packed cluster systems also often overheat, leading to intermittent failures. These problems are likely to worsen as newer server-class processors offer higher levels of performance at the expense of increased power consumption. \n\nThis tutorial will present an in-depth look at techniques for power and energy management in standalone servers, clustered servers, and storage servers. We will start by motivating the topic, then discuss various direct and indirect methods for measuring power/energy consumption, appropriate metrics to use, and where power and energy are expended in servers today. Next, we will address mechanisms for power management and the characteristics of server workloads from a power/energy conservation viewpoint. Components of power and energy conservation policies will then be discussed, followed by the actual policies to use. An important component of the tutorial will be a discussion on practical techniques. We will conclude with a discussion of the future challenges in this area.','M12_Out.pdf',10683);
INSERT INTO subevents VALUES (1100,'2003-11-17','13:30:00','17:00:00','','M13: HPC and InfiniBand Architecture in Practice','Lars E Jonsson (Intel Americas, Inc.), Bill Magro (Intel Americas, Inc.)','Content-Level: 70% Introductory 30% Intermediate 0% Advanced\n\nAbstract: InfiniBand architecture (IBA) is an open standard interconnect architecture designed to meet the challenging I/O and inter-process messaging needs of clustered compute and data centers. IBA\'s native data rate of 1250 MByte/s in each direction leapfrogs most current interconnect technologies while still delivering the low latencies required by HPC applications. In the last year, a range of HPC-oriented InfiniBand products - host adapter cards, fat-tree topology switches, device drivers, and MPI implementations - have come to market, with initial deployments occurring in HPC centers worldwide. Now is the time for computer center managers and engineers to evaluate InfiniBand as an interconnect technology for future clusters. \n\nThis tutorial first introduces the InfiniBand architecture\'s key features and capabilities and then gives practical guidance about how to build and configure a cluster of (Linux) compute servers targeted to HPC users. In addition to an overview of InfiniBand architecture and products, we will cover a) hardware installation and configuration; b) switch topologies and associated routing and management software; c) user-level protocols and message passing interface (MPI) implementations; d) low-level and application-level performance results; and e) status of the open source InfiniBand Linux software infrastructure (infiniband.sourceforge.net).','M13_Out.pdf',10684);
INSERT INTO subevents VALUES (1101,'2003-11-17','13:30:00','17:00:00','','M14: Customer Owned Networks: Strategy, Implementation, and Example','Bill Nickless (Argonne National Laboratory), Tony Rimovsky (NCSA)','Content-Level: 30% Introductory 70% Intermediate 0% Advanced\n\nAbstract: Within the past 5 years, the research, education, and technical computing community has deployed regional optical networks, which utilize Wavelength Division Multiplexing (WDM) technology to interconnect sites and resources. These networks can provide several advantages over circuits provided by the telecommunications industry, particularly related to direct costs and flexibility. \n\nThe first section of this tutorial (Strategy) will explore the strategic challenges involved in planning the deployment of an optical network. The second section (Implementation) will explore the technology and practices needed to create such an optical network. Much of the discussion will surround experiences and continuing challenges in the State of Illinois I-WIRE project.','M14_Out.pdf',10685);
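--
-- A hedged example of pulling one day's schedule back out of these rows:
-- Monday's tutorials and workshop sessions in start-time order. As in the
-- earlier sketch, the subevents column names (date, startTime, endTime,
-- title) are assumptions inferred from the VALUES order, not names taken
-- from the dump:
--
SELECT startTime, endTime, title
FROM subevents
WHERE date = '2003-11-17'
ORDER BY startTime, title;
--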
INSERT INTO subevents VALUES (1102,'2003-11-18','10:30:00','11:00:00','','An Efficient Data Location Protocol for Self-organizing Storage Clusters','Hong Tang (University of California, Santa Barbara), Tao Yang (University of California, Santa Barbara)','Component additions and failures are common for large-scale storage clusters in production environments. To improve availability and manageability, we investigate and compare data location schemes for a large self-organizing storage cluster that can quickly adapt to the additions or departures of storage nodes. We further present an efficient location scheme that differentiates between small and large file blocks for reduced management overhead compared to uniform strategies. In our protocol, small blocks, which are typically in large quantities, are placed through consistent hashing. Large blocks, much fewer in practice, are placed through a usage-based policy, and their locations are tracked by Bloom filters. The proposed scheme results in improved space utilization even with non-uniform cluster nodes. To achieve high scalability and fault resilience, this protocol is fully distributed, relies only on soft states, and supports data replication. We demonstrate the effectiveness and efficiency of this protocol through trace-driven simulation.','',10686);
INSERT INTO subevents VALUES (1103,'2003-11-18','10:30:00','11:00:00','','MRNet: A Software-Based Multicast/Reduction Network for Scalable Tools','Philip C. Roth (University of Wisconsin-Madison), Dorian C. Arnold (University of Wisconsin-Madison), Barton P. Miller (University of Wisconsin-Madison)','We present MRNet, a software-based multicast/reduction network for building scalable performance and system administration tools. MRNet supports multiple simultaneous, asynchronous collective communication operations. MRNet is flexible, allowing tool builders to tailor its process network topology to suit their tool’s requirements and the underlying system’s capabilities. MRNet is extensible, allowing tool builders to incorporate custom data reductions to augment its collection of built-in reductions. We evaluated MRNet in a simple test tool and also integrated it into an existing, real-world performance tool with up to 512 tool back-ends. In the test tool, MRNet’s performance was comparable to that of previous tool infrastructure. In the real-world tool, we used MRNet not only for multicast and simple data reductions but also for custom histogram and clock skew detection reductions. Although the tool’s start-up protocol was already highly tuned, our tests of MRNet with 512 tool back-ends show significant improvements in average start-up latency.','',10687);
INSERT INTO subevents VALUES (1104,'2003-11-18','10:30:00','11:20:00','','Collaborative Virtual Design in Engineering','Lee Margetts (University of Manchester, MRCCS), Simon Bee (University of Salford)','Engineering design, particularly in the aerospace and automotive industries, often involves several iterations through a design cycle before the final project emerges. This is because many different professionals are involved at different stages and may make minor modifications to the design, forcing the process back through earlier design phases. By integrating the different applications used into a single problem solving environment and bringing the professionals together, design times can be greatly reduced leading to increased productivity, turnover and customer satisfaction. \n\nUsing high performance computing and interactive virtual reality visualisation, such an environment is being developed at the Advanced Virtual Prototyping Research Centre (AVPRC). The core of this environment, Hydra, is a software framework that can deploy and control many interconnected applications, such as Computer Aided Design (CAD) and Finite Element Analysis (FEA), at geographically remote locations. Collaboration and computational steering can be conducted simultaneously from different ‘consoles’ – CAVES, Immersabenches and desktop computers. By integrating this problem solving environment with the Access Grid, collaborators can also benefit from multicast audio and visual communication with their colleagues. \n\nThis SC Global Showcase will demonstrate the use of the Access Grid in an important new application area - Virtual Prototyping. Perhaps the most significant innovation is the integration of the following emerging technologies into a single problem solving environment: \n\n· Collaboration (Access Grid) · Virtual Reality visualisation · Grid Computing · Computational Steering · Parallel engineering simulations','',10688);
INSERT INTO subevents VALUES (1105,'2003-11-18','10:30:00','12:00:00','','Petaflops Architectures: Parallelism, Pain, and Perverse Programming Paradigms','Moderator: Candace Culhane (National Security Agency), Panelists: Mootaz Elnozahy (IBM), 3 others to be named','Petaflop architectures now on the drawing boards will enable a sequence of scientific breakthroughs and herald a new era in computational science. At the same time, many petaflops architectures are likely to challenge systems designers, language developers, and compiler writers in totally new ways. Pity the poor applications programmer at the end of this chain, who will have to live with the mistakes of architects, language developers and compiler writers alike. A certain amount of \"pain\" in use of petaflops architectures is surely unavoidable – machines with hundreds of thousands of processors and awkward memory models will not be \"user friendly.\" Moreover, some experts believe that the intrinsic unreliability of hardware at the scale envisioned will force adoption of complex checkpoint and recovery strategies. We firmly believe, however, that much of this pain can be ameliorated by clever architects and programming model designers, assuming they evince a depth of understanding and subtlety of approach not universally evident in the past.\n\nIn this first panel of the three-part workshop, computer architects will talk about architectural trends and the shape of probable architectures at the beginning of the petaflops era. This panel will focus on the emerging structure of likely petascale architectures, and what will be expected/needed from programming models, languages, and end users to handle such systems. Questions the panelists should address include:\n\nWhat architectural features will be easy to exploit and yield good performance?\nWhat anticipated features will be complex and problematic for language designers, compiler writers and applications programmers alike?\nWhat \"features\" will be so complex that only the run-time environment should see them, and could lead to fragile or downright awful performance?\nWill architectures have so much complexity, and require so many levels of parallelism that our programming models need to reflect this same complexity?\nWhat are the trends?\nWhat will be the key things that programming models need to address?','',10689);
INSERT INTO subevents VALUES (1106,'2003-11-18','11:00:00','11:30:00','','The Tool Daemon Protocol (TDP)','Barton Miller (University of Wisconsin), Ana Cortés (Autonomous University of Barcelona), Miquel Senar (Autonomous University of Barcelona), Miron Livny (University of Wisconsin)','Runtime tools are crucial to program development. In desktop environments, we take tools for granted. In the Grid, it is difficult to find tools because of the complex interactions between applications, operating system and layers of job scheduling/management software. Therefore each runtime tool must be ported to run under each job management system; for m tools and n environments, the problem becomes an m*n effort, rather than m+n. The consequence is a paucity of tools in distributed and Grid computing environments. \n\nIn response, we analyzed several scheduling environments and runtime tools to better understand their interactions. We isolated what we believe are the essential interactions between tools, schedulers, resource manager, and applications. We propose a new standard, called the Tool Daemon Protocol, that codifies these interactions and provides the necessary communication functions. We implemented a pilot library and experimented with Parador, a prototype using the Paradyn Performance tools under Condor.','',10687);
INSERT INTO subevents VALUES (1107,'2003-11-18','11:00:00','11:30:00','','Handling Heterogeneity in Shared-Disk File Systems','Changxun Wu (Johns Hopkins University), Randal Burns (Johns Hopkins University)','We develop and evaluate a system for load management in shared-disk file systems built on clusters of heterogeneous computers. The system generalizes load balancing and server provisioning. It balances file metadata workload by moving file sets among cluster server nodes. It also responds to changing server resources that arise from failure and recovery and dynamically adding or removing servers. The system is adaptive and self-managing. It operates without any a-priori knowledge of workload properties or the capabilities of the servers. Rather, it continuously tunes load placement using a technique called adaptive, non-uniform (ANU) randomization. ANU randomization realizes the scalability and metadata reduction benefits of hash-based, randomized placement techniques. It also avoids hashing\'s drawbacks: load skew, inability to cope with heterogeneity, and lack of tunability. Simulation results show that our load-management algorithm performs comparably to a prescient algorithm.','',10686);
INSERT INTO subevents VALUES (1108,'2003-11-18','11:20:00','12:00:00','','Communities Collaborating to Bridge the Digital Divide: the Tribal Virtual Network','Maria Williams (College of Fine Arts, University of New Mexico), Ron Solimon (Indian Pueblo Cultural Center), Vernon Lujan (Pojoaque Poeh Arts Center), Kevin Shendo (Jemez Walatowa Visitors Center), Lorene Willis (Jicarilla Apache Cultural Center), Tom Kennedy (Zuni Tribe)','The Tribal Virtual Network is a consortium of Native American tribal museums and education centers. It sees the Access Grid not only as a research tool for scientists, but also as a collaborative ignition switch between communities. The TVN will 1) briefly discuss their unique application of the Access Grid, and 2) demonstrate how collaborative technologies are being used to overcome the \"Digital Divide\" by accessing education and training.\n\nThe intent of the 30-minute showcase demonstration is to:\n1. Clarify to the scientific community what the digital divide means to Native Americans \n2. Demonstrate how collaborative technologies can unite diverse communities.\n\nTVN consortium members will connect from New Mexico through their low-bandwidth, custom-configured inSORS Access Grid nodes. They will start out by briefly introducing their respective tribes and cultural centers to visually show why so few minorities become researchers. Next, they will demonstrate a condensed version of an interactive, collaborative class in professional artist development. Finally, they will invite scientists and organizations across the Access Grid to collaborate with the consortium.','',10688);
INSERT INTO subevents VALUES (1109,'2003-11-18','11:30:00','12:00:00','','Quantifying and Improving the Availability of High-Performance Cluster-Based Internet Services','Kiran Nagaraja (Computer Science, Rutgers University), Neeraj Krishnan (Computer Science, Rutgers University), Ricardo Bianchini (Computer Science, Rutgers University), Richard P. Martin (Computer Science, Rutgers University), Thu D. Nguyen (Computer Science, Rutgers University)','Cluster-based servers can substantially increase performance when nodes cooperate to globally manage resources. We apply a quantification methodology to show, however, that following a cooperative strategy without additional compensating mechanisms results in a substantial availability loss. Specifically, we show that a sophisticated cooperative cluster-based web server gains a factor of 3 in performance but increases service unavailability by a factor of 10 over a non-cooperative version. We then show how to augment this web server with software components embodying a small set of high-availability techniques to regain the lost availability. Among other interesting observations, we show that the application of multiple high-availability techniques, each implemented independently in its own subsystem, can lead to inconsistent recovery actions. We also show that a novel technique called Fault Model Enforcement can be used to resolve such inconsistencies. Augmenting the server with these techniques led to a final expected availability of close to 99.99%.','',10686);
INSERT INTO subevents VALUES (1110,'2003-11-18','11:30:00','12:00:00','','Conservative Scheduling: Using Predicted Variance to Improve Scheduling Decisions in Dynamic Environments','Lingyun Yang (Department of Computer Science, University of Chicago), Jennifer M. Schopf (Math&Computer Science Division, Argonne National Laboratory), Ian Foster (Math&Computer Science Division, Argonne National Laboratory)','In heterogeneous and dynamic environments, efficient execution of parallel computations can require mappings of tasks to processors with performance that is both irregular and time-varying. While adaptive domain decomposition techniques have been used to address heterogeneous resource capabilities, temporal variations in those capabilities have seldom been considered. We propose a conservative scheduling policy that uses information about expected future variance in resource capabilities to produce more efficient data mapping decisions. We first present techniques for predicting CPU load at some future time point, average CPU load for some future time interval, and variation of CPU load over some future time interval. We then present a family of stochastic scheduling algorithms that exploit such predictions when making data mapping decisions. Finally, we describe experiments in which we apply our techniques to an astrophysics application. The results demonstrate that conservative scheduling can produce execution times that are significantly faster and less variable than other techniques.','',10687);
INSERT INTO subevents VALUES (1111,'2003-11-18','13:30:00','14:00:00','','Synthesizing Realistic Computational Grids','Dong Lu (Northwestern University), Peter August Dinda (Northwestern University)','Realistic workloads are essential in evaluating middleware for computational grids. One important component is the raw grid itself: a network topology graph annotated with the hardware and software available on each node and link. This paper defines our requirements for grid generation and presents GridG, our extensible generator. We describe GridG in two steps: topology generation and annotation. For topology generation, we have both model and mechanism. We extend Tiers, an existing tool from the networking community, to produce graphs that obey recently discovered power laws of Internet topology. We also contribute to network topology theory by illustrating a contradiction between two laws and proposing a new version of one of them. For annotation, GridG captures intra- and inter-host correlations between attributes using conditional probability rules. We construct a set of rules, including one based on empirical evidence of OS concentration in subnets, that produce sensible host annotations.','',10690);
INSERT INTO subevents VALUES (1112,'2003-11-18','13:30:00','14:00:00','','Enabling the Efficient Use of SMP Clusters: The GAMESS/DDI Model','Ryan M. Olson (Iowa State University), Michael W. Schmidt (Iowa State University), Mark S. Gordon (Iowa State University), Alistair P. Rendell (Australian National University)','An important advance in cluster computing is the evolution from single processor clusters to multi-processor SMP clusters. Due to the increased complexity in the memory model on SMP clusters, new approaches are needed for applications that make use of distributed-memory paradigms. This paper presents new communications software developments that are designed to take advantage of SMP cluster hardware. Although the specific focus is on the central field of computational chemistry and materials science, as embodied in the popular electronic structure package GAMESS (General Atomic and Molecular Electronic Structure System), the impact of these new developments will be far broader in scope. Following a summary of the essential features of the distributed data interface (DDI) in the current implementation of GAMESS, the new developments for SMP clusters are described. The advantages of these new features are illustrated using timing benchmarks on several hardware platforms, using a typical computational chemistry application.','',10691);
INSERT INTO subevents VALUES (1113,'2003-11-18','13:30:00','14:00:00','','A Compiler Analysis of Interprocedural Data Communication','Yonghua Ding (Purdue University), Zhiyuan Li (Purdue University)','This paper presents a compiler analysis of data communication for the purpose of transforming ordinary programs into ones that run on distributed systems. Such transformations have been used for process migration and computation offloading to improve the performance of mobile computing devices. In a client-server distributed environment, the efficiency of an application can be improved by careful partitioning of tasks between the server and the client. Optimal task partitioning depends on the tradeoff between the computation workload and the communication cost. Our compiler analysis, assisted by a minimum set of user assertions, estimates the amount of data communication between procedures. The paper also presents experimental results based on an implementation in the GCC compiler. The static estimates for several multimedia programs are compared against dynamic measurement performed using Shade, Sun Microsystems\' instruction-level simulator. The results show a high precision of the static analysis for most pairs of procedures.','',10692);
INSERT INTO subevents VALUES (1114,'2003-11-18','13:30:00','15:00:00','','Petaflops Programming Models: Ameliorating Architectural Issues or Exacerbating Them?','Moderator: Burton Smith (Cray), Panelists: Kathy Yelick (UC-Berkeley), Hans Zima (University of Vienna, NASA Jet Propulsion Laboratory), Larry Snyder (University of Washington), 1 other to be named','Petaflop architectures now on the drawing boards will enable a sequence of scientific breakthroughs and herald a new era in computational science. At the same time, many petaflops architectures are likely to challenge systems designers, language developers, and compiler writers in totally new ways. Pity the poor applications programmer at the end of this chain, who will have to live with the mistakes of architects, language developers and compiler writers alike. A certain amount of \"pain\" in use of petaflops architectures is surely unavoidable – machines with hundreds of thousands of processors and awkward memory models will not be \"user friendly.\" Moreover, some experts believe that the intrinsic unreliability of hardware at the scale envisioned will force adoption of complex checkpoint and recovery strategies. We firmly believe, however, that much of this pain can be ameliorated by clever architects and programming model designers, assuming they evince a depth of understanding and subtlety of approach not universally evident in the past.\n\nThis panel will look at evolving and expected programming models, and the way language and compiler developers hope to address the challenges posed by petaflops architectures. Given the range of issues such systems are raising, which issues should the programming model attempt to address, and which should be passed on to the end user? Hot-button issues include: how to handle fault tolerance, very deep memory hierarchies, interoperability, coping with tens or hundreds of thousands of threads, the semantics of intelligent memory, and so on. Questions the panelists should address include:\n\nWhat ideas look promising to solve some of these issues?\nWhat do future architectures need to incorporate to make it easier for programmers and compilers? \nIs it better to take an evolutionary or revolutionary approach?\nWhat about engineering/adoption issues? \nWill users adopt new languages?\nAre problem-solving environments the way to go? \nWhat hooks does the language community really need in future architectures? \nIs compiler technology up to the task? ','',10689);
INSERT INTO subevents VALUES (1115,'2003-11-18','13:30:00','15:00:00','','','Roscoe Giles (Boston University)','It is important that high performance networking and computing (HPNC) serve the interests of broader communities, especially underserved communities like minority communities in the US. This BOF brings together participants in SC from minority-serving institutions, groups interested in the \'digital divide\' at the level of the Grid, as well as other interested parties to discuss how to increase representation and participation of underserved peoples and communities in HPNC. The primary contacts for this activity are Stephenie McLean (NCSA) and Roscoe Giles (Boston University).','',10693);
INSERT INTO subevents VALUES (1116,'2003-11-18','14:00:00','14:30:00','','Traffic-based Load Balance for Scalable Network Emulation','Xin Liu (UCSD), Andrew A. Chien (UCSD)','Load balance is critical to achieving scalability for large network emulation studies, which are of compelling interest for emerging Grid, Peer to Peer, and other distributed applications and middleware. Achieving load balance in emulation is difficult because of irregular network structure and unpredictable network traffic. We formulate load balance as a graph partitioning problem and apply classical graph partitioning algorithms to it. Using a large-scale network emulation system called MaSSF, we explore three approaches for partitioning, based on purely static topology information, combining topology and application placement information, and combining topology and application profile data. These studies show that exploiting topology and application placement information can achieve reasonable load balance, but a profile-based approach further improves load balance even for large-scale network emulation. In our experiments, PROFILE improves load balance by 50% to 66% and emulation time is reduced by up to 50% compared to purely static topology-based approaches.','',10690);
INSERT INTO subevents VALUES (1117,'2003-11-18','14:00:00','14:30:00','','Remote Visualization by Browsing Image Based Databases with Logistical Networking','Jin Ding (University of Tennessee), Jian Huang (University of Tennessee), Micah Beck (University of Tennessee), Shaotao Liu (University of Tennessee), Terry Moore (University of Tennessee), Stephen Soltesz (University of Tennessee)','Although Image Based Rendering (IBR) techniques using plenoptic functions have some important advantages over other approaches to the visualization of large datasets, they depend on the interactive use of huge IBR databases, which creates corresponding problems with network latency and server load. Consequently, IBR techniques, such as Light Fields, have been largely ignored for remote visualization. In this paper we describe the application of Logistical Networking, a new approach to deploying storage as a shared communication resource, to create a remote visualization system based on Light Fields. Our system extends existing work by employing a modified method of parameterization and data organization that supports more efficient prefetching, caching and lossless compression. Using this approach, we have been able to interactively browse multi-gigabyte, high-resolution Light Field databases across the Internet with latencies observed by the user that are comparable to local area network access.','',10691);
INSERT INTO subevents VALUES (1118,'2003-11-18','14:00:00','14:30:00','','Automatic Type-Driven Library Generation for Telescoping Languages','Arun Chauhan (Rice University), Cheryl McCosh (Rice University), Ken Kennedy (Rice University), Richard Hanson (Rice University)','Telescoping languages is a strategy to automatically generate highly-optimized domain-specific libraries. The key idea is to create specialized variants of library procedures through extensive offline processing. This paper describes a telescoping system, called ARGen, which generates high-performance Fortran or C libraries from prototype Matlab code for the linear algebra library, ARPACK. ARGen uses variable types to guide procedure specializations on possible calling contexts. \n\nARGen needs to infer Matlab types in order to speculate on the possible variants of library procedures, as well as to generate code. This paper shows that our type-inference system is powerful enough to generate all the variants needed for ARPACK automatically from the Matlab development code. The ideas demonstrated here provide a basis for building a more general telescoping system for Matlab.','',10692);
INSERT INTO subevents VALUES (1119,'2003-11-18','14:30:00','15:00:00','','A Self-Organizing Flock of Condors','Ali Raza Butt (Purdue University), Rongmei Zhang (Purdue University), Y. Charlie Hu (Purdue University)','Condor provides high throughput computing by leveraging idle cycles on off-the-shelf desktop machines. It also supports flocking, a mechanism for sharing resources among Condor pools. Since Condor pools distributed over a wide area can have dynamically changing availability and sharing preferences, the current flocking mechanism based on static configurations can limit the potential of sharing resources across Condor pools. This paper presents a technique for resource discovery in distributed Condor pools using peer-to-peer mechanisms that are self-organizing, fault-tolerant, scalable, and locality-aware. Locality-awareness guarantees that applications are not shipped across long distances when nearby resources are available. Measurements using a synthetic job trace show that self-organized flocking reduces the maximum job wait time in queue for a heavily loaded pool by a factor of 10 compared to no flocking. Simulations of 1000 Condor pools are also presented and the results confirm that our technique discovers and utilizes physically nearby resources.','',10690);
INSERT INTO subevents VALUES (1120,'2003-11-18','14:30:00','15:00:00','','Compiler Support for Exploiting Coarse-Grained Pipelined Parallelism','Wei Du (Ohio State University), Renato Ferreira (Brasil), Gagan Agrawal (Ohio State University)','The emergence of grid and a new class of data-driven applications is making a new form of parallelism desirable, which we refer to as coarse-grained pipelined parallelism. Here, the computations associated with an application are carried out in several stages, which are executed on a pipeline of computing units. This paper reports on a compilation system developed to exploit this form of parallelism. Our compiler is responsible for selecting a set of candidate filter boundaries, determining the volume of communication required if a particular boundary is chosen, performing the decomposition, and generating code in which each filter unpacks data from a received buffer, iterates over its elements, and packs and forwards a buffer to the next stage. The paper reports results from a detailed evaluation of our current compiler using four data-driven applications.','',10692);
INSERT INTO subevents VALUES (1121,'2003-11-18','14:30:00','15:00:00','','Visualizing Large-Scale Earthquake Simulations','Kwan-Liu Ma (University of California at Davis), Aleksander Stompel (University of California at Davis), Jacobo Bielak (Carnegie Mellon University), Omar Ghattas (Carnegie Mellon University), Eui Joong Kim (Carnegie Mellon University)','This paper presents a new parallel rendering algorithm and its performance for the visualization of time-varying unstructured volume data generated from very large-scale earthquake simulations. The algorithm is used to visualize 3D seismic wave propagation generated from a 0.5 Hz simulation of the Northridge earthquake, which is the highest resolution volume visualization of an earthquake simulation performed to date. The scalable, high-fidelity visualization solution we provide will allow scientists to explore the temporal, spatial, and visualization domains of their data at high resolution. This new high-resolution explorability, likely not presently available to most computational science groups, will help lead to many new insights.','',10691);
INSERT INTO subevents VALUES (1122,'2003-11-18','15:30:00','16:00:00','','Applications of Algebraic Multigrid to Large-Scale Finite Element Analysis of Whole Bone Micro-Mechanics on the IBM SP','Mark F. Adams (Sandia National Laboratories), Harun H. Bayraktar (University of California, Berkeley), Tony M. Keaveny (University of California, Berkeley), Panayiotis Papadopoulos (University of California, Berkeley)','Accurate finite element analyses of whole bone require the solution of large sets of algebraic equations. Multigrid has proven to be an effective approach to the design of highly scalable linear solvers for solid mechanics problems. We present some of the first applications of scalable linear solvers, on massively parallel computers, to whole vertebral body structural analysis. We analyze the performance of our algebraic multigrid (AMG) methods on problems with over 537 million degrees of freedom on IBM SP (LLNL and SDSC) parallel computers. We demonstrate excellent parallel scalability, both in the algorithms and the implementations on IBM SPs, and analyze the nodal performance of the important AMG kernels on the IBM Power3 and Power4 architectures.','',10694);
INSERT INTO subevents VALUES (1123,'2003-11-18','15:30:00','16:00:00','','Nondeterministic Queries in a Relational Grid Information Service','Peter Dinda (Northwestern University, Computer Science), Dong Lu (Northwestern University, Computer Science)','A Grid Information Service (GIS) stores information about the resources of a distributed computing environment and answers questions about it. We are developing RGIS, a GIS system based on the relational data model. RGIS users can write SQL queries that search for complex compositions of resources that meet collective requirements. Executing these queries can be very expensive, however. In response, we introduce the nondeterministic query, an extension to the SELECT statement, which allows the user (and RGIS) to trade off between the query\'s running time and the number of results. The results are a random sample of the deterministic results, which we argue is sufficient and appropriate. Herein we describe RGIS, the nondeterministic query extension, and its implementation. Our evaluation shows that a meaningful tradeoff between query time and results returned is achievable, and that the tradeoff can be used to keep query time largely independent of query complexity.','',10695);
INSERT INTO subevents VALUES (1124,'2003-11-18','15:30:00','16:00:00','','Performance Comparison of MPI Implementations over InfiniBand, Myrinet and Quadrics','Jiuxing Liu (The Ohio State University), Balasubramanian Chandrasekaran (The Ohio State University), Jiesheng Wu (The Ohio State University), Weihang Jiang (The Ohio State University), Sushmitha Kini (The Ohio State University), Weikuan Yu (The Ohio State University), Darius Buntinas (The Ohio State University), Pete Wyckoff (Ohio Supercomputer Center), D. K. Panda (The Ohio State University)','In this paper, we present a comprehensive performance comparison of MPI implementations over InfiniBand, Myrinet and Quadrics. Our performance evaluation consists of two major parts. The first part consists of a set of MPI level micro-benchmarks that characterize different aspects of MPI implementations. The second part of the performance evaluation consists of application level benchmarks. We have used the NAS Parallel Benchmarks and the sweep3D benchmark. We not only present the overall performance results, but also relate application communication characteristics to the information we acquired from the micro-benchmarks. Our results show that the three MPI implementations all have their advantages and disadvantages. For our 8-node cluster, InfiniBand can offer significant performance improvements for a number of applications compared with Myrinet and Quadrics when using the PCI-X bus. Even with just the PCI bus, InfiniBand can still perform better if the applications are bandwidth-bound.','',10696);
INSERT INTO subevents VALUES (1125,'2003-11-18','15:30:00','17:00:00','','Petaflops Applications: Pity the Programmer Trying to Do Actual Applications','Moderator: Alan Laub (UC - Davis), Panelists: Mike Merrill (National Security Agency), Chris Johnson (University of Utah), 2 others to be named','Petaflop architectures now on the drawing boards will enable a sequence of scientific breakthroughs and herald a new era in computational science. At the same time, many petaflops architectures are likely to challenge systems designers, language developers, and compiler writers in totally new ways. Pity the poor applications programmer at the end of this chain, who will have to live with the mistakes of architects, language developers and compiler writers alike. A certain amount of \"pain\" in use of petaflops architectures is surely unavoidable – machines with hundreds of thousands of processors and awkward memory models will not be \"user friendly.\" Moreover, some experts believe that the intrinsic unreliability of hardware at the scale envisioned will force adoption of complex checkpoint and recovery strategies. We firmly believe, however, that much of this pain can be ameliorated by clever architects and programming model designers, assuming they evince a depth of understanding and subtlety of approach not universally evident in the past.\n\nIn this third panel a set of current users of high-end architectures will respond to the material presented during the other panels, and address the question of how well expected architectures and programming models will, in fact, serve the needs of the applications communities. This panel will also discuss the sociological issue of what level of maturity and usability a language needs in order to be adopted by the HPC community. Speakers in this panel will characterize their applications from the point of view of algorithms, architecture factors (e.g., cross-section bandwidth, cache issues) and programming model/language issues. They should identify the biggest challenges they foresee in exploiting promised petascale systems and their views on the most profitable directions to take in architecture, programming model, systems and compiler research. Questions the panelists should address include:\n\nWhat are the biggest challenges you foresee in using petaflops architectures?\nWhat do programming models need to provide you to allow you to exploit petaflops architectures productively?\nWhat constructs or abstractions would make it easier for you to map your application on petaflops architectures?\nAt what point in the maturity of a new programming language would you be willing to try a promising new model?','',10689);
INSERT INTO subevents VALUES (1126,'2003-11-18','15:30:00','17:00:00','','','Brian Corrie (New Media Innovation Centre - Immersive Media Lab, Canada), Andrew Patrick (IIT/NRC - Mutual Media Lab, Canada)','Today’s digital media, interaction devices, and networking technologies have the opportunity to drastically alter the way people communicate and collaborate. Ubiquitous displays with advanced capabilities, combined with rich interaction methods, allow for extremely complex collaborative environments. In addition, modern networking allows us to have ad-hoc local communication between devices, wireless network connectivity, and very high-bandwidth, long-distance connectivity between remote sites. This combination of interaction and networking technologies allows us to connect interaction environments together in extremely complex ways.\n\nThe goal of an Advanced Collaborative Environment, however, is to bring together the right people and the right data at the right time in order to perform a complex task, solve problems, or simply discuss what is pressing at the time. It is not enough to be able to provide tools to users and hope that they use them effectively. It is through focusing on the individual and social needs of the users in the context of the collaboration task that we will be able to adapt to the requirements of the situation and deliver the best Quality of Experience (QoE) to each user in the environment, regardless of task, technology, or individual.\n\nThis Birds of a Feather session will bring together individuals interested in human factors to explore the human and social needs of advanced collaborative environments. A round table discussion will be held with all participants having a brief opportunity to discuss their interests in this area. The goal of this BOF is to form the beginnings of a human factors group within the Access Grid collaboration community.','',10697);
INSERT INTO subevents VALUES (1127,'2003-11-18','16:00:00','16:30:00','','Parallel Multilevel Sparse Approximate Inverse Preconditioners in Large Sparse Matrix Computations','Kai Wang (University of Kentucky), Jun Zhang (University of Kentucky), Chi Shen (University of Kentucky)','We investigate the use of the multistep successive preconditioning strategies (MSP) to construct a class of parallel multilevel sparse approximate inverse (SAI) preconditioners. We do not use independent set ordering, but a diagonal dominance based matrix permutation to build a multilevel structure. The purpose of introducing multilevel structure into SAI is to enhance the robustness of SAI for solving difficult problems. Forward and backward preconditioning iteration and two Schur complement preconditioning strategies are proposed to improve the performance and to reduce the storage cost of the multilevel preconditioners. One version of the parallel multilevel SAI preconditioner based on the MSP strategy is implemented. Numerical experiments for solving a few sparse matrices on a distributed memory parallel computer are reported.','',10694);
INSERT INTO subevents VALUES (1128,'2003-11-18','16:00:00','16:30:00','','MPICH-V2: a Fault Tolerant MPI for Volatile Nodes based on Pessimistic Sender Based Message Logging','Aurelien Bouteiller (CNRS-LRI), Franck Cappello (INRIA-LRI), Thomas Herault (CNRS-LRI), Geraud Krawezik (CNRS-LRI), Pierre Lemarinier (CNRS-LRI), Frederic Magniette (CNRS-LRI)','Execution of MPI applications on clusters and Grid deployments suffering from node and network failures motivates the use of fault tolerant MPI implementations.\n\nWe present MPICH-V2 (the second protocol of the MPICH-V project), an automatic fault tolerant MPI implementation using an innovative protocol that removes the most limiting factor of the pessimistic message logging approach: reliable logging of in transit messages. MPICH-V2 relies on uncoordinated checkpointing, sender based message logging and remote reliable logging of message logical clocks.\n\nThis paper presents the architecture of MPICH-V2, its theoretical foundation and the performance of the implementation. We compare MPICH-V2 to MPICH-V1 and MPICH-P4 evaluating a) its point-to-point performance, b) the performance for the NAS benchmarks, c) the application performance when many faults occur during the execution. Experimental results demonstrate that MPICH-V2 provides performance close to MPICH-P4 for applications using large messages while dramatically reducing the number of reliable nodes compared to MPICH-V1.','',10696);
INSERT INTO subevents VALUES (1129,'2003-11-18','16:00:00','16:30:00','','Optimizing Reduction Computations In a Distributed Environment','Tahsin Kurc (Ohio State University), Feng Lee (Ohio State University), Gagan Agrawal (Ohio State University), Umit Catalyurek (Ohio State University), Renato Ferreira (Ohio State University), Joel Saltz (Ohio State University)','We investigate runtime strategies for data-intensive applications that involve generalized reductions on large, distributed datasets. Our set of strategies includes replicated filter state, partitioned filter state, and hybrid options between these two extremes. We evaluate these strategies using emulators of three real applications, different query and output sizes, and a number of configurations. We consider execution in a homogeneous cluster and in a distributed environment where only a subset of nodes host the data. Our results show that replicating the filter state scales well and outperforms other schemes if sufficient memory is available and sufficient computation is involved to offset the cost of the global merge step. In other cases, hybrid is usually the best. Moreover, in almost all cases, the performance of the hybrid strategy is quite close to that of the best strategy. Thus, we believe that hybrid is an attractive approach when the relative performance of different schemes cannot be predicted.','',10695);
INSERT INTO subevents VALUES (1130,'2003-11-18','16:30:00','17:00:00','','Job Scheduler Architecture and Performance in Computational Grid Environments','Hongzhang Shan (Lawrence Berkeley National Laboratory), Leonid Oliker (Lawrence Berkeley National Laboratory), Rupak Biswas (NASA Ames Research Center)','Computational grids hold great promise in utilizing geographically separated heterogeneous resources to solve large-scale complex scientific problems. However, a number of major technical hurdles, including distributed resource management and effective job scheduling, stand in the way of realizing these gains. In this paper, we propose a novel grid superscheduler architecture and three distributed job migration algorithms. We also model the critical interaction between the superscheduler and autonomous local schedulers. Extensive performance comparisons with ideal, central, and local schemes using real workloads from leading computational centers are conducted in a simulation environment. Additionally, synthetic workloads are used to perform a detailed sensitivity analysis of our superscheduler. Several key metrics demonstrate that substantial performance gains can be achieved via smart superscheduling in distributed computational grids.','',10695);
INSERT INTO subevents VALUES (1131,'2003-11-18','16:30:00','17:00:00','','Hierarchical Dynamics, Interarrival Times, and Performance','Stephen D Kleban (Sandia National Laboratories), Scott H Clearwater (Sandia National Laboratories)','We report on a model of the distribution of job submission interarrival times in supercomputers. Interarrival times are modeled as a consequence of a complicated set of decisions between users, the queuing algorithm, and other policies. This cascading hierarchy of decision-making processes leads to a particular kind of heavy-tailed distribution. Specifically, hierarchically constrained systems suggest that fatter tails are due to more levels coming into play in the overall decision-making process. The key contribution of this paper is that heavier tails resulting from more complex decision-making processes, that is, more hierarchical levels, will lead to worse overall performance, even when the average interarrival time is the same. Finally, we offer some suggestions for how to overcome these issues and the tradeoffs involved.','',10696);
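--
-- Illustration: the Kleban/Clearwater model above is built from the gaps
-- between successive job submissions. Below is a minimal sketch of extracting
-- those interarrival times in plain SQL; the job_log table is hypothetical
-- (invented for this example), with submission times assumed unique and
-- stored as seconds since the epoch.
--
CREATE TABLE job_log (
jobID int(11) NOT NULL auto_increment,
submit_ts int(11) NOT NULL,
PRIMARY KEY (jobID)
) TYPE=MyISAM;
-- Interarrival time of each submission = gap to the next-later submission
-- (the final submission has no successor and therefore yields no row).
SELECT a.jobID, MIN(b.submit_ts) - a.submit_ts AS interarrival_sec
FROM job_log a INNER JOIN job_log b ON b.submit_ts > a.submit_ts
GROUP BY a.jobID, a.submit_ts;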
INSERT INTO subevents VALUES (1132,'2003-11-18','16:30:00','17:00:00','','Parallel Particle-In-Cell Simulation of Colliding Beams in High Energy Accelerators','Ji Qiang (Lawrence Berkeley National Laboratory), Miguel A. Furman (Lawrence Berkeley National Laboratory), Robert D. Ryne (Lawrence Berkeley National Laboratory)','In this paper we present a self-consistent simulation model of colliding beams in high energy accelerators. The model, which is based on a particle-in-cell method, uses a newly developed shifted-Green function algorithm for the efficient calculation of the beam-beam interaction. In the parallel implementation we studied various strategies to deal with the particular nature of the colliding beam system -- a system in which there can be significant particle movement between beam-beam collisions. We chose a particle-field decomposition approach instead of the conventional domain decomposition or particle decomposition approach. The particle-field approach leads to good load balance, reduced communication cost, and shows the best scalability on an IBM SP3 among the three parallel implementations. A performance test of the beam-beam model on a Cray T3E, IBM SP3, and a PC cluster is presented. As an application, we studied the effect of long-range collisions on antiproton lifetime in the Fermilab Tevatron.','',10694);
INSERT INTO subevents VALUES (1133,'2003-11-18','17:00:00','19:00:00','Lobby 2','','','Posters at SC2003 showcase the latest innovations and are prominently displayed throughout the conference. This evening\'s Posters Reception, sponsored by AMD, features the posters and allows time for conference attendees to discuss the displays with the poster presenters. The poster session is a means of presenting timely research in a more casual setting, with the chance for greater in-depth, one-on-one dialogue that can proceed at its own pace.\n\nTopics of interest include:\n- scalable systems\n- performance evaluation and modeling\n- high performance networking\n- distributed computing systems\n- high performance I/O\n- programming environments and tools\n- novel computer architectures\n- visualization\n- distributed collaborations\n- parallel and distributed algorithms\n- architecture simulation\n- workload characterization\n- user experiences\n- optimization studies\n- parallel databases\n- data- and computation-intensive applications\n- large-scale databases and digital libraries\n- fault-tolerant architectures and system software','',10698);
INSERT INTO subevents VALUES (1134,'2003-11-19','08:30:00','09:15:00','Ballroom','State of the Field: Is There Anything New on the Networking Horizon?','Judy Estrin (Packet Design LLC)','Networking - first in the form of local-area networks and later the Internet and the World Wide Web - was the major driver behind technology industry growth during the past couple of decades. That the growth of networking has slowed dramatically is due not just to a weak economy, but also to the fact that we have over-capacity due to aggressive build-out during the bubble and the overall IT infrastructure market has matured. But this does not mean that innovation in networking has ceased. Granted, the next several years will see slower growth driven by evolutionary - not revolutionary - innovation. But there is a great deal to be excited about in new markets such as mobile networking and voice over IP. It is time to innovate not just in speeds and feeds, but in ways to improve operational efficiency to better utilize and run the critical infrastructures that we have in place. Networking must also evolve to support new server trends such as clustering or SANs. Over the longer term we will see growth for networking as we move toward ubiquitous consumer connectivity, with always-on high bandwidth that will permit new forms of entertainment. Even further out, we will see the connectivity of embedded devices into ad hoc networks used in a variety of applications.','',10699);
INSERT INTO subevents VALUES (1135,'2003-11-19','09:15:00','10:30:00','Ballroom','State of the Field: Physical Infrastructure Assessment and Protection to Mitigate Natural and Man-made Disasters\nor\nThe Infrastructure Enabled Infrastructure','Frieder Seible (University of California San Diego)','The dependence on the physical infrastructure of roads, bridges, dams, water supply pipelines/aqueducts, ports and harbors, etc., is an integral part of and so ingrained in our culture that we are mostly unaware of its existence as long as functionality is provided. The aging of our structures, natural hazards such as earthquakes, floods, and fires, as well as man-made hazards such as terrorist attacks and accidents, threaten the functionality of our physical infrastructure and extraordinary expenditures are required to just maintain the status quo. Unless significant advances are made in monitoring, rehabilitating, and managing the existing physical infrastructure, our quality of life cannot be preserved. While some of these advances will come in the form of new materials and new structural concepts and systems, the major breakthrough will come from a ubiquitous cyberinfrastructure, consisting of distributed multi-use sensor nets, wireless and high speed networks, fully searchable databases, and data mining tools to provide the operator and the end user with on-line information and knowledge, resulting in improved infrastructure systems management and use. The critical dependence of the physical infrastructure on the cyberinfrastructure will be illustrated with numerous examples, and changes in how we can better manage and use the physical infrastructure with cyberinfrastructure support will be considered from an engineering perspective.','',10700);
INSERT INTO subevents VALUES (1136,'2003-11-19','10:30:00','11:00:00','','A Configurable Network Protocol for Cluster Based Communications using Modular Hardware Primitives on an Intelligent NIC','Ranjesh G. Jaganathan (Clemson University), Keith D. Underwood (Sandia National Laboratories), Ron R. Sass (Clemson University)','The high overhead of generic protocols like TCP/IP provides strong motivation for the development of a better protocol architecture for cluster-based parallel computers. Reconfigurable computing has a unique opportunity to contribute hardware level protocol acceleration while retaining the flexibility to adapt to changing needs. This paper focuses on work to create a set of parameterizable components that can be put together as needed to obtain a customized protocol for each application. To study the feasibility of such an architecture, hardware components were built that can be stitched together as needed to provide the required functionality. Feasibility is demonstrated using four different protocol configurations, namely: (1) unreliable packet transfer; (2) reliable, unordered message transfer without duplicate elimination; (3) reliable, unordered message transfer with duplicate elimination; and (4) reliable, ordered message transfer with duplicate elimination. The different configurations illustrate trade-offs between chip space and functionality while reducing processor overhead.','',10702);
INSERT INTO subevents VALUES (1137,'2003-11-19','10:30:00','11:00:00','','SCALLOP: A Highly Scalable Parallel Poisson Solver in Three Dimensions','Gregory T. Balls (University of California, San Diego/SDSC), Scott B. Baden (University of California, San Diego), Phillip Colella (Lawrence Berkeley National Laboratory)','SCALLOP is a highly scalable solver and library for elliptic partial differential equations on regular block-structured domains. SCALLOP avoids high communication overheads algorithmically by taking advantage of the locality properties inherent to solutions to elliptic PDEs. Communication costs are small, on the order of a few percent of the total running time on up to 1024 processors of NPACI\'s and NERSC\'s IBM Power-3 SP systems. SCALLOP trades off numerical overheads against communication. These numerical overheads are independent of the number of processors for a wide range of problem sizes. SCALLOP is implicitly designed for infinite domain (free space) boundary conditions, but the algorithm can be reformulated to accommodate other boundary conditions. The SCALLOP library is built on top of the KeLP programming system and runs on a variety of platforms.','',10703);
INSERT INTO subevents VALUES (1138,'2003-11-19','10:30:00','11:15:00','','Bioinformatics Scientific Workspace of the Future','Natalia Maltsev (Argonne National Laboratory)','The Alliance Scientific Workspaces of the Future expedition is designed to create partnerships between technology developers and end users to deploy and further develop next generation high-end collaborative and network based scientific visualization tools and systems designed to meet the specific needs of distributed applications communities. The SWOF expedition uses the Access Grid 2.0 virtual venue as an organizing resource for the various technologies and applications data. Technologies include large scale tiled displays, passive 3D displays, distributed rendering code, personal and large scale Access Grid nodes, speech to text services, portals technology and the Access Grid 2.0 software toolkit. The AG 2.0 venue provides a services-based infrastructure for deploying, locating and using third party supplied web services.\n\nThe bioinformatics SWOF expedition uses this technology to create a virtual collaboratory. A bioinformatics venue has been created and is used to store data, applications, workflows, documents, results, database pointers and node specific services such as tiled displays and 3D displays. Collaborators from Argonne National Laboratory, Oak Ridge National Laboratory, the Medical College of Wisconsin and NCSA have contributed to and make use of the collaboratory. The bioinformatics group at Argonne National Laboratory is creating an Access Grid enabled bioinformatics portal to front-end a high throughput genomic server.\n\nWe propose an SCGlobal showcase demonstration of the bioinformatics SWOF technology. The demonstration will take the form of bioinformatics experts and biologists using their tools in a collaborative way to research a biology problem. We anticipate demonstrating the use of portal technology to run sequences through the high throughput server, the use of high end displays for visualization of results, the use of the AG 2.0 collaborative tools for analysis and documentation and, last, a demonstration of the effectiveness of using these technologies to facilitate remote collaboration.','',10704);
INSERT INTO subevents VALUES (1139,'2003-11-19','10:30:00','12:00:00','','','','The HPC Challenge honors participants for innovative uses of high performance computing resources. It provides opportunities for contestants to showcase applications and platforms. The categories are:\n\nMost Innovative Data-Intensive Application: With the increasing ability to create, store, and re-access larger and larger datasets, one thing remains constant: the importance of mining such data to glean useful pieces of knowledge. The award will be presented to the entry that uses the most novel and/or inventive approaches in mining data, visualizing data, or a combination of these tasks.\n\nMost Geographically Distributed Application: As the Grid continues to decrease the virtual distance between computers around the world, the ability to solve challenging computational problems with combinations of diverse system architectures is continuing to strengthen. The award will be presented to the team with the most geographically distributed application to solve a significantly complex problem.\n\nThis session will allow teams to present their projects to the SC03 audience and the HPC Challenge judges. The actual award will be made at the Awards plenary session on Thursday.','',10701);
INSERT INTO subevents VALUES (1140,'2003-11-19','10:30:00','17:00:00','','','','This workshop will help paint a picture of the current and future application requirements that significantly push the envelope of storage systems and the concepts, architectures, and technologies being developed to meet these requirements. This includes describing current work on Object-based Storage Devices (OSD) and associated file systems and alternative approaches. This leads into a discussion of adding more intelligence into the storage devices, making them aware of the data objects they store. Each presentation is expected to address one of the following questions:\n\nHow to achieve beyond 1PB/sec bandwidth from a single source (i.e. a file)\nHow to achieve a trillion storage accesses per second\nHow to manage beyond a trillion objects (i.e. files)\nHow to find any one object within a trillion objects\nHow to find things inside any number of objects within a trillion objects\nSecurity of a trillion objects\nWhere existing concepts, architectures, and technologies break down\nHow to get storage devices to work \"for\" you and not \"against\" you\n\nSession 1: 10:30am-noon\nChair: Steve Louis (LLNL)\nCurrent and Future HPC Application Requirements for Storage Systems\n\n\nSession 2: 1:30pm-3:00pm\nChair: To Be Announced\nAdvanced Storage Concepts, Architectures, and Technologies\n\n\nSession 3: 3:30pm-5:00pm\nHow do we get there from here? Call to action, roadmaps, standards, etc.\nChair: Thomas Ruwart (University of Minnesota)','',10705);
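--
-- Illustration: one workshop question above, finding any one object among a
-- trillion, is at database scale what a B-tree primary key answers. The sketch
-- below is hypothetical (table, columns, and value invented for illustration);
-- the workshop's premise is that exactly this machinery must be rethought at
-- 10^12 objects.
--
CREATE TABLE objects (
objectID bigint(20) NOT NULL,
location text,
PRIMARY KEY (objectID)
) TYPE=MyISAM;
-- One indexed probe via the primary key, rather than a scan of every row:
SELECT location FROM objects WHERE objectID = 123456789012;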
INSERT INTO subevents VALUES (1141,'2003-11-19','11:00:00','11:30:00','','Parallel Iterative Solvers of GeoFEM with Selective Blocking Preconditioning for Nonlinear Contact Problems on the Earth Simulator','Kengo Nakajima (RIST)','An efficient parallel iterative method with selective blocking preconditioning has been developed for symmetric multiprocessor (SMP) cluster architectures with vector processors such as the Earth Simulator. This method is based on a three-level hybrid parallel programming model, which includes message passing for inter-SMP node communication, loop directives by OpenMP for intra-SMP node parallelization and vectorization for each processing element (PE). This method provides robust and smooth convergence and excellent vector and parallel performance in 3D geophysical simulations with contact conditions performed on the Earth Simulator. The selective blocking preconditioning is much more efficient than ILU(1) and ILU(2). Performance for the complicated Southwest Japan model with more than 23 M DOF on 10 SMP nodes (80 PEs) of the Earth Simulator was 161.7 GFLOPS (25.3% of the peak performance) for the hybrid programming model and 190.4 GFLOPS (29.8% of the peak performance) for flat MPI.','',10703);
INSERT INTO subevents VALUES (1142,'2003-11-19','11:00:00','11:30:00','','Optimizing 10-Gigabit Ethernet for Networks of Workstations, Clusters, and Grids','Wu-chun Feng (Los Alamos National Laboratory), Justin (Gus) Hurwitz (Los Alamos National Laboratory), Harvey B. Newman (California Institute of Technology), Sylvain Ravot (California Institute of Technology), Roger Les Cottrell (Stanford Linear Accelerator Center), Olivier Martin (CERN), Fabrizio Coccetti (Stanford Linear Accelerator Center), Cheng Jin (California Institute of Technology), David Weig (California Institute of Technology), Steven Low (California Institute of Technology)','This paper presents a case study of the 10-Gigabit Ethernet (10GigE) adapter from Intel. Specifically, with appropriate optimizations to the configurations of the 10GigE adapter and TCP, we demonstrate that the 10GigE adapter can perform well in local-area, storage-area, system-area, and wide-area networks.\n\nIn local-area, storage-area, and system-area networks, we achieved over 4-Gb/s end-to-end throughput and 20-us end-to-end latency between applications running on less capable, lower-end PCs. In the wide-area network, we broke the recently-set Internet2 Land Speed Record by 2.5 times by sustaining an end-to-end TCP/IP throughput of 2.38 Gb/s between Sunnyvale, California and Geneva, Switzerland (i.e., 10,037 kilometers). Thus, the above results indicate that 10GigE may be a cost-effective solution across a multitude of network environments.','',10702);
INSERT INTO subevents VALUES (1143,'2003-11-19','11:15:00','12:00:00','','Grid and Web services for problem solving environments - Towards collaborative visualisation','Lakshmi Sastry (CCLRC Rutherford Appleton Laboratory, UK), John Brooke (University of Manchester)','Much research and development expertise has gone into developing generic as well as domain specific data analysis, computational steering and visualisation software tools. The use of such tools has become an integral part of modern scientific research. Consequently, considerable resources have been invested in the training and learning of such tools by individuals and teams of scientists, who have also developed highly customised, robust data analysis applications using these tools that form the core of their daily scientific activity. At the same time, advances in experimental techniques, instrumentation and high performance computing produce massive datasets from the simulation of physical processes. The scalability requirements to handle such massive datasets for data analysis tasks are beyond the capabilities of their existing data analysis and visualisation systems and algorithms. This is especially true of a large multidisciplinary research support organisation such as the Central Laboratory of the Research Councils (CCLRC), which operates several large scale scientific facilities for the UK research and industrial communities including accelerators, lasers, telescopes, satellites and supercomputers alongside its active participation in scientific research in astronomy, biology, chemistry, environmental science and physics. The users and scientific partners of these large scale facilities are situated across the world.\n\nThe data analysis challenge is to cater to such a wide range of complex multidisciplinary high performance applications that use myriad methodologies and software packages and that ambitiously aim to process, visualise and produce gigabytes of data at every finely discretised spatio-temporal and other parametric space of the problem domain. The motivation behind our Web and Grid services based applications and visualisation services is to provide a generic toolkit that can be used to harness the power of Grid computing and the dynamic flexibility of the Web/Grid services based architecture and make it available to familiar data analysis and problem solving environments. This Grid Applications Portals Toolkit (GAPtk, http://ws1.esc.rl.ac.uk/web/projects/gaptk) will provide utilities, services, protocols and configurable high-level application programming interfaces. These can be accessed via customised self-contained application portals built using domain specific tools such as MATLAB, CDAT and Live Access Server. There is also an increasing requirement to explore ways to exploit emerging technologies such as the Access Grid (http://www.accessgrid.org) for collaborative working, to increase the productivity of geographically distributed project partnerships.','',10704);
INSERT INTO subevents VALUES (1144,'2003-11-19','11:30:00','12:00:00','','Multi-Constraint Mesh Partitioning for Contact/Impact Computations','George Karypis (Department of Computer Science & Engineering, University of Minnesota)','We present a novel approach for decomposing contact/impact computations. Effective decomposition of these computations poses a number of challenges as it needs to both balance the computations and minimize the amount of communication that is performed during the finite element and the contact search phase. Our approach achieves the first goal by partitioning the underlying mesh such that it simultaneously balances both the work that is performed during the finite element phase and that performed during the contact search phase, while producing subdomains whose boundaries consist of piecewise axis-parallel lines or planes. The second goal is achieved by using a decision tree to decompose the space into rectangular or box-shaped regions that contain contact points from a single partition. Our experimental evaluation on a sequence of 100 meshes shows that this new approach can reduce the overall communication overhead relative to existing algorithms.','',10703);
INSERT INTO subevents VALUES (1145,'2003-11-19','11:30:00','12:00:00','','Scalable Hardware-Based Multicast Trees','Salvador Coll (Technical University of Valencia), Jose Duato (Technical University of Valencia), Fabrizio Petrini (Los Alamos National Laboratory), Francisco J. Mora (Technical University of Valencia)','This paper presents an algorithm for implementing optimal hardware-based multicast trees on networks that provide hardware support for collective communication. Although the underlying methodology is general enough to be applied to other present and future technologies, the Quadrics network has been chosen as the state-of-the-art interconnect on which to apply hardware-based multicast trees. The proposed mechanism is intended to improve the performance of collective communication patterns in those cases where the hardware support cannot be directly used, for instance, due to some faulty nodes. This scheme provides a significant reduction in multicast latencies compared to the system primitives, which use multicast trees based on unicast communication. A backtracking algorithm to find the optimal solution to the problem is presented. In addition, a greedy algorithm is presented and shown to provide near-optimal solutions. Finally, our experimental results show the good performance and scalability of the proposed multicast tree in comparison to traditional unicast-based multicast trees.','',10702);
INSERT INTO subevents VALUES (1146,'2003-11-19','13:30:00','14:00:00','','The Case of the Missing Supercomputer Performance: Achieving Optimal Performance on the 8,192 Processors of ASCI Q','Fabrizio Petrini (Los Alamos National Laboratory), Darren J. Kerbyson (Los Alamos National Laboratory), Scott Pakin (Los Alamos National Laboratory)','In this paper we describe how we improved the effective performance of ASCI Q, the world\'s second-fastest supercomputer, to meet our expectations. Using an arsenal of performance-analysis techniques including analytical models, custom microbenchmarks, full applications, and simulators, we succeeded in observing a serious -- but previously undetectable -- performance problem. We identified the source of the problem, eliminated the problem, and \"closed the loop\" by demonstrating improved application performance. We present our methodology and provide insight into performance analysis that is immediately applicable to other large-scale cluster-based supercomputers.','',10706);
INSERT INTO subevents VALUES (1147,'2003-11-19','13:30:00','14:00:00','','High Resolution Forward and Inverse Earthquake Modeling on Terascale Computers','Volkan Akcelik (Carnegie Mellon University), Jacobo Bielak (Carnegie Mellon University), George Biros (Courant Institute, New York University), Ioannis Epanomeritakis (Carnegie Mellon University), Antonio Fernandez (Carnegie Mellon University), Omar Ghattas (Carnegie Mellon University), Eui Joong Kim (Carnegie Mellon University), David O\'Hallaron (Carnegie Mellon University), Tiankai Tu (Carnegie Mellon University)','For earthquake simulations to play an important role in the reduction of seismic risk, they must be capable of high resolution and high fidelity. We have developed algorithms and tools for earthquake simulation based on multiresolution hexahedral meshes. We have used this capability to carry out 1 Hz simulations of the 1994 Northridge earthquake in the LA Basin using 100 million grid points. Our code sustains 0.9 teraflop/s for 12 hours on 2048 AlphaServer processors at 87% parallel efficiency. Because of uncertainties in characterizing earthquake source and basin material properties, a critical remaining challenge is to invert for source and material parameter fields for complex 3D basins from records of past earthquakes. Towards this end, we present results for material and source inversion of high-resolution models of basins undergoing antiplane motion using parallel scalable inversion algorithms that overcome many of the difficulties particular to inverse heterogeneous wave propagation problems.','',10707);
INSERT INTO subevents VALUES (1148,'2003-11-19','13:30:00','14:00:00','','ParADE: An OpenMP Programming Environment for SMP Cluster Systems','Yang-Suk Kee (Dr.), Jin-Soo Kim (Prof.), Soonhoi Ha (Prof.)','The demands for programming environments to exploit clusters of symmetric multiprocessors (SMPs) are increasing. In this paper, we present a new programming environment, called ParADE, to enable easy, portable, and high-performance programming on SMP clusters. It is an OpenMP programming environment on top of a multi-threaded software distributed shared memory (SDSM) system with a variant of home-based lazy release consistency (HLRC). To boost performance, the ParADE runtime system provides explicit message-passing primitives to make it a hybrid-programming environment. Collective communication primitives are used for the synchronization and work-sharing directives associated with small variables, lessening the synchronization overhead and avoiding the implicit barriers of work-sharing directives. The OpenMP translator bridges the gap between the OpenMP abstraction and the hybrid programming interfaces of the runtime system. The experiments with several NAS benchmarks and real applications on a Linux-based cluster show promising results, overcoming the poor performance of the conventional SDSM-based approaches.','',10708);
INSERT INTO subevents VALUES (1149,'2003-11-19','13:30:00','15:00:00','','','Joyce F. Williams-Green (Winston-Salem State University), Ian Foster (Argonne National Laboratory), Daniel A. Reed (NCSA/Alliance), Ulrich Lang (High Performance Computing Center Stuttgart (HLRS) )','This panel gathers some of the top thinkers in grid technologies, all of whom have extensive experience with the Access Grid, to discuss their visions for this collaborative technology. Discussion will include (but not be limited to) questions about necessary technical enhancements to the core Access Grid Toolkit, applications which should be built based on this Toolkit, and new directions in which the AG community must expand to reach its potential.','',10709);
INSERT INTO subevents VALUES (1150,'2003-11-19','14:00:00','14:30:00','','Dyn-MPI: Supporting MPI on Non Dedicated Clusters','D. Brent Weatherly (University of Georgia), David K. Lowenthal (University of Georgia), Mario Nakazawa (University of Georgia), Franklin Lowenthal (California State University -- Hayward)','Distributing data is a fundamental problem in implementing efficient distributed-memory parallel programs. The problem becomes more difficult in environments where the participating nodes are not dedicated to a parallel application. We are investigating the data distribution problem in non dedicated environments in the context of explicit message-passing programs. \n\nTo address this problem, we have designed and implemented an extension to MPI called Dynamic MPI (Dyn-MPI). The key component of Dyn-MPI is its run-time system, which efficiently and automatically redistributes data on the fly when there are changes in the application or the underlying environment. Dyn-MPI supports efficient memory allocation, precise measurement of system load and computation time, and node removal. Performance results show that programs that use Dyn-MPI execute efficiently in non dedicated environments, including up to almost a three-fold improvement compared to programs that do not redistribute data and a 25% improvement over standard adaptive load balancing techniques.','',10708);
INSERT INTO subevents VALUES (1151,'2003-11-19','14:00:00','14:30:00','','Early Evaluation of the Cray X1','Thomas H. Dunigan, Jr. (Oak Ridge National Laboratory), Mark R. Fahey (Oak Ridge National Laboratory), James B. White III (Oak Ridge National Laboratory), Patrick H. Worley (Oak Ridge National Laboratory)','Oak Ridge National Laboratory installed a 32 processor Cray X1 in March, 2003, and will have a 256 processor system installed by October, 2003. In this paper, we describe our initial evaluation of the X1 architecture, focusing on microbenchmarks, kernels, and application codes that highlight the performance characteristics of the X1 architecture and indicate how to use the system most efficiently.','',10706);
INSERT INTO subevents VALUES (1152,'2003-11-19','14:00:00','14:30:00','','IPSAP: A High-performance Parallel Finite Element Code for Large-scale Structural Analysis Based on Domain-wise Multifrontal Technique','Seung Jo Kim (Department of Aerospace Engineering, Seoul National University, KOREA), Chang Sung Lee (Department of Aerospace Engineering, Seoul National University, KOREA), Jeong Ho Kim (High Performance Computing and Networking Supercomputing Center, Korea), Minsu Joh (High Performance Computing and Networking Supercomputing Center, Korea), Sangsan Lee (High Performance Computing and Networking Supercomputing Center, Korea)','Most research on large-scale parallel structural analysis has focused on iterative solution methods since direct solution methods generally have many difficulties and disadvantages for large-scale problems. However, due to the numerical robustness of direct methods that guarantees the solution to be obtained within an estimated time, direct methods are much more desirable for general application of large-scale structural analysis, if the difficulties and disadvantages can be overcome. In this research, we propose the domain-wise multifrontal solver as an efficient direct solver that can overcome most of these difficulties and disadvantages. By using our own structural analysis code IPSAP, which uses the proposed solver, we can solve the largest problem ever solved by direct solvers and can sustain 191 Gflop/s with 256 CPUs on our self-made cluster system, Pegasus. By implementing the block Lanczos algorithm using our solver, IPSAP can solve eigenproblems with 7 million DOFs within one hour.','',10707);
INSERT INTO subevents VALUES (1153,'2003-11-19','14:30:00','15:00:00','','Evaluation of Cache-based Superscalar and Cacheless Vector Architectures for Scientific Computations','Leonid Oliker (Lawrence Berkeley National Laboratory), Andrew Canning (Lawrence Berkeley National Laboratory), Jonathan Carter (Lawrence Berkeley National Laboratory), John Shalf (Lawrence Berkeley National Laboratory), David Skinner (Lawrence Berkeley National Laboratory), Stephane Ethier (Princeton University), Rupak Biswas (NASA Ames Research Center), Jahed Djomehri (Computer Sciences Corporation), Rob Van der Wijngaart (Computer Sciences Corporation)','The growing gap between sustained and peak performance for scientific applications is a well-known problem in high end computing. The recent development of parallel vector systems offers the potential to bridge this gap for many computational science codes and deliver a substantial increase in computing capabilities. This paper examines the intranode performance of the NEC SX-6 vector processor and the cache-based IBM Power3/4 superscalar architectures across a number of scientific computing areas. First, we present the performance of a microbenchmark suite that examines low-level machine characteristics. Next, we study the behavior of the NAS Parallel Benchmarks. Finally, we evaluate the performance of several scientific computing codes. Results demonstrate that the SX-6 achieves high performance on a large fraction of our applications and often significantly outperforms the cache-based architectures. However, certain applications are not easily amenable to vectorization and would require extensive algorithm and implementation reengineering to utilize the SX-6 effectively.','',10706);
INSERT INTO subevents VALUES (1154,'2003-11-19','14:30:00','15:00:00','','An Evaluation of a Framework for the Dynamic Load Balancing of Highly Adaptive and Irregular Parallel Applications','Kevin J. Barker (College of William and Mary), Nikos P. Chrisochoides (College of William and Mary)','We present an evaluation of a flexible framework and runtime software system for the dynamic load balancing of asynchronous and highly adaptive and irregular applications. These applications, which include parallel unstructured and adaptive mesh refinement, serve as building blocks for a large class of scientific applications. Extensive study has led to the development of solutions to the dynamic load balancing problem for loosely synchronous and computation intensive programs; however, these methods are not suitable for asynchronous and highly adaptive applications. We evaluate a new software framework which includes support for an Active Messages style communication mechanism, global name space, transparent object migration, and preemptive decision making. Our results from both a 3-dimensional parallel advancing front mesh generation program, as well as a synthetic micro-benchmark, indicate that this new framework outperforms two existing general-purpose, well-known, and widely used software systems for the dynamic load balancing of adaptive and irregular parallel applications.','',10708);
INSERT INTO subevents VALUES (1155,'2003-11-19','14:30:00','15:00:00','','A new parallel kernel-independent fast multipole method','Lexing Ying (New York University), George Biros (New York University), Denis Zorin (New York University), Harper Langston (New York University)','We present a new adaptive fast multipole algorithm and its parallel implementation. The algorithm is kernel-independent in the sense that the acceleration of the computation of the far field does not rely on any analytic expansions, but only uses kernel evaluations. The new method enables scalable simulations for many important problems in science and engineering. Examples include viscous flows, fracture mechanics and screened Coulombic interactions. Our MPI based parallel implementation logically separates the computation and communication phases to avoid synchronization in the upward and downward computation passes, and it enables us to fully exploit computation and communication overlapping. We measure isogranular and fixed-size scalability for a variety of kernels on the Pittsburgh Supercomputing Center\'s TCS-1 Alphaserver on up to 2048 processors. Our largest experiments reached 1.2 billion unknowns, for which we have achieved 1 Tflop/s peak performance and 0.7 Tflop/s sustained performance. Overall, our implementation achieves excellent parallel efficiency.','',10707);
INSERT INTO subevents VALUES (1156,'2003-11-19','15:30:00','16:00:00','','A Million-Fold Speed Improvement in Genomic Repeats Detection','John W. Romein (Vrije Universiteit, Amsterdam), Jaap Heringa (Vrije Universiteit, Amsterdam), Henri E. Bal (Vrije Universiteit, Amsterdam)','This paper presents a novel, parallel algorithm for generating top alignments. Top alignments are used for finding internal repeats in biological sequences like proteins and genes. Our algorithm replaces an older, sequential algorithm (Repro), which was prohibitively slow for sequence lengths higher than 2000. The new algorithm is an order of magnitude faster (O(n^3) rather than O(n^4)). \n\nThe paper presents a three-level parallel implementation of the algorithm: using SIMD multimedia extensions found on present-day processors (a novel technique that can be used to parallelize any application that performs many sequence alignments), using shared-memory parallelism, and using distributed-memory parallelism. It allows processing the longest known proteins (nearly 35000 amino acids). We show exceptionally high speed improvements: between 548 and 889 on a cluster of 64 dual-processor machines, compared to the new sequential algorithm. Especially for long sequences, extreme speed improvements over the old algorithm are obtained.','',10710);
INSERT INTO subevents VALUES (1157,'2003-11-19','15:30:00','16:00:00','','Performance evaluation and tuning of GRAPE-6 --- towards 40 \"real\" Tflops','Junichiro Makino (Department of Astronomy, School of Science, University of Tokyo), Eiichiro Kokubo (National Astronomical Observatory of Japan), Toshiyuki Fukushige (Department of General System Studies, College of Arts and Sciences, University of Tokyo), Hiroshi Daisaka (Department of Astronomy, School of Science, University of Tokyo)','In this paper, we describe the performance characteristics of GRAPE-6, the sixth-generation special-purpose computer for gravitational many-body problems. GRAPE-6 consists of 2048 custom pipeline chips, each of which integrates six pipeline processors specialized for the calculation of gravitational interaction between particles. The GRAPE hardware performs the evaluation of the interaction. The frontend processors perform all other operations, such as the time integration of the orbits of particles, I/O, on-the-fly analysis etc. The theoretical peak speed of GRAPE-6 is 63.4 Tflops. We present the result of benchmark runs, and discuss the performance characteristics. We also present the measured performance for a few real scientific applications. The best performance so far achieved with real applications is 35.3 Tflops.','',10711);
INSERT INTO subevents VALUES (1158,'2003-11-19','15:30:00','17:00:00','','','Allan Snavely (SDSC), Jack Dongarra (University of Tennessee), Walt Brooks (NASA Ames), David Bailey (NERSC), Henry Newman (Instrumental), John McCalpin (IBM)','This panel consists of government and government-sponsored researchers in the area of high performance computing system performance modeling. Benchmarks are structured to address a range of target configurations. They provide the High Performance Computing Modernization Program (HPCMP) with accurate performance information on available high performance computing (HPC) capabilities. The intent is to provide a set of program source code listings, makefiles, runtime scripts, input files, and validated results files which represents the type of computational work performed on HPC resources. Questions addressed by the panelists include:\n\n1. Describe a benchmark structure.\nThe benchmark is divided into two parts: (1) hardware performance and system tests and (2) application tests.\n\n2. Describe a synthetic performance test.\nThe tests are to be run once with a standard scheduler with no changes to the default priorities. Special rules apply to I/O tests.\n\n3. Describe application tests.\nMultiple test cases using a suite of codes that are to be run using the standard system scheduler.\n\n4. Allowed changes.\nVendors are only allowed to change the source code to the extent needed to get the program to execute and provide correct output.','',10712);
INSERT INTO subevents VALUES (1159,'2003-11-19','15:30:00','17:00:00','','Informed use and development of collaborative technologies','Kelli Robyn Dipple (University of Manchester and Queensland University of Technology)','A distributed presentation / panel involving key speaker presentations and group discussion, remotely from Manchester, UK, Brisbane, Australia, and Gainesville, Florida. Discussion will focus on research and workshop activities undertaken over Access Grid facilities in Brisbane, Sydney, Manchester, Gainesville and Amsterdam, throughout 2002 - 2003. In Phoenix, audiences will have a screen-based experience of the discussion with distributed video documentation and website.\n\nDiscussion topics: Informed use and development of collaborative technologies \n1. The breadth of communication technologies available. Where does the Access Grid sit in relation to the larger context of available communication technologies? Parallel developments for realtime communication using hand held devices and wireless connectivity. The convergent evolution and hybrid systems. \n2. Practice, development, form and presentation on the Access Grid. What are the limitations and expansions? \n3. The consequences of distribution in juxtaposition to the advantages. Its impact on socialization, relational expectations and effective communication. \n4. What does this mean for artists and for scientists? What kind of contribution do the arts have towards the development of scientific tools?\n\nDocumented research projects for presentation: \n1. Navigating Gravity - distributed performance, multiple site scripting and access grid aesthetics, research and development project (Gainesville, Manchester and Sydney). \n2. Telematics and networking performance workshops over the grid (Brisbane, Amsterdam, Sydney) \n3. SC Global 02 / remote panel session on multiple site live events and comparative methodologies for simultaneous distributions (University of Manchester and University of Sydney Viz Lab)','',10713);
INSERT INTO subevents VALUES (1160,'2003-11-19','16:00:00','16:30:00','','A 14.6 billion degrees of freedom, 5 teraflops, 2.5 terabyte earthquake simulation on the Earth Simulator','Dimitri Komatitsch (California Institute of Technology), Seiji Tsuboi (Institute for Frontier Research on Earth Evolution, JAMSTEC), Chen Ji (California Institute of Technology), Jeroen Tromp (California Institute of Technology)','We use 1944 processors of the Earth Simulator to model seismic wave propagation resulting from large earthquakes. Simulations are conducted based upon the spectral-element method, a high-degree finite-element technique with an exactly diagonal mass matrix. We use a very large mesh with 5.5 billion grid points (14.6 billion degrees of freedom). We include the full complexity of the Earth, i.e., a three-dimensional wave-speed and density structure, a 3-D crustal model, ellipticity as well as topography and bathymetry. A total of 2.5 terabytes of memory is needed. Our implementation is purely based upon MPI, with loop vectorization on each processor. We obtain an excellent vectorization ratio of 99.3%, and we reach a performance of 5 teraflops (30% of the peak performance) on 38% of the machine. The very high resolution of the mesh allows us to perform fully three-dimensional calculations at seismic periods as low as 5 seconds.','',10711);
INSERT INTO subevents VALUES (1161,'2003-11-19','16:00:00','16:30:00','','GridSAT: A Chaff-based Distributed SAT Solver for the Grid','Wahid Chrabakh (UC Santa Barbara), Rich Wolski (UC Santa Barbara)','We present GridSAT, a parallel and complete satisfiability solver designed to solve non-trivial SAT problem instances using a large number of widely distributed and heterogeneous resources. \n\nThe GridSAT parallel algorithm uses intelligent backtracking, distributed and carefully scheduled sharing of learned clauses, and clause reduction. Our implementation focuses on dynamic resource acquisition and release to optimize application execution. We show how the large number of computational resources that are available from a Grid can be managed effectively for the application by an automatic scheduler and effective implementation. GridSAT execution speed is compared against the best sequential solver as rated by the SAT2002 competition using a wide variety of problem instances. The results show that GridSAT delivers speed-up for all but one of the test problem instances that are of significant size. In addition, we describe how GridSAT has solved previously unsolved satisfiability problems and the domain science contribution these results make.','',10710);
INSERT INTO subevents VALUES (1162,'2003-11-19','16:30:00','17:00:00','','The Space Simulator: Modeling the Universe from Supernovae to Cosmology','Michael S. Warren (LANL), Chris L. Fryer (LANL), M. Patrick Goda (LANL)','The Space Simulator is a 294-processor Beowulf cluster with theoretical peak performance just below 1.5 Teraflop/s. It is based on the Shuttle XPC SS51G mini chassis. Each node consists of a 2.53 GHz Pentium 4 processor, 1 GB of 333 MHz DDR SDRAM, an 80 Gbyte Maxtor hard drive, and a 3Com 3C996B-T gigabit ethernet card. The network is made up of Foundry FastIron 1500 and FastIron 800 Gigabit Ethernet switches. Each individual node cost less than $1000, and the entire system cost under $500,000. The cluster achieved Linpack performance of 665.1 Gflop/s on 288 processors in October 2002, making it the 85th fastest computer in the world according to the 20th TOP500 list. Performance has since improved to 757.1 Linpack Gflop/s, ranking at #90 on the 21st TOP500 list. This is the first machine in the TOP100 to achieve Linpack price/performance better than one dollar per Mflop/s.','',10711);
INSERT INTO subevents VALUES (1163,'2003-11-19','16:30:00','17:00:00','','HPC.NET - are CLI-based Virtual Machines Suitable for High Performance Computing?','Werner Vogels (Cornell University)','The Common Language Infrastructure is a new, standardized virtual machine that is likely to become popular on several platforms. In this paper we review whether this technology has any future in the high-performance computing community, for example by targeting the same application space as the Java-Grande Forum. We review the technology by benchmarking three implementations of the CLI and compare those with the results on Java virtual machines.','',10710);
INSERT INTO subevents VALUES (1164,'2003-11-20','08:30:00','09:15:00','Ballroom','State of the Field: Computational Paradigms for Integrative Approaches to Genomic Medicine','Jill P. Mesirov (Whitehead Institute/MIT Center for Genome Research)','The completion of the human genome sequencing project, coupled with the ever increasing scale and throughput of biological experimentation, has the potential to greatly accelerate progress in biomedicine. Discoveries in this new realm of “high-dimensionality biology” are dependent both upon sophisticated computation and the ability to unify the analytical approaches of a variety of disciplines. However, the lack of an integrated computational environment that can provide both easy access to a set of universal analytic tools, and support the development and dissemination of novel algorithmic approaches, has resulted in the pace of data acquisition greatly outstripping that of meaningful data analysis.\n\nWe will describe some of the challenging computational problems in biomedicine, the techniques we use to address them, and a software infrastructure to support this highly interdisciplinary field of research.','',10714);
INSERT INTO subevents VALUES (1165,'2003-11-20','09:15:00','10:30:00','Ballroom','State of the Field: Networking the Physical World','David E. Culler (University of California Berkeley)','The SuperComputing conference ushered in computational simulation as the third pillar of science and, in its SCxy form, rode the wave of the killer micro. We are now seeing the emergence of a new interplay of science and computing technology. The sustained, dramatic advance of CMOS has made it possible to construct complete computing systems with processing, storage, sensing, and communication in a tiny volume at very low cost. Like the microscope, telescope, and computer, this technology will allow scientists and engineers to perceive what was previously invisible; in this case through deployment of dense networks of these sensors in the physical world. We describe the emerging technology of sensor networks at several levels, including platform architecture, operating system design, self-organizing networks, sensor data processing, and programming environments. We will explore how it opens new avenues for computer networking to advance the scientific endeavor.','',10715);
INSERT INTO subevents VALUES (1166,'2003-11-20','10:30:00','11:00:00','','Improving the Scalability of Parallel Jobs by adding Parallel Awareness to the Operating System','Terry Jones (LLNL), William Tuel (IBM), Larry Brenner (IBM), Jeff Fier (IBM), Patrick Caffrey (IBM), Shawn Dawson (LLNL), Rob Neely (LLNL), Robert Blackmore (IBM), Brian Maskell (AWE), Paul Tomlinson (AWE), Mark Roberts (AWE)','A parallel application benefits from scheduling policies that include a global perspective of the application. As the interactions among cooperating processes increase, mechanisms to ameliorate waiting within one or more of the processes become more important. Collective operations such as barriers and reductions are extremely sensitive to even usually harmless events such as context switches. For the last 18 months, we have been researching the impact of random short-lived interruptions such as timer-decrement processing and periodic daemon activity, and developing strategies to minimize their impact on large processor-count SPMD bulk-synchronous programming styles. We present a novel co-scheduling scheme for improving performance of fine-grain collective activities such as barriers and reductions, describe an implementation consisting of operating system kernel modifications and run-time system, and present a set of results comparing the technique with traditional operating system scheduling. Our results indicate a speedup of over 300% on synchronizing collectives.','',10716);
INSERT INTO subevents VALUES (1167,'2003-11-20','10:30:00','11:00:00','','Merrimac: Supercomputing with Streams','William J. Dally (Stanford University), Patrick Hanrahan (Stanford University), Mattan Erez (Stanford University), Timothy J. Knight (Stanford University), Francois Labonte (Stanford University), Jung-Ho Ahn (Stanford University), Nuwan Jayasena (Stanford University), Ujval J. Kapasi (Stanford University), Abhishek Das (Stanford University), Jayanth Gummaraju (Stanford University), Ian Buck (Stanford University)','Merrimac uses stream architecture and advanced interconnection networks to give an order of magnitude more performance per unit cost than cluster-based scientific computers built from the same technology. Organizing the computation into streams and exploiting the resulting locality using a register hierarchy enables a stream architecture to reduce the memory bandwidth required by representative applications by an order of magnitude or more. Hence a processing node with a fixed bandwidth (expensive) can support an order of magnitude more arithmetic units (inexpensive). This in turn allows a given level of performance to be achieved with fewer nodes (a 1-PFLOPs machine, for example, with just 8,192 nodes), resulting in greater reliability and simpler system management. We sketch the design of Merrimac, a streaming scientific computer that can be scaled from a $20K 2 TFLOPS workstation to a $20M 2 PFLOPS supercomputer and present the results of some initial application experiments on this architecture.','',10717);
INSERT INTO subevents VALUES (1168,'2003-11-20','10:30:00','11:00:00','','A Metadata Catalog Service for Data Intensive Applications','Gurmeet Singh (Information Sciences Institute, University of Southern California), Ann Chervenak (Information Sciences Institute, University of Southern California), Ewa Deelman (Information Sciences Institute, University of Southern California), Carl Kesselman (Information Sciences Institute, University of Southern California), Mary Manohar (Information Sciences Institute, University of Southern California), Sonal Patil (Information Sciences Institute, University of Southern California), Laura Pearlman (Information Sciences Institute, University of Southern California)','Today’s advances in computational, storage and network technologies, as well as middleware such as the Globus Toolkit, allow scientists to expand their scientific horizons and develop sophisticated data intensive applications. These applications produce and analyze terabytes and petabytes of data that are distributed in millions of files or objects. In order to efficiently manage the large data sets, the metadata about the data needs to be managed. There are various types of metadata, and it is likely that a range of metadata services will exist in grid environments that are specialized for different types of metadata cataloguing and discovery. In this paper, we present a design of a Metadata Catalog Service (MCS) that provides a mechanism for storing and accessing descriptive metadata and allows users to query for data items based on desired attributes. We describe our experience in using the MCS with several applications and present a scalability study of the service.','',10718);
INSERT INTO subevents VALUES (1169,'2003-11-20','10:30:00','11:30:00','','Computational Steering in a Collaborative Environment','John Brooke (University of Manchester), Thomas Eickermann (Research Centre Juelich), Uwe Woessner (HLRS)','In this showcase we will present live running simulations which are integrated into the Access Grid in a variety of different ways. An example of this is the use of vnc to distribute a desktop on which the simulation is being displayed. Another example is the redirection of the visualization into vic to make 3D animations available over the Access Grid. Other examples that will be explored are the use of OpenGL Vizserver to direct the output of a graphics supercomputer located on the Grid to the AG locations. We will also utilize the ability of the next generation AG software to directly link with visualization toolkits such as vtk, AVS/Express, or COVISE as an integrated part of the Virtual Venue if this functionality is available at the time of SC2003.','',10719);
INSERT INTO subevents VALUES (1170,'2003-11-20','11:00:00','11:30:00','','Protein Explorer: A Petaflops Special-Purpose Computer System for Molecular Dynamics Simulations','Makoto Taiji (RIKEN), Tetsu Narumi (RIKEN), Yousuke Ohno (RIKEN), Noriyuki Futatsugi (RIKEN), Atsushi Suenaga (RIKEN), Naoki Takada (RIKEN), Akihiko Konagaya (RIKEN)','We are developing the `Protein Explorer\' system, a petaflops special-purpose computer system for molecular dynamics simulations. The Protein Explorer is a PC cluster equipped with special-purpose engines that calculate nonbonded interactions between atoms, which is the most time-consuming part of the simulations. A dedicated LSI `MDGRAPE-3 chip\' performs these force calculations at a speed of 165 gigaflops or higher. The system will have 6,144 MDGRAPE-3 chips to achieve a nominal peak performance of one petaflop. The system will be completed in 2006. In this paper, we describe the project plans and the architecture of the Protein Explorer.','',10717);
INSERT INTO subevents VALUES (1171,'2003-11-20','11:00:00','11:30:00','','Grid-Based Galaxy Morphology Analysis for the National Virtual Observatory','Ewa Deelman (ISI), Raymond Plante (NCSA), Carl Kesselman (USC/ISI), Gurmeet Singh (USC/ISI), Mei Su (USC/ISI), Gretchen Greene (Space Telescope Science Institute), Robert Hanisch (Space Telescope Science Institute), Niall Gaffney (Space Telescope Science Institute), Antonio Volpicelli (Space Telescope Science Institute), James Annis (Fermi Lab), Vijay Sekhri (Fermi Lab), Tamas Budavari (Johns Hopkins University), Maria Nieto-Santisteban (Johns Hopkins University), William O\'Mullane (Johns Hopkins University), David Bohlender (Canadian Astrophysical Data Center), Tom McGlynn (NASA), Arnold Rots (Smithsonian Astrophysical Observatory), Olga Pevunova (NASA)','As part of the development of the National Virtual Observatory (NVO), a data grid for astronomy, we have developed a prototype science application to explore the dynamical history of galaxy clusters by analyzing the galaxies’ morphologies. The purpose of the prototype is to explore how grid-based technologies can be used to provide specialized computational services within the NVO environment. Although we describe the scientific goals of the application, this paper focuses on the key technology components, particularly Chimera and Pegasus, which are used to create and manage the computational workflow. We illustrate how the components were connected and driven from the application’s portal.','',10718);
INSERT INTO subevents VALUES (1172,'2003-11-20','11:00:00','11:30:00','','BCS MPI: a New Approach in the System Software Design for Large-Scale Parallel Computers','Juan Fernandez (LANL), Fabrizio Petrini (LANL), Eitan Frachtenberg (LANL)','Buffered CoScheduled (BCS) MPI proposes a new approach to design the communication libraries for large-scale parallel machines. The emphasis of BCS MPI is on the global coordination of a large number of processes rather than on the traditional optimization of the local performance of a pair of communicating processes. BCS MPI delays the interprocessor communication in order to globally schedule the communication pattern, and it is designed on top of a minimal set of collective communication primitives. In this paper we describe a prototype implementation of BCS MPI and its communication protocols. The experimental results, executed on a set of scientific applications representative of the ASCI workload, show that BCS MPI is only marginally slower than the production-level MPI, but much simpler to implement, debug and analyze.','',10716);
INSERT INTO subevents VALUES (1173,'2003-11-20','11:30:00','12:00:00','','Early Experience with Scientific Programs on the Cray MTA-2','Wendell Anderson (Naval Research Laboratory), Preston Briggs (Cray, Inc.), C. Stephen Hellberg (Naval Research Laboratory), Daryl W. Hess (Naval Research Laboratory), Alexei Khokhlov (University of Chicago), Marco Lanzagorta (Scientific and Engineering Solutions), Robert Rosenberg (Naval Research Laboratory)','We describe our experiences porting and tuning three scientific programs to the Cray MTA-2, paying particular attention to the problems posed by I/O. We have measured the performance of each program over many different machine configurations and we report on the scalability of each program. In addition, we compare the performance of the MTA with that of an SGI Origin running all three programs.','',10717);
INSERT INTO subevents VALUES (1174,'2003-11-20','11:30:00','12:00:00','','Scalable NIC-based reduction on Large-scale Clusters','Adam Moody (Ohio State University), Juan Fernandez (LANL), Fabrizio Petrini (LANL), Dhabaleswar K. Panda (Ohio State University)','Over the last decades, researchers have developed many efficient reduction algorithms. However, all these algorithms assume that the reduction processing takes place on the host CPU. Modern Network Interface Cards (NICs) sport programmable processors and thus introduce a fresh variable into the equation. This raises the following interesting challenge: Can we take advantage of modern NICs to implement fast reduction operations? In this paper, we take on this challenge in the context of large-scale clusters. Through experiments on a 960-node, 1920-processor cluster we show that NIC-based reductions indeed perform with reduced latency and improved consistency and scalability over host-based algorithms for the common case. In the largest configuration tested ---1812 processors--- our NIC-based algorithm can sum a single element vector in 73 microseconds with 32-bit integers and in 118 microseconds with 64-bit floating-point numbers, an improvement, respectively, of 121% and 39% with respect to the production level MPI library.','',10716);
INSERT INTO subevents VALUES (1175,'2003-11-20','11:30:00','12:00:00','','The Livny and Plank-Beck Problems: Studies in Data Movement on the Computational Grid','Matthew S. Allen (University of California, Santa Barbara), Rich Wolski (University of California, Santa Barbara)','Over the last few years the Grid Computing research community has become interested in developing data intensive applications for the Grid. These applications face significant challenges because their widely distributed nature makes it difficult to access data with reasonable speed. In order to address this problem, we feel that the Grid community needs to develop and explore data movement challenges that represent problems encountered in these applications. In this paper, we will identify two such problems that we have dubbed the Livny Problem and the Plank-Beck Problem. We will also present data movement scheduling techniques that we have developed to address these problems.','',10718);
INSERT INTO subevents VALUES (1176,'2003-11-20','11:30:00','12:00:00','','Splat Theremin','Joe Reitzer (TRECC-NCSA-UIUC)','Splat Theremin will be a collaborative art piece developed by Joe Reitzer TRECC/NCSA, Marcus Thiebaux ISI/USC, Tom Coffin ACCESS DC/NCSA, Alexander Horn. Splat Theremin will utilize the AG 2.0 software, Linux Graphics Cluster with Tile Display, Geowall and Immersadesk. TRECC, ACCESS, and EVL will be the main sites involved. \n\nSplat Theremin will provide an interactive means to manipulate volumetric data. The data will be represented visually through the use of Splatting rendering techniques utilizing cluster visualization on TRECC\'s Linux graphics cluster with tile display. The participants at the Phoenix Civic Plaza will be able to manipulate the volume data interactively by means of a custom-built short-range tracking system. Behavioral software that reacts to the input of the tracking system will control parameters of how the data is manipulated and rendered. The Access Grid 2.0 software will be used for additional manipulation of the volumetric data. The participants at other Access Grid locations can interact with the volume rendering appearing on the tile display by extracting the luminosity of the AG video streams. Also, the number of AG nodes connected to the SC Global Showcase site may influence other parameters of how the data is manipulated. All data will be selectively available for display on the Geowall at TRECC, ACCESS, and any other site that participates. The Immersadesk at ACCESS DC would also be available for possible remote interaction from participants. Remote sites would be able to choose a specific tile of the tiled display. The choice of tile(s) to display will be a distributed stream like that of DPPT.\n\nThere are numerous contributions from combining these diverse technologies into a homogeneous system for the immediate and remote creation of artwork. The experience would provide collaborative insight through the use of this multi-system integration in the generation of visually interesting animations. The live and digital interaction would be similar to how a Theremin creates sound and music, except with multiple people and machines interacting in a live jam session. Each user\'s experience would be different every time.','',10719);
INSERT INTO subevents VALUES (1177,'2003-11-20','13:30:00','15:00:00','Ballroom','','','SC2003 presents a wide range of awards that recognize the innovative hard work of conference participants and leaders in the field. The Gordon Bell Prizes reward practical uses of high-performance computers, including best performance of an application and best achievement in cost-performance. The Seymour Cray Computer Science and Engineering Award recognizes innovative contributions to high-performance computing systems that best exemplify the creative spirit of Seymour Cray. The Sidney Fernbach Memorial Award honors innovative uses of high-performance computing in problem solving. In addition, the conference gives prizes for this year\'s HPC Challenge, Bandwidth Challenge, Best Paper, and Best Student Paper. These prestigious honors are presented during a special ceremony held as a capstone to the SC conference.','',10720);
INSERT INTO subevents VALUES (1178,'2003-11-20','15:30:00','15:45:00','','Contextual Backgrounds: AG on the Beach','Darran Edmundson (ANU Supercomputer Facility Vizlab)','Green and blue screen compositing is widely used in television and film to merge foreground actors with background scenes. A classic example is the television weather report in which the otherwise information-less pixels surrounding the reporter\'s \"talking head\" are replaced by real data in the form of weather maps and images. The Access Grid, with wall display real-estate being a precious commodity, is particularly well suited to this technique. Users can be combined with their data in meaningful ways to aid understanding for remote viewers. Additionally, with multiple cameras in play, replacing each camera\'s background with a perspective-correct view of the virtual background helps to unify the otherwise disjoint streams coming from a node. \n\nFair enough, so why not paint one\'s Access Grid green and be done with it? Because while the end result is compelling, green screen studios are not at all pleasant environments. They require large sets to minimize color spill from the screen onto the actors plus highly-controlled lighting to ensure even screen color for automated keying. In contrast, the Access Grid - where the distinction between participant and viewer is highly blurred - needs to be a comfortable space conducive to multi-way communication. (Regardless, our small 6m x 4.5m retrofitted room precluded any thought of traditional green screening). \n\nBy covering the walls of our AG Node with a 3M retroreflective material and placing rings of illuminating green LEDs around our camera lenses, viewers at other nodes perceive brilliant green backgrounds in all outgoing video streams. (Users in the node itself perceive the walls to have a shimmering blue color.) However, prior to transmitting, we use software and hardware to remove the chroma green and, as we know the current position and orientation of the camera lens, composite camera-correct background images into the outgoing video stream. The result, at least for viewers at other nodes, is that our node participants appear embedded in a virtual world of our choosing. Allowing remote operation of our pan/tilt cameras helps to complete the illusion. In this showcase we demonstrate the technology with both scientific user background data and the more contrived example alluded to in the showcase title - namely, our node attendees in landlocked Canberra placed on a beautiful Australian beach.','',10725);
INSERT INTO subevents VALUES (1179,'2003-11-20','15:30:00','16:00:00','','Efficient, Unified, and Scalable Performance Monitoring for Multiprocessor Operating Systems','Robert W. Wisniewski (IBM T.J. Watson Research), Bryan Rosenburg (IBM T.J. Watson Research)','Programming, understanding, and tuning the performance of large multiprocessor operating systems is challenging. Crucial to achieving good performance is understanding the system\'s behavior. \n\nWe have developed an efficient, unified, and scalable tracing infrastructure that allows for correctness debugging, performance debugging, and performance monitoring of an operating system. The infrastructure allows variable-length events to be logged without locking and provides random access to the event stream. The infrastructure allows cheap and parallel logging of events by applications, libraries, servers, and the kernel. The infrastructure was designed for K42, a new open-source research kernel designed to scale near perfectly on large cache-coherent 64-bit multiprocessor systems. The techniques are generally applicable, and have been integrated into LTT (Linux Trace Toolkit). We describe the implementation of the infrastructure, how we used the facility, e.g., analyzing lock contention, to understand and achieve K42\'s scalable performance, and the lessons we learned. The infrastructure has been invaluable.','',10721);
INSERT INTO subevents VALUES (1180,'2003-11-20','15:30:00','16:00:00','','Fast Parallel Non-Contiguous File Access','Joachim Worringen (NEC C&C Research Lab), Jesper Larson Traff (NEC C&C Research Lab), Hubert Ritzdorf (NEC C&C Research Lab)','Many applications of parallel I/O perform non-contiguous file accesses, but only a few file system interfaces support non-contiguous access. In contrast, the most commonly used parallel programming interface, MPI, supports parallel I/O through its MPI-IO interface. Within this interface, non-contiguous accesses are supported by the use of derived MPI datatypes. Unfortunately, current MPI-IO implementations suffer from low performance of such non-contiguous accesses when compared to the performance of the storage system for contiguous accesses, although a considerable amount of work has been done in this area. In this paper we analyze an important bottleneck in current implementations of MPI-IO, and present a new technique termed listless i/o to perform non-contiguous access with MPI-IO. On the NEC SX-series of parallel vector computers, listless i/o is able to increase the bandwidth for non-contiguous file access by sometimes more than a factor of 500 when compared to the traditional approach.','',10722);
INSERT INTO subevents VALUES (1181,'2003-11-20','15:30:00','17:00:00','','','Thomas A. DeFanti (University of Illinois at Chicago, USA), Larry Landweber (National Science Foundation, USA), Kees Neggers (SURFnet, The Netherlands), Harvey B. Newman (Caltech, USA), Bill St. Arnaud (CANARIE, Canada)','E-Science faces unprecedented challenges in the coming decade, in terms of: (1) the data-intensiveness of the work (as the data being processed, distributed and analyzed moves from terabytes to petabytes to exabytes), (2) the complexity of the data (extracting detail from overwhelming datasets generated by instruments), (3) the timeliness of data transfers (whether bulk transfers for remote storage, smaller transfers for distributing computing and analysis, or real-time transfers for collaboration), and (4) the global extent and multi-level peer group structure of the collaborations, leading to the need for international teams to collaborate and share data-intensive work in fundamentally new ways. This panel discusses the key roles and issues facing new networking infrastructures taking shape worldwide to tackle the data tsunami coming this decade.','',10723);
INSERT INTO subevents VALUES (1182,'2003-11-20','15:30:00','17:00:00','','','Daniel J. Blumenthal (University of California, Santa Barbara), Andrew A. Chien (University of California, San Diego), Jason Leigh (University of Illinois at Chicago), Larry Smarr (University of California, San Diego), Rick L. Stevens (Argonne National Laboratory/University of Chicago)','For the last decade, Moore\'s Law has dominated supercomputing architecture, since it was on a steeper exponential than either bandwidth or storage. Furthermore, during the 1990s, the commoditization of processors allowed for super-exponential growth in computing power through the parallelization of processors -- that is, we were able to multiply Moore\'s Law for the growth of individual processor speeds by the 500-fold increase in the number of processors in a single parallel computer (from 4 processors in the early 90s to 2000 processors today). \n\nA fundamental architectural shift has occurred in this decade, in that storage and particularly networking bandwidth are growing much faster than Moore’s Law. The super-exponential in bandwidth is caused by parallelization in the number of Lambdas, independent light paths down a single fiber optic, multiplied by the increase of the bandwidth of the individual light paths (DWDM). The TeraGrid was the first example of a national-scale supercomputer with dedicated optical paths -- 4x10Gbps. The Panel reviews the basic engineering trends in processors, storage and optics, and then examines a number of federally funded projects which are exploring the vision laid out by Steve Wallach at Supercomputing 2000 in which a petaflop computer by 2010 will be an optical switch with compute and storage peripherals. In addition, the Panel examines how dedicated multi-Lambda optical circuits could radically change the architecture of distributed cyberinfrastructure and the ability for application end-users to use that infrastructure to carry out 21st-century scientific research.','',10724);
INSERT INTO subevents VALUES (1183,'2003-11-20','15:45:00','16:30:00','','Transpacific Synergistic Entertainment','Kazuyuki Shudo (National Institute of Advanced Industrial Science and Technology (AIST), Japan)','Entertainment has not yet found a place on the Access Grid. We explore an informal entertainment use of the technology, and the requirements for such use will become clear.\n\nIn this event, participants from countries around the Pacific Ocean share an exciting, synergistic experience in a particular mode of entertainment. Entertainment companies, AIST, and Waseda University have started testing our prototype, and we are now building up our experience toward SC Global and our business.\n\nChallenges: these countries have very different cultures, and their attitudes toward entertainment are equally divergent; sharing a mode of entertainment is thus a social and cultural challenge. Of course, other participants are welcome. The bandwidth of transpacific Internet lines is relatively narrow compared with U.S. domestic lines, and lines between Asia-Pacific countries are poorer still. SC Global is a precious opportunity to evaluate the current state of the Internet in this region. The real obstacle to entertainment uses will be the latency due to the long distance between Asia and the U.S.','',10725);
INSERT INTO subevents VALUES (1184,'2003-11-20','16:00:00','16:30:00','','Parallel netCDF: A High-Performance Scientific I/O Interface','Jianwei Li (ECE Department, Northwestern University), Wei-keng Liao (ECE Department, Northwestern University), Alok Choudhary (ECE Department, Northwestern University), Robert Ross (MCS Division, Argonne National Laboratory), Rajeev Thakur (MCS Division, Argonne National Laboratory), William Gropp (MCS Division, Argonne National Laboratory), Rob Latham (MCS Division, Argonne National Laboratory), Andrew Siegel (MCS Division, Argonne National Laboratory), Brad Gallagher (ASCI Flash Center, University of Chicago), Michael Zingale (UCO/Lick Observatory, University of California, Santa Cruz)','Dataset storage, exchange, and access play a critical role in scientific applications. For such purposes netCDF serves as a portable, efficient file format and programming interface, which is popular in numerous scientific application domains. However, the original interface does not provide an efficient mechanism for parallel data storage and access. \n\nIn this work, we present a new parallel interface for writing and reading netCDF datasets. This interface is derived with minimal changes from the serial netCDF interface but defines semantics for parallel access and is tailored for high performance. The underlying parallel I/O is achieved through MPI-IO, allowing for substantial performance gains through the use of collective I/O optimizations. We compare the implementation strategies and performance with HDF5. Our tests indicate programming convenience and significant I/O performance improvement with this parallel netCDF (PnetCDF) interface.','',10722);
INSERT INTO subevents VALUES (1185,'2003-11-20','16:00:00','16:30:00','','Memory Profiling using Hardware Counters','Marty Itzkowitz (Sun Microsystems), Brian J.N. Wylie (Sun Microsystems), Christopher Aoki (Sun Microsystems), Nicolai Kosche (Sun Microsystems)','Although memory performance is often a limiting factor in application performance, most tools only show performance data relating to the instructions in the program, not to its data. In this paper, we describe a technique for directly measuring the memory profile of an application. We describe the tools and their user model, and then discuss a particular code, the MCF benchmark from SPEC CPU 2000. We show performance data for the data structures and elements, and discuss the use of the data to improve program performance. Finally, we discuss extensions to the work to provide feedback to the compiler for prefetching and to generate additional reports from the data.','',10721);
INSERT INTO subevents VALUES (1186,'2003-11-20','16:30:00','17:00:00','','Grid-Based Parallel Data Streaming implemented for the Gyrokinetic Toroidal Code','Scott Alan Klasky (PPPL), Stephane Ethier (PPPL), Zhihong Lin (UC Irvine), Kevin Martins (PPPL), Doug McCune (PPPL), Ravi Samtaney (PPPL)','We have developed a threaded parallel data streaming approach using Globus to transfer multi-terabyte simulation data from a remote supercomputer to the scientist’s home analysis/visualization cluster, as the simulation executes, with negligible overhead. Data transfer experiments show that this concurrent data transfer approach is more favorable compared with writing to local disk and then transferring this data to be post-processed. The present approach is conducive to using the grid to pipeline the simulation with post-processing and visualization. We have applied this method to the Gyrokinetic Toroidal Code (GTC), a 3-dimensional particle-in-cell code used to study micro-turbulence in magnetic confinement fusion from first principles plasma theory.','',10722);
INSERT INTO subevents VALUES (1187,'2003-11-20','16:30:00','17:00:00','','Identifying and Exploiting Spatial Regularity in Data Memory References','Tushar Mohan (Lawrence Berkeley National Lab), Bronis R. de Supinski (LLNL), Sally A. McKee (CSL Cornell), Frank Mueller (NCSU), Andy Yoo (LLNL), Martin Schulz (CSL, Cornell)','The growing processor/memory performance gap causes the performance of many codes to be limited by memory accesses. Strided memory accesses forming streams can be targeted by optimizations such as prefetching, relocation, remapping, and vector loads. Undetected, they can be a significant source of memory stalls in loops. The concept of locality fails to capture the existence of streams in a program\'s memory accesses. \n\nFirst, we define spatial regularity as a means to discuss the presence and effects of streams. Second, we develop measures to quantify spatial regularity, and we design and implement an on-line, parallel algorithm to detect streams in running applications. Third, we use examples from real codes and common benchmarks to illustrate how derived stream statistics can be used to guide the application of profile-driven optimizations. Overall, we demonstrate the benefits of our novel regularity metric as an instrument to detect potential for optimizations affecting memory performance.','',10721);
INSERT INTO subevents VALUES (1188,'2003-11-20','16:30:00','17:00:00','','Closing Comments, SC Global Chair','Jennifer Teig von Hoffman (Boston University)','A summary of this year\'s SC Global conference, including information about participating sites, and the technical and production infrastructure that supported them.','',10725);
INSERT INTO subevents VALUES (1189,'2003-11-21','08:30:00','10:00:00','','','John Grosh (General Engineer, Office of the Under Secretary of Defense for Science and Technology), Alan Laub (SciDAC Director, DOE Office of Science ), Daniel A. Reed (NCSA and the University of Illinois at Urbana-Champaign )','The President’s FY 2004 budget included a directive to develop a plan to guide future Federal investments in high end computing. Established in March 2003, the High End Computing Revitalization Task Force (HECRTF), coordinated through the National Science and Technology Council, has been preparing this plan, which is scheduled to be delivered in the fall of 2003. The plan will set forth a roadmap for fiscal year (FY) 2005 through FY 2009 Federal investments in three areas:\n\n* Research and development in core HEC technologies, including identification of key technologies for new generations of HEC systems, alternative coordinated multiagency plans, and approaches to enable both revolutionary and evolutionary advances and diffusion into industry\n\n* Provision of HEC resources to Federal and Federally-funded researchers, including alternative plans to help reduce capability and capacity gaps, design specifications and performance targets linked to application domain requirements and user needs, minimizing time to solution, and access by potential Federal and non-Federal user communities beyond those using HEC systems funded or hosted by Federal agencies\n\n* Recommendations about Federal HEC procurement practices, such as practical performance measures, ways to derive system performance targets, ways to measure total cost of ownership, and ways to improve HEC acquisitions processes\n\nIn April 2003 the HECRTF solicited white papers that provide technical facts and information about these three areas. In response to the solicitation, which was extended to a wide spectrum of stakeholders, almost 80 papers were received.\n\nOn June 16 through 18, 2003, the Computing Research Association (CRA) sponsored a Workshop on the Road Map for the Revitalization of High End Computing that brought together 200 academic, industry, and government researchers and managers to address HEC revitalization issues and publish a report, which is scheduled to be publicly released in the fall of 2003. The workshop had working groups that addressed HEC topics including (1) enabling technologies, (2) COTS-based architectures, (3) custom-based architectures, (4) run times and operating systems, (5) programming environments and tools, (6) performance modeling, metrics, and specifications, (7) application-driven system requirements, and (8) procurement, accessibility, and cost of ownership.\n\nThe panelists will draw on these activities to address the following questions:\n\nWhat are the purposes, plans, schedule, and results to date of the Federal government’s High End Computing Revitalization Task Force (HECRTF)?\n\nWhat input to the HECRTF was provided by academia and industry in white papers solicited by the HECRTF in the spring of 2003 and at the June 2003 Workshop on the Road Map for the Revitalization of High End Computing?\n\nWhat is the current status of the Federal government’s plans to revitalize high end computing in the U.S.?\n\nWhat are the views of academia and industry about HECRTF efforts?','',10726);
INSERT INTO subevents VALUES (1190,'2003-11-21','08:30:00','10:00:00','','','Ken Kennedy (Rice University), David Kuck (Intel), Marc Snir (University of Illinois), Thomas Sterling (CalTech/NASA JPL), Bob Numrich (University of Minnesota), John Gustafson (Sun)','The value of an HPC system to a user includes many factors, such as: execution time on a particular problem, software development time, direct and indirect costs. The DARPA High Productivity Computing Systems program is focused on providing a new generation of economically viable high productivity computing systems for the national security and industrial user community in the 2007-2010 timeframe. The goal is to provide systems that double in productivity (or value) every 18 months.\n\nThis program has initiated a fundamental reassessment of how we define and measure performance, programmability, portability, robustness and ultimately productivity in the HPC domain. The panelists will present their (new) views on two fundamental questions:\n\n Q1: How should we define and measure productivity in HPC?\n Q2: What are the implications for HPC designers and users?','',10727);
INSERT INTO subevents VALUES (1191,'2003-11-21','08:30:00','10:00:00','','','Fabrizio Petrini (Los Alamos National Laboratory), Dhabaleswar K. Panda (The Ohio State University), Jeffrey S. Chase (Duke University), Bradley Booth (Intel Corporation), Allyn Romanow (Cisco Systems), Anthony Skjellum (MPI Software Technology)','In the tradition of the ABC TV show that pitted Hollywood stars from different networks against each other, this panel reprises the show where the network stars in this case are InfiniBand, Myrinet, Quadrics, SCI, and 10-Gigabit Ethernet. \n\nThe panel will address two major sets of questions: [1] Which interconnect is the best for high-performance computing and why? [2] What are the future trends in high-performance networking, and what are the implications of these trends?\n\nQuestions for the \"head-to-head\" battle of network interconnects: \n[1] Each panelist was invited onto the panel due to their expertise with specific network interconnects. Why is \"your\" solution the better one? \n[2] Given that we, as a community, focus on the typical quantitative measures of latency and throughput for network interconnects, what other ways should we be evaluating interconnects? \n[3] Will the \"status quo\" in networking continue? That is, Ethernet as a commodity interconnect that often doubles as a cheap commodity solution for clusters with InfiniBand, Quadrics, and Myrinet \"relegated\" to high-end and more costly clusters. \n[4] With 10-Gigabit Ethernet processors on the horizon, will RDMA/TCP/10GigE be sufficient in matching the performance of Quadrics, InfiniBand, and Myrinet? And if so, what does this foretell of the future of these latter interconnects?\n[5] What assumptions must interconnects make about the underlying architecture (or what assumptions would they like to make)? PCI-X? PCI Express? Intel\'s \"Communications Streaming Architecture\" or network co-processor? InfiniBand? \n\nQuestions for \"Future Trends and Implications\": \n[1] Moore\'s Law forecasts the doubling of processor speeds every 18 months. Arguably network speeds have been doubling every 12 months on average. Will there come a time where we focus more on \"supernetworking\"? \n[2] \"Sockets or Bust?\": Does the interface to the network have to be a sockets interface? Or will application programmers be willing to rely on \"inefficient\" MPI software to hide such details? \n[3] In five years, how will today's interconnects evolve and/or compete in high-performance computing? \n[4] InfiniBand started out as a high-performance I/O technology but has evolved into a general network interconnect for high-performance clusters. Will it replace Myrinet or Quadrics as the costlier high-performance interconnect for high-end clusters? \n[5] What implications, if any, are there for direct-access file systems (DAFS)?','',10728);
INSERT INTO subevents VALUES (1192,'2003-11-21','10:30:00','12:00:00','','','Paul Gottlieb (DOE), Terry Bollinger (MITRE), Tony Stanco (The Center of Open Source and Government), Tim Witham (Open Source Development Lab)','Panelists have been selected based on their recent activities in OSS. They will describe their activities and any resulting conclusions or recommendations. Observations about \"best practices\" or \"lessons learned\" will be elicited from them.','',10729);
INSERT INTO subevents VALUES (1193,'2003-11-21','10:30:00','12:00:00','','','Marshall Peterson (CTO of the J. Craig Venter Science Foundation), Steve Oberlin (Founder and CEO of Unlimited Scale), Dr. Andrew Grimshaw (Founder and CTO of Avaki), Mark Seager (Assistant Department Head for Terascale Systems, Lawrence Livermore National Laboratory), Russ Miller (Director for the Center of Computational Research at SUNY-Buffalo)','In the beginning, supercomputing involved utilizing gigantic machines stored in huge rooms at only the top locations. Today, however, the supercomputing world is experiencing a change: powerful yet lower-cost forms of supercomputing are becoming commonplace thanks to the emergence of practices such as the use of Linux clustering, grid computing and the onset of appliances. Future computing sites, especially in the life sciences, will combine these components into massive data factories that pipeline automated instrumentation, supercomputers and analytic databases and then integrate the results with geographically separated compute and data grids.','',10730);
INSERT INTO subevents VALUES (1194,'2003-11-21','10:30:00','12:00:00','','','Steve Scott (Cray Inc.), Rob Pennington (NCSA), Bill Pulleyblank (IBM)','This panel follows in the steps of an HPC architectural discussion called Goldilocks and the Three Bears, chaired by Bob Borchers at SC95. While the names and some of the topics have changed, HPC architectural directions continue to be a hot area of discussion. During this session, three experts from widely divergent schools of HPC thought will discuss where the industry is headed and why their particular architecture will be an important part of that future. The questions that will be posed to this panel include: \n\n1) Why do you think that the architecture you are most closely associated with is important and relevant? \n\n2) Where do you see it evolving over time? \n\n3) Do you see any other current HPC architectures becoming less important over time? \n\n4) Do you think that customized HPC hardware can compete on a price/delivered performance basis long term with COTS microprocessors and systems?','',10731);
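--
-- Note on structure: the final integer in each subevents row above matches an
-- eventID in the events table (e.g. 10716, 10719), tying each talk or panel to
-- its parent session. A minimal sketch of walking that link follows; it is
-- left commented out because the subevents column names used here (title,
-- startTime, eventID) are assumptions, since the subevents CREATE TABLE
-- appears earlier in this dump and may use different names.
-- SELECT e.sessionTitle, s.title
-- FROM events e, subevents s
-- WHERE s.eventID = e.eventID AND e.eventID = 10716
-- ORDER BY s.startTime;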
--
-- Table structure for table 'user_events'
--
CREATE TABLE user_events (
email text,
date date default '0000-00-00',
sessionTitle text,
subSessionTitle text
) TYPE=MyISAM;
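--
-- user_events is defined with no primary key or index, so per-user lookups
-- scan the whole table. If this dump is loaded for interactive querying, an
-- index along the lines of the hypothetical one below may help; MySQL
-- requires an explicit prefix length when indexing a TEXT column, and the
-- 64-character prefix here is an arbitrary choice.
-- CREATE INDEX idx_user_events_email ON user_events (email(64));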
--
-- Dumping data for table 'user_events'
--
INSERT INTO user_events VALUES ('alex@branchbt.com','2003-11-20','Showcase III','Splat Theremin');
INSERT INTO user_events VALUES ('alex@branchbt.com','2003-11-20','Scheduling and Communication','Scalable NIC-based reduction on Large-scale Clusters');
INSERT INTO user_events VALUES ('alex@branchbt.com','2003-11-20','Showcase III','Computational Steering in a Collaborative Environment');
INSERT INTO user_events VALUES ('alex@branchbt.com','2003-11-20','Scheduling and Communication','BCS MPI: a New Approach in the System Software Design for Large-Scale Parallel Computers');
INSERT INTO user_events VALUES ('alex@branchbt.com','2003-11-20','Scheduling and Communication','Improving the Scalability of Parallel Jobs by adding Parallel Awareness to the Operating System');
INSERT INTO user_events VALUES ('alex@branchbt.com','2003-11-20','Awards Session','');
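--
-- A sketch of pulling one attendee's personal schedule back out of the table;
-- it uses only the user_events columns defined above, and the email address
-- is one that actually appears in the data.
SELECT date, sessionTitle, subSessionTitle
FROM user_events
WHERE email = 'alex@branchbt.com'
ORDER BY date, sessionTitle;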