PandA Publications

This page contains the list of publications related to the PandA framework.

The publication list is organized by year. Within each year, the most recent publications appear first.

Journals

2011

  • G. Kuzmanov, V. Sima, K. Bertels, J. de Coutinho, W. Luk, G. Marchiori, R. Tripiccione and F. Ferrandi, "hArtes: Holistic Approach to Reconfigurable Real-Time Embedded Systems," in Reconfigurable Computing: From FPGAs to Hardware/Software Codesign, pp. 91, 2011. bibtex
    @comment{Chapter of an edited Springer book: stored as @incollection with
             the book title in booktitle (it is not a journal).}
    @incollection {kuzmanov2011hartes,
      author    = {G. Kuzmanov and V. M. Sima and K. Bertels and J. G. F. de Coutinho and W. Luk and G. Marchiori and R. Tripiccione and F. Ferrandi},
      title     = {{hArtes}: Holistic Approach to Reconfigurable Real-Time Embedded Systems},
      booktitle = {Reconfigurable Computing: From FPGAs to Hardware/Software Codesign},
      publisher = {Springer Verlag},
      pages     = {91},
      year      = {2011},
      yy        = {2011}
    }
  • F. Bettarelli, E. Ciavattini, A. Lattanzi, G. Beltrame, F. Ferrandi, L. Fossati, C. Pilato, D. Sciuto, R. Meeuws, S. Ostadzadeh and others, "Extensions of the hArtes Tool Chain," in Hardware-Software Co-Design for Heterogeneous Multi-Core Platforms: The Hartes Toolchain, pp. 193, 2011. bibtex
    @comment{Chapter of an edited Springer book: stored as @incollection with
             the book title in booktitle (it is not a journal).}
    @incollection {bettarelli2011extensions,
      author    = {F. Bettarelli and E. Ciavattini and A. Lattanzi and G. Beltrame and F. Ferrandi and L. Fossati and C. Pilato and D. Sciuto and R. J. Meeuws and S. A. Ostadzadeh and others},
      title     = {Extensions of the {hArtes} Tool Chain},
      booktitle = {Hardware-Software Co-Design for Heterogeneous Multi-Core Platforms: The Hartes Toolchain},
      publisher = {Springer Verlag},
      pages     = {193},
      year      = {2011},
      yy        = {2011}
    }

2010

  • F. Ferrandi, P. L. Lanzi, C. Pilato, D. Sciuto and A. Tumeo, "Ant Colony Heuristic for Mapping and Scheduling Task and Communications on Heterogeneous Embedded Systems," in IEEE Transactions on COMPUTER-AIDED DESIGN of Integrated Circuits and Systems, Vol. 29, No. 6, pp. 911-924, 2010.
    abstract bibtex
    Abstract: To exploit the power of modern heterogeneous multiprocessor embedded platforms on partitioned applications, the designer usually needs to efficiently map and schedule all the tasks and the communications of the application, respecting the constraints imposed by the target architecture. Since the problem is heavily constrained, common methods used to explore such design space usually fail, obtaining low-quality solutions. In this paper, we propose an ant colony optimization (ACO) heuristic that, given a model of the target architecture and the application, efficiently executes both scheduling and mapping to optimize the application performance. We compare our approach with several other heuristics, including simulated annealing, tabu search, and genetic algorithms, on the performance to reach the optimum value and on the potential to explore the design space. We show that our approach obtains better results than other heuristics by at least 16% on average, despite an overhead in execution time. Finally, we validate the approach by scheduling and mapping a JPEG encoder on a realistic target architecture.

    @comment{TCAD2010: percent sign in the abstract is escaped so it cannot
             start a LaTeX comment when the abstract is typeset; month uses
             the standard three-letter macro.}
    @article {TCAD2010,
      author   = {Fabrizio Ferrandi and Pier Luca Lanzi and Christian Pilato and Donatella Sciuto and Antonino Tumeo},
      title    = {Ant Colony Heuristic for Mapping and Scheduling Task and Communications on Heterogeneous Embedded Systems},
      journal  = {IEEE Transactions on COMPUTER-AIDED DESIGN of Integrated Circuits and Systems},
      volume   = {29},
      number   = {6},
      pages    = {911--924},
      month    = jun,
      year     = {2010},
      issn     = {0278-0070},
      abstract = {To exploit the power of modern heterogeneous multiprocessor embedded platforms on partitioned applications, the designer usually needs to efficiently map and schedule all the tasks and the communications of the application, respecting the constraints imposed by the target architecture. Since the problem is heavily constrained, common methods used to explore such design space usually fail, obtaining low-quality solutions. In this paper, we propose an ant colony optimization (ACO) heuristic that, given a model of the target architecture and the application, efficiently executes both scheduling and mapping to optimize the application performance. We compare our approach with several other heuristics, including simulated annealing, tabu search, and genetic algorithms, on the performance to reach the optimum value and on the potential to explore the design space. We show that our approach obtains better results than other heuristics by at least 16\% on average, despite an overhead in execution time. Finally, we validate the approach by scheduling and mapping a JPEG encoder on a realistic target architecture.},
      yy       = {2010},
      mm       = {6}
    }
  • C. Pilato, D. Loiacono, A. Tumeo, F. Ferrandi, P. L. Lanzi and D. Sciuto, "Speeding-Up Expensive Evaluations in High-Level Synthesis Using Solution Modeling and Fitness Inheritance," in "Computational Intelligence in Expensive Optimization Problems". Y. Tenne and C.-K. Goh (Eds.), Springer, pp. 701-723, 2010. bibtex
    @comment{Book_hls_2010: chapter in an edited book. The book title, editors
             and publisher were previously packed into the journal field; they
             now live in booktitle / editor / publisher.}
    @incollection {Book_hls_2010,
      author    = {Christian Pilato and Daniele Loiacono and Antonino Tumeo and Fabrizio Ferrandi and Pier Luca Lanzi and Donatella Sciuto},
      title     = {Speeding-Up Expensive Evaluations in High-Level Synthesis Using Solution Modeling and Fitness Inheritance},
      booktitle = {Computational Intelligence in Expensive Optimization Problems},
      editor    = {Y. Tenne and C.-K. Goh},
      publisher = {Springer},
      pages     = {701--723},
      month     = feb,
      year      = {2010},
      yy        = {2010},
      mm        = {2}
    }
  • K. Bertels, V. Sima, Y. Yankova, G. Kuzmanov, W. Luk, G. Coutinho, F. Ferrandi, C. Pilato, M. Lattuada, D. Sciuto and A. Michelotti, "HArtes: Hardware-Software Codesign for Heterogeneous Multicore Platforms," in IEEE Micro, Vol. 30, pp. 88-97, 2010.
    abstract bibtex doi
    Abstract: Developing heterogeneous multicore platforms requires choosing the best hardware configuration for mapping the application, and modifying that application so that different parts execute on the most appropriate hardware component. The hArtes toolchain provides the option of automatic or semi-automatic support for this mapping. During test and validation on several computation-intensive applications, hArtes achieved substantial speedups and drastically reduced development times.
    @comment{Bertels2010: doi holds the bare identifier (the resolver URL stays
             in url); the issue number is stored in the standard number field;
             month uses the standard macro.}
    @article {Bertels2010,
      author     = {Koen Bertels and Vlad-Mihai Sima and Yana Yankova and Georgi Kuzmanov and Wayne Luk and Gabriel Coutinho and Fabrizio Ferrandi and Christian Pilato and Marco Lattuada and Donatella Sciuto and Andrea Michelotti},
      title      = {{HArtes}: Hardware-Software Codesign for Heterogeneous Multicore Platforms},
      journal    = {IEEE Micro},
      volume     = {30},
      number     = {5},
      pages      = {88--97},
      numpages   = {10},
      month      = sep,
      year       = {2010},
      issue_date = {September 2010},
      publisher  = {IEEE Computer Society Press},
      address    = {Los Alamitos, CA, USA},
      issn       = {0272-1732},
      doi        = {10.1109/MM.2010.91},
      url        = {http://dx.doi.org/10.1109/MM.2010.91},
      acmid      = {1916498},
      keywords   = {reconfigurable hardware, hardware-software interface, compiler, tool chain, hArtes, heterogeneous multicore platforms},
      abstract   = {Developing heterogeneous multicore platforms requires choosing the best hardware configuration for mapping the application, and modifying that application so that different parts execute on the most appropriate hardware component. The hArtes toolchain provides the option of automatic or semi-automatic support for this mapping. During test and validation on several computation-intensive applications, hArtes achieved substantial speedups and drastically reduced development times.},
      yy         = {2010},
      mm         = {9}
    }

2008

  • C. Pilato, A. Tumeo, G. Palermo, F. Ferrandi, P. L. Lanzi and D. Sciuto, "Improving Evolutionary Exploration to Area-Time Optimization of FPGA Designs," in Journal of Systems Architecture - Embedded Systems Design, Vol. 54, No. 11, pp. 1046-1057, 2008.
    abstract bibtex
    Abstract: This paper presents a new methodology based on evolutionary multi-objective optimization (EMO) to synthesize multiple complex modules on reprogrammable devices. It starts from a behavioral description written in a common high-level language (for instance C) to automatically produce the register-transfer level (RTL) design in a hardware description language (e.g. Verilog). Since all high-level synthesis problems (scheduling, resource allocation and binding) are notoriously NP-complete and interdependent, these problems should be considered simultaneously. This drives to a wide design space, that needs to be thoroughly explored to obtain solutions able to satisfy the design constraints (e.g. area and performance). Since evolutionary algorithms are good candidates to tackle such complex explorations, in this paper we provide a solution based on the Non-dominated Sorting Genetic Algorithm (NSGA-II) to explore the design space and obtain the best solutions in terms of performance given the area constraints of a target reprogrammable device, for instance a Field Programmable Gate Array (FPGA). To further reduce the time needed for the exploration, that theoretically requires the complete logic synthesis of each visited point, the evaluation of the solutions have been speed-up by using two techniques: a good cost estimation model and a technique to exploit fitness inheritance by substituting the expensive actual evaluations with estimations based on closeness in an hypothetical design space. We show on the JPEG case study that the proposed approach provides good results in terms of trade-off between total area occupied and execution time. The results shows also that the Pareto-optimal set obtained by applying the proposed fitness inheritance model well approximates the set obtained without the proposed technique and reduces the overall execution time up to the 25% in average.

    @comment{PilatoJSA2008: page range uses an en-dash (double hyphen) and the
             percent sign in the abstract is escaped so it cannot start a LaTeX
             comment when typeset.}
    @article {PilatoJSA2008,
      author   = {Christian Pilato and Antonino Tumeo and Gianluca Palermo and Fabrizio Ferrandi and Pier Luca Lanzi and Donatella Sciuto},
      title    = {Improving Evolutionary Exploration to Area-Time Optimization of {FPGA} Designs},
      journal  = {Journal of Systems Architecture - Embedded Systems Design},
      volume   = {54},
      number   = {11},
      pages    = {1046--1057},
      year     = {2008},
      abstract = {This paper presents a new methodology based on evolutionary multi-objective optimization (EMO) to synthesize multiple complex modules on reprogrammable devices. It starts from a behavioral description written in a common high-level language (for instance C) to automatically produce the register-transfer level (RTL) design in a hardware description language (e.g. Verilog). Since all high-level synthesis problems (scheduling, resource allocation and binding) are notoriously NP-complete and interdependent, these problems should be considered simultaneously. This drives to a wide design space, that needs to be thoroughly explored to obtain solutions able to satisfy the design constraints (e.g. area and performance). Since evolutionary algorithms are good candidates to tackle such complex explorations, in this paper we provide a solution based on the Non-dominated Sorting Genetic Algorithm (NSGA-II) to explore the design space and obtain the best solutions in terms of performance given the area constraints of a target reprogrammable device, for instance a Field Programmable Gate Array (FPGA). To further reduce the time needed for the exploration, that theoretically requires the complete logic synthesis of each visited point, the evaluation of the solutions have been speed-up by using two techniques: a good cost estimation model and a technique to exploit fitness inheritance by substituting the expensive actual evaluations with estimations based on closeness in an hypothetical design space. We show on the JPEG case study that the proposed approach provides good results in terms of trade-off between total area occupied and execution time. The results shows also that the Pareto-optimal set obtained by applying the proposed fitness inheritance model well approximates the set obtained without the proposed technique and reduces the overall execution time up to the 25\% in average.},
      yy       = {2008}
    }

International/National Conferences

2012

  • M. Lattuada and F. Ferrandi, "Performance estimation of embedded software with confidence levels," in Design Automation Conference (ASP-DAC), 2012 17th Asia and South Pacific, pp. 573-578, IEEE, 2012. bibtex
    @comment{lattuada2012performance: ASP-DAC 2012 conference paper.}
    @inproceedings {lattuada2012performance,
      author       = {M. Lattuada and F. Ferrandi},
      title        = {Performance estimation of embedded software with confidence levels},
      booktitle    = {Design Automation Conference (ASP-DAC), 2012 17th Asia and South Pacific},
      pages        = {573--578},
      organization = {IEEE},
      year         = {2012},
      yy           = {2012}
    }

2011

  • M. Elhøj, A. Reis, R. Ribas, F. Ferrandi, C. Pilato, F. Moll, M. Miranda, P. Dobrovolný, N. Woolaway, A. Grasset, P. Bonnot, G. Desoli and D. Pandini, "SYNAPTIC Project: Regularity Applied to Enhance Manufacturability and Yield at Several Abstraction Levels," in Proceedings of the 1st Exploiting Regularity in the Design of IPs, Architectures and Platforms Workshop, (ERDIAP '11), pp. 189-192, 2011.
    abstract bibtex
    Abstract: In this paper, we describe a project to enhance manufacturability at several abstraction levels. The project targets several different abstraction levels seen through a design flow targeting regular approaches. The project intends to verify the role of applying regularity at different levels compared to a golden design flow used as reference. The SYNAPTIC project will span for three years involving eight different institutions, and this paper describes the intended goals.

    @comment{ERDIAP2011: accent macros restored (they had lost their backslash),
             empty doi removed, leading "In" dropped from booktitle (the style
             adds it), and the second editor is no longer braced as a single
             surname.}
    @inproceedings {ERDIAP2011,
      author    = {M. Elh{\o}j and A. Reis and R. Ribas and F. Ferrandi and C. Pilato and F. Moll and M. Miranda and P. Dobrovoln{\'y} and N. Woolaway and A. Grasset and P. Bonnot and G. Desoli and D. Pandini},
      title     = {{SYNAPTIC} Project: Regularity Applied to Enhance Manufacturability and Yield at Several Abstraction Levels},
      editor    = {Dimitrios Soudris and Wolfgang Karl},
      booktitle = {Proceedings of the 1st Exploiting Regularity in the Design of IPs, Architectures and Platforms Workshop, {(ERDIAP '11)}},
      pages     = {189--192},
      numpages  = {4},
      publisher = {VDE Verlag},
      isbn      = {978-3-8007-3333-0},
      month     = feb,
      year      = {2011},
      abstract  = {In this paper, we describe a project to enhance manufacturability at several abstraction levels. The project targets several different abstraction levels seen through a design flow targeting regular approaches. The project intends to verify the role of applying regularity at different levels compared to a golden design flow used as reference. The SYNAPTIC project will span for three years involving eight different institutions, and this paper describes the intended goals.},
      yy        = {2011},
      mm        = {2}
    }
  • C. Pilato, F. Ferrandi and D. Pandini, "Evaluating Static CMOS Complex Cells in Technology Mapping," in Proceedings of the 1st Exploiting Regularity in the Design of IPs, Architectures and Platforms Workshop, (ERDIAP '11), pp. 222-229, 2011.
    abstract bibtex
    Abstract: Current EDA tools are often based on standard-cell libraries for the design of modern complex systems-on-chip. In general, the composition of such libraries does not follow a fixed rule, but it is mainly based on the experience of the chip foundries. They compact or extend the standard cell libraries by removing or adding certain implementations, respectively, in order to optimize specific goals (e.g., area, timing or power consumption) or a specific set of designs. In this paper, we define and present a comprehensive study about the effects of using static CMOS complex gates in technology mapping. The impact of such cells has been evaluated on several benchmarks usually adopted in logic synthesis targeting a 45nm technology with Synopsys Design Compiler.

    @comment{ERDIAP2011d: empty doi removed, leading "In" dropped from booktitle
             (the style adds it), second editor no longer braced as a single
             surname, acronym protected in the title, and two abstract typos
             corrected (45nm, Synopsys) -- NOTE(review): confirm against the
             published abstract.}
    @inproceedings {ERDIAP2011d,
      author    = {Christian Pilato and Fabrizio Ferrandi and Davide Pandini},
      title     = {Evaluating Static {CMOS} Complex Cells in Technology Mapping},
      editor    = {Dimitrios Soudris and Wolfgang Karl},
      booktitle = {Proceedings of the 1st Exploiting Regularity in the Design of IPs, Architectures and Platforms Workshop, {(ERDIAP '11)}},
      pages     = {222--229},
      numpages  = {8},
      publisher = {VDE Verlag},
      isbn      = {978-3-8007-3333-0},
      month     = feb,
      year      = {2011},
      abstract  = {Current EDA tools are often based on standard-cell libraries for the design of modern complex systems-on-chip. In general, the composition of such libraries does not follow a fixed rule, but it is mainly based on the experience of the chip foundries. They compact or extend the standard cell libraries by removing or adding certain implementations, respectively, in order to optimize specific goals (e.g., area, timing or power consumption) or a specific set of designs. In this paper, we define and present a comprehensive study about the effects of using static CMOS complex gates in technology mapping. The impact of such cells has been evaluated on several benchmarks usually adopted in logic synthesis targeting a 45nm technology with Synopsys Design Compiler.},
      yy        = {2011},
      mm        = {2}
    }
  • C. Pilato, F. Ferrandi and D. Pandini, "A design methodology for the automatic sizing of standard-cell libraries," in Proceedings of the 21st ACM Great Lakes Symposium on VLSI, pp. 151-156, 2011. bibtex doi
    @comment{PilatoFP11: accent macro in the editor list restored, doi reduced
             to the bare identifier, page range uses a double hyphen.}
    @inproceedings {PilatoFP11,
      author    = {Christian Pilato and Fabrizio Ferrandi and Davide Pandini},
      title     = {A design methodology for the automatic sizing of standard-cell libraries},
      editor    = {David Atienza and Yuan Xie and Jos{\'e} L. Ayala and Ken S. Stevens},
      booktitle = {Proceedings of the 21st ACM Great Lakes Symposium on VLSI},
      pages     = {151--156},
      publisher = {ACM},
      isbn      = {978-1-4503-0667-6},
      doi       = {10.1145/1973009.1973040},
      year      = {2011},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      yy        = {2011}
    }
  • C. Pilato, V. Castellana, S. Lovergine and F. Ferrandi, "A runtime adaptive controller for supporting hardware components with variable latency," in Adaptive Hardware and Systems (AHS), 2011 NASA/ESA Conference on, pp. 153-160, IEEE, 2011. bibtex
    @comment{pilato2011runtime: initials are space-separated so BibTeX parses
             each one as its own given-name token.}
    @inproceedings {pilato2011runtime,
      author       = {C. Pilato and V. G. Castellana and S. Lovergine and F. Ferrandi},
      title        = {A runtime adaptive controller for supporting hardware components with variable latency},
      booktitle    = {Adaptive Hardware and Systems (AHS), 2011 NASA/ESA Conference on},
      pages        = {153--160},
      organization = {IEEE},
      year         = {2011},
      yy           = {2011}
    }
  • C. Pilato, F. Ferrandi and D. Sciuto, "A design methodology to implement memory accesses in High-Level Synthesis," in Hardware/Software Codesign and System Synthesis (CODES+ ISSS), 2011 Proceedings of the 9th International Conference on, pp. 49-58, IEEE, 2011. bibtex
    @comment{pilato2011design: CODES+ISSS 2011 conference paper.}
    @inproceedings {pilato2011design,
      author       = {C. Pilato and F. Ferrandi and D. Sciuto},
      title        = {A design methodology to implement memory accesses in High-Level Synthesis},
      booktitle    = {Hardware/Software Codesign and System Synthesis (CODES+ ISSS), 2011 Proceedings of the 9th International Conference on},
      pages        = {49--58},
      organization = {IEEE},
      year         = {2011},
      yy           = {2011}
    }

2010

  • C. Pilato, F. Ferrandi and D. Pandini, "A Fast Heuristic for Extending Standard Cell Libraries with Regular Macro Cells," in Proc. ISVLSI 2010: Proceedings of the IEEE Computer Society Annual Symposium on VLSI, 2010. bibtex
    @comment{isvlsi2010: ISVLSI 2010 conference paper.}
    @inproceedings {isvlsi2010,
      author    = {Christian Pilato and Fabrizio Ferrandi and Davide Pandini},
      title     = {A Fast Heuristic for Extending Standard Cell Libraries with Regular Macro Cells},
      booktitle = {Proc. ISVLSI 2010: Proceedings of the IEEE Computer Society Annual Symposium on VLSI},
      location  = {Lixouri, Kefalonia, Greece},
      year      = {2010},
      yy        = {2010}
    }
  • C. Pilato, F. Ferrandi and D. Sciuto, "A Design Exploration Framework for Mapping and Scheduling onto Heterogeneous MPSoCs," in Workshop on Mapping Applications to MPSoCs 2010, 2010. bibtex
    @comment{Pilato2010: MPSoC mapping workshop paper, 2010.}
    @inproceedings {Pilato2010,
      author    = {Christian Pilato and Fabrizio Ferrandi and Donatella Sciuto},
      title     = {A Design Exploration Framework for Mapping and Scheduling onto Heterogeneous {MPSoCs}},
      booktitle = {Workshop on Mapping Applications to MPSoCs 2010},
      location  = {St. Goar, Germany},
      year      = {2010},
      yy        = {2010}
    }
  • F. Ferrandi, M. Lattuada, C. Pilato and D. Sciuto, "Performance Estimation for Mapping and Scheduling Parallel Applications on Heterogeneous Multi-Processor Systems," in Workshop on The European landscape of reconfigurable computing: Lessons learned, new perspectives and innovations, 2010. bibtex
    @comment{FERRANDI2010:DATE: workshop paper held during DATE '10.}
    @inproceedings {FERRANDI2010:DATE,
      author    = {Fabrizio Ferrandi and Marco Lattuada and Christian Pilato and Donatella Sciuto},
      title     = {Performance Estimation for Mapping and Scheduling Parallel Applications on Heterogeneous Multi-Processor Systems},
      booktitle = {Workshop on The European landscape of reconfigurable computing: Lessons learned, new perspectives and innovations},
      location  = {held during DATE '10, Dresden, Germany},
      year      = {2010},
      yy        = {2010}
    }
  • F. Ferrandi, C. Pilato, D. Sciuto and A. Tumeo, "Mapping and Scheduling of Parallel C Applications with Ant Colony Optimization onto Heterogeneous Reconfigurable MPSoCs," in Proc. ASPDAC 2010: Proceedings of the IEEE Asia and South Pacific Design Automation Conference, pp. 799-804, 2010. bibtex
    @comment{Aspdac2010: the language name C and the acronym MPSoCs are
             brace-protected so sentence-casing styles do not lowercase them.}
    @inproceedings {Aspdac2010,
      author    = {Fabrizio Ferrandi and Christian Pilato and Donatella Sciuto and Antonino Tumeo},
      title     = {Mapping and Scheduling of Parallel {C} Applications with Ant Colony Optimization onto Heterogeneous Reconfigurable {MPSoCs}},
      booktitle = {Proc. ASPDAC 2010: Proceedings of the IEEE Asia and South Pacific Design Automation Conference},
      pages     = {799--804},
      location  = {Taipei, Taiwan},
      year      = {2010},
      yy        = {2010}
    }
  • M. Lattuada and F. Ferrandi, "Performance modeling of embedded applications with zero architectural knowledge," in Proceedings of the eighth IEEE/ACM/IFIP international conference on Hardware/software codesign and system synthesis, CODES/ISSS '10 series, pp. 277-286, 2010.
    abstract bibtex doi
    Abstract: Performance estimation is a key step in the development of an embedded system. Normally, the performance evaluation is performed using a simulator or a performance mathematical model of the target architecture. However, both these approaches are usually based on the knowledge of the architectural details of the target. In this paper we present a methodology for automatically building an analytical model to estimate the performance of an application on a generic processor without requiring any information about the processor architecture but the one provided by the GNU GCC Intermediate Representation. The proposed methodology exploits the linear regression technique based on an application analysis performed on the Register Transfer Level internal representation of the GNU GCC compiler. The benefits of working with this type of model and with this intermediate representation are three: we take into account most of the compiler optimizations, we implicitly consider some architectural characteristics of the target processor and we can easily estimate the performance of portions of the specification. We validate our approach by evaluating with cross-validation technique the accuracy and the generality of the performance models built for the ARM926EJ-S and the LEON3 processors
    @comment{Lattuada2010a: doi holds the bare identifier; the resolver URL is
             kept in the url field.}
    @inproceedings {Lattuada2010a,
      author    = {Marco Lattuada and Fabrizio Ferrandi},
      title     = {Performance modeling of embedded applications with zero architectural knowledge},
      booktitle = {Proceedings of the eighth IEEE/ACM/IFIP international conference on Hardware/software codesign and system synthesis},
      series    = {CODES/ISSS '10},
      pages     = {277--286},
      numpages  = {10},
      location  = {Scottsdale, Arizona, USA},
      publisher = {ACM},
      address   = {New York, NY, USA},
      isbn      = {978-1-60558-905-3},
      doi       = {10.1145/1878961.1879010},
      url       = {http://doi.acm.org/10.1145/1878961.1879010},
      acmid     = {1879010},
      year      = {2010},
      keywords  = {gnu gcc, performance estimation, profiling},
      abstract  = {Performance estimation is a key step in the development of an embedded system. Normally, the performance evaluation is performed using a simulator or a performance mathematical model of the target architecture. However, both these approaches are usually based on the knowledge of the architectural details of the target. In this paper we present a methodology for automatically building an analytical model to estimate the performance of an application on a generic processor without requiring any information about the processor architecture but the one provided by the GNU GCC Intermediate Representation. The proposed methodology exploits the linear regression technique based on an application analysis performed on the Register Transfer Level internal representation of the GNU GCC compiler. The benefits of working with this type of model and with this intermediate representation are three: we take into account most of the compiler optimizations, we implicitly consider some architectural characteristics of the target processor and we can easily estimate the performance of portions of the specification. We validate our approach by evaluating with cross-validation technique the accuracy and the generality of the performance models built for the ARM926EJ-S and the LEON3 processors},
      yy        = {2010}
    }
  • M. Lattuada and F. Ferrandi, "Combining Target-independent Analysis with Dynamic Profiling to Build the Performance Model of a DSP," in Proceedings of the 2010 10th IEEE International Conference on Computer and Information Technology, CIT '10 series, pp. 1895-1901, 2010.
    abstract bibtex doi
    Abstract: Fast and accurate performance estimation is a key aspect of heterogeneous embedded systems design flow, since cycle-accurate simulators, when exist, are usually too slow to be used during design space exploration. Performance estimation techniques are usually based on combination of estimation of the single processing elements which compose the system. Architectural characteristics of Digital Signal Processors (DSP), such as the presence of Single Instruction Multiple Data operations or of special hardware units to control loop executions, introduce peculiar aspects in the performance estimation problem. In this paper we present a methodology to estimate the performance of a function on a given dataset on a DSP. Estimation is performed combining the host profiling data with the function GNU GCC GIMPLE representation. Starting from the results of this analysis, we build a performance model of a DSP by exploiting the Linear Regression Technique. Use of GIMPLE representation allows to take directly into account the target-independent optimizations performed by the DSP compiler. We validate our approach by building a performance model of the MagicV DSP and by testing the model on a set of significative benchmarks.
    @comment{Lattuada2010b: doi holds the bare identifier; the resolver URL is
             kept in the url field.}
    @inproceedings {Lattuada2010b,
      author    = {Marco Lattuada and Fabrizio Ferrandi},
      title     = {Combining Target-independent Analysis with Dynamic Profiling to Build the Performance Model of a {DSP}},
      booktitle = {Proceedings of the 2010 10th IEEE International Conference on Computer and Information Technology},
      series    = {CIT '10},
      pages     = {1895--1901},
      numpages  = {7},
      publisher = {IEEE Computer Society},
      address   = {Washington, DC, USA},
      isbn      = {978-0-7695-4108-2},
      doi       = {10.1109/CIT.2010.324},
      url       = {http://dx.doi.org/10.1109/CIT.2010.324},
      acmid     = {1901146},
      year      = {2010},
      keywords  = {DSP, performance estimation, linear regression},
      abstract  = {Fast and accurate performance estimation is a key aspect of heterogeneous embedded systems design flow, since cycle-accurate simulators, when exist, are usually too slow to be used during design space exploration. Performance estimation techniques are usually based on combination of estimation of the single processing elements which compose the system. Architectural characteristics of Digital Signal Processors (DSP), such as the presence of Single Instruction Multiple Data operations or of special hardware units to control loop executions, introduce peculiar aspects in the performance estimation problem. In this paper we present a methodology to estimate the performance of a function on a given dataset on a DSP. Estimation is performed combining the host profiling data with the function GNU GCC GIMPLE representation. Starting from the results of this analysis, we build a performance model of a DSP by exploiting the Linear Regression Technique. Use of GIMPLE representation allows to take directly into account the target-independent optimizations performed by the DSP compiler. We validate our approach by building a performance model of the MagicV DSP and by testing the model on a set of significative benchmarks.},
      yy        = {2010}
    }
  • M. Lattuada and F. Ferrandi, "Fine grain analysis of simulators accuracy for calibrating performance models," in Rapid System Prototyping (RSP), 2010 21st IEEE International Symposium on, pp. 1-7, 2010.
    abstract bibtex doi
    Abstract: In embedded system design, the tuning and validation of a cycle accurate simulator is a difficult task. The designer has to assure that the estimation error of the simulator meets the design constraints on every application. If an application is not correctly estimated, the designer has to identify on which parts of the application the simulator introduces an estimation error and consequently fix the simulator. However, detecting which are the mispredicted parts of a very large application can be a difficult process which requires a lot of time. In this paper we propose a methodology which helps the designer to fast and automatically isolate the portions of the application mispredicted by a simulator. This is accomplished by recursively analyzing the application source code trace highlighting the mispredicted sections of source code. The results obtained applying the methodology to the TSIM simulator show how our methodology is able to fast analyze large applications isolating small portions of mispredicted code.
    @comment{Lattuada2010c: second author's given name corrected (was
             "Ferrandi Ferrandi"), empty issn/number/volume fields removed,
             page range normalised, month uses the standard macro.}
    @inproceedings {Lattuada2010c,
      author    = {Marco Lattuada and Fabrizio Ferrandi},
      title     = {Fine grain analysis of simulators accuracy for calibrating performance models},
      booktitle = {Rapid System Prototyping (RSP), 2010 21st IEEE International Symposium on},
      pages     = {1--7},
      month     = jun,
      year      = {2010},
      doi       = {10.1109/RSP.2010.5656414},
      keywords  = {TSIM simulator;code misprediction;cycle accurate simulator;embedded system design;estimation error;fine grain analysis;performance model calibration;source code trace;embedded systems;iterative methods;multiprocessing systems;performance evaluation;recursive estimation;source coding;},
      abstract  = {In embedded system design, the tuning and validation of a cycle accurate simulator is a difficult task. The designer has to assure that the estimation error of the simulator meets the design constraints on every application. If an application is not correctly estimated, the designer has to identify on which parts of the application the simulator introduces an estimation error and consequently fix the simulator. However, detecting which are the mispredicted parts of a very large application can be a difficult process which requires a lot of time. In this paper we propose a methodology which helps the designer to fast and automatically isolate the portions of the application mispredicted by a simulator. This is accomplished by recursively analyzing the application source code trace highlighting the mispredicted sections of source code. The results obtained applying the methodology to the TSIM simulator show how our methodology is able to fast analyze large applications isolating small portions of mispredicted code.},
      yy        = {2010},
      mm        = {6}
    }

  • K. Bertels, F. Bettarelli, S. Cecchi, E. Ciavattini, J. De Figueiredo Coutinho, F. Ferrandi, W. Luk, F. Piazza, C. Pilato, A. Primavera, V. Sima and R. Toppi, "The hArtes CarLab: A New Approach to Advanced Algorithms Development for Automotive Audio," in Audio Engineering Society Convention 129, 2010. bibtex doi
    @inproceedings {bertels2010the,
      url = {http://www.aes.org/e-lib/browse.cfm?elib=15605},
      year = {2010},
      month = nov,
      booktitle = {Audio Engineering Society Convention 129},
      title = {The {hArtes} {CarLab}: A New Approach to Advanced Algorithms Development for Automotive Audio},
      yy = {2010},
      mm = {11},
      author = {Koen Bertels and Ferruccio Bettarelli and Stefania Cecchi and Emanuele Ciavattini and Jose De Figueiredo Coutinho and Fabrizio Ferrandi and Wayne Luk and Francesco Piazza and Christian Pilato and Andrea Primavera and Vlad Sima and Romolo Toppi} }

2009

  • A. Tumeo, M. Branca, L. Camerini, C. Pilato, P. L. Lanzi, F. Ferrandi and D. Sciuto, "Mapping pipelined applications onto heterogeneous embedded systems: a Bayesian optimization algorithm based approach," in Proc. CODES+ISSS '09: Proceedings of the 7th IEEE/ACM international conference on Hardware/software codesign and system synthesis, pp. 443-452, 2009. bibtex
    @inproceedings {Codes2009,
      year = {2009},
      location = {Grenoble, France},
      pages = {443--452},
      booktitle = {Proc. CODES+ISSS '09: Proceedings of the 7th IEEE/ACM international conference on Hardware/software codesign and system synthesis},
      title = {Mapping pipelined applications onto heterogeneous embedded systems: a {Bayesian} optimization algorithm based approach},
      yy = {2009},
      author = {Antonino Tumeo and Marco Branca and Lorenzo Camerini and Christian Pilato and Pier Luca Lanzi and Fabrizio Ferrandi and Donatella Sciuto} }
  • M. Branca, L. Camerini, F. Ferrandi, P. L. Lanzi, C. Pilato, D. Sciuto and A. Tumeo, "Evolutionary algorithms for the mapping of pipelined applications onto heterogeneous embedded systems," in Proc. GECCO '09: Proceedings of the 11th Annual conference on Genetic and evolutionary computation, pp. 1435-1442, 2009. bibtex
    @inproceedings {Gecco2009,
      year = {2009},
      location = {Montreal, Qu{\'e}bec, Canada},
      pages = {1435--1442},
      booktitle = {Proc. GECCO '09: Proceedings of the 11th Annual conference on Genetic and evolutionary computation},
      title = {Evolutionary algorithms for the mapping of pipelined applications onto heterogeneous embedded systems},
      yy = {2009},
      author = {Marco Branca and Lorenzo Camerini and Fabrizio Ferrandi and Pier Luca Lanzi and Christian Pilato and Donatella Sciuto and Antonino Tumeo} }
  • M. Rashid, F. Ferrandi and K. Bertels, "hArtes design flow for heterogeneous platforms," in 10th International Symposium on Quality of Electronic Design (ISQED 2009), pp. 330-338, 2009.
    abstract bibtex doi
    Abstract: The hArtes -Holistic Approach to Reconfigurable real Time Embedded Systems- design flow addresses the development of an holistic tool-chain for reconfigurable heterogeneous platforms. The entire tool-chain consists of three phases: Algorithm Exploration and Translation, Design Space Exploration and System Synthesis.
    @inproceedings {RashidFB09,
      abstract = {The hArtes -Holistic Approach to Reconfigurable real Time Embedded Systems- design flow addresses the development of an holistic tool-chain for reconfigurable heterogeneous platforms. The entire tool-chain consists of three phases: Algorithm Exploration and Translation, Design Space Exploration and System Synthesis.},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      ee = {http://dx.doi.org/10.1109/ISQED.2009.4810316},
      doi = {10.1109/ISQED.2009.4810316},
      pages = {330--338},
      year = {2009},
      publisher = {IEEE},
      booktitle = {10th International Symposium on Quality of Electronic Design (ISQED 2009)},
      title = {{hArtes} design flow for heterogeneous platforms},
      yy = {2009},
      author = {Muhammad Rashid and Fabrizio Ferrandi and Koen Bertels} }
  • F. Ferrandi, M. Lattuada, C. Pilato and A. Tumeo, "Performance estimation for task graphs combining sequential path profiling and control dependence regions," in MEMOCODE'09: Proceedings of the 7th IEEE/ACM international conference on Formal Methods and Models for Codesign, pp. 131-140, 2009.
    abstract bibtex
    Abstract: The speed-up estimation of parallelized code is crucial to efficiently compare different parallelization techniques or task graph transformations. Unfortunately, most of the time, during the parallelization of a specification, the information that can be extracted by profiling the corresponding sequential code (e.g. the most executed paths) are not properly taken into account. In particular, correlating sequential path profiling with the corresponding parallelized code can help in the identification of code hot spots, opening new possibilities for automatic parallelization. For this reason, starting from a well-known profiling technique, the Efficient Path Profiling, we propose a methodology that estimates the speed-up of a parallelized specification, just using the corresponding hierarchical task graph representation and the information coming from the dynamic profiling of the initial sequential specification. Experimental results show that the proposed solution outperforms existing approaches.

    @inproceedings{Ferrandi2009,
      author    = {Fabrizio Ferrandi and Marco Lattuada and Christian Pilato and Antonino Tumeo},
      title     = {Performance estimation for task graphs combining sequential path profiling and control dependence regions},
      booktitle = {MEMOCODE'09: Proceedings of the 7th IEEE/ACM international conference on Formal Methods and Models for Codesign},
      pages     = {131--140},
      year      = {2009},
      yy        = {2009},
      publisher = {IEEE Press},
      address   = {Piscataway, NJ, USA},
      location  = {Cambridge, Massachusetts},
      isbn      = {978-1-4244-4806-7},
      abstract  = {The speed-up estimation of parallelized code is crucial to efficiently compare different parallelization techniques or task graph transformations. Unfortunately, most of the time, during the parallelization of a specification, the information that can be extracted by profiling the corresponding sequential code (e.g. the most executed paths) are not properly taken into account. In particular, correlating sequential path profiling with the corresponding parallelized code can help in the identification of code hot spots, opening new possibilities for automatic parallelization. For this reason, starting from a well-known profiling technique, the Efficient Path Profiling, we propose a methodology that estimates the speed-up of a parallelized specification, just using the corresponding hierarchical task graph representation and the information coming from the dynamic profiling of the initial sequential specification. Experimental results show that the proposed solution outperforms existing approaches.}
    }
  • F. Ferrandi, M. Lattuada, C. Pilato and A. Tumeo, "Performance Modeling of Parallel Applications on MPSoCs," in IEEE International Symposium on System-on-Chip 2009 (SOC 2009), pp. 64-67, 2009.
    abstract bibtex
    Abstract: In this paper we present a new technique for automatically measuring the performance of tasks, functions or arbitrary parts of a program on a multiprocessor embedded system. The technique instruments the tasks described by OpenMP, used to represent the task parallelism, while ad hoc pragmas in the source indicate other pieces of code to profile. The annotations and the instrumentation are completely target-independent, so the same code can be measured on different target architectures, on simulators or on prototypes. We validate the approach on a single and on a dual LEON 3 platform synthesized on FPGA, demonstrating a low instrumentation overhead. We show how the information obtained with this technique can be easily exploited in a Hardware/Software design space exploration tool, by estimating, with good accuracy, the speed-up of a parallel application given the profiling on the single processor prototype.

    @inproceedings {SOC2009,
      abstract = {In this paper we present a new technique for automatically measuring the performance of tasks, functions or arbitrary parts of a program on a multiprocessor embedded system. The technique instruments the tasks described by OpenMP, used to represent the task parallelism, while ad hoc pragmas in the source indicate other pieces of code to profile. The annotations and the instrumentation are completely target-independent, so the same code can be measured on different target architectures, on simulators or on prototypes. We validate the approach on a single and on a dual LEON 3 platform synthesized on FPGA, demonstrating a low instrumentation overhead. We show how the information obtained with this technique can be easily exploited in a Hardware/Software design space exploration tool, by estimating, with good accuracy, the speed-up of a parallel application given the profiling on the single processor prototype.},
      address = {Piscataway, NJ, USA},
      publisher = {IEEE Press},
      location = {Tampere, Finland},
      pages = {64--67},
      isbn = {978-1-4244-4466-3},
      year = {2009},
      booktitle = {IEEE International Symposium on System-on-Chip 2009 (SOC 2009)},
      title = {Performance Modeling of Parallel Applications on {MPSoCs}},
      yy = {2009},
      author = {Fabrizio Ferrandi and Marco Lattuada and Christian Pilato and Antonino Tumeo} }

2008

  • C. Pilato, D. Loiacono, F. Ferrandi, P. Luca Lanzi and D. Sciuto, "High-level Synthesis with Multi-objective Genetic Algorithm: a Comparative Encoding Analysis," in Proc. IEEE CEC 2008 - Congress on Evolutionary Computation, pp. 3333-3340, 2008.
    abstract bibtex
    Abstract: This paper presents a comparative analysis between the different encodings that can be used to perform design space exploration for high-level synthesis with a genetic algorithm. In our previous works, we introduced the use of a multi-objective genetic algorithm, exploiting on a binding-based encoding, to tackle the complexity of such design space. In fact, high-level synthesis is a complex problem, composed of different sub-tasks that are notoriously NP-complete and interdependent each other. Moreover, the design objectives (e.g. area and performance) are in conflict for nature. In this paper we implement a different encoding, based on scheduling priorities, and we compare the results that can be obtained with the use of the two different representations. We show that the obtained methodology can be effectively combined with different solution encodings and it systematically outperforms the traditional methodologies for high-level synthesis with both of them. We also introduce a third encoding, called mixed, combining them in a single one. We show that the obtained encoding has difficulties to tackle the larger design space and it needs further investigations.

    @inproceedings {PILATO2008:cec,
      abstract = {This paper presents a comparative analysis between the different encodings that can be used to perform design space exploration for high-level synthesis with a genetic algorithm. In our previous works, we introduced the use of a multi-objective genetic algorithm, exploiting on a binding-based encoding, to tackle the complexity of such design space. In fact, high-level synthesis is a complex problem, composed of different sub-tasks that are notoriously NP-complete and interdependent each other. Moreover, the design objectives (e.g. area and performance) are in conflict for nature. In this paper we implement a different encoding, based on scheduling priorities, and we compare the results that can be obtained with the use of the two different representations. We show that the obtained methodology can be effectively combined with different solution encodings and it systematically outperforms the traditional methodologies for high-level synthesis with both of them. We also introduce a third encoding, called mixed, combining them in a single one. We show that the obtained encoding has difficulties to tackle the larger design space and it needs further investigations.},
      location = {Hong Kong, China},
      year = {2008},
      month = jun # "~1--6",
      pages = {3333--3340},
      booktitle = {Proc. IEEE CEC 2008 - Congress on Evolutionary Computation},
      title = {High-level Synthesis with Multi-objective Genetic Algorithm: a Comparative Encoding Analysis},
      yy = {2008},
      mm = {0},
      author = {Christian Pilato and Daniele Loiacono and Fabrizio Ferrandi and Pier Luca Lanzi and Donatella Sciuto} }
  • C. Pilato, D. Loiacono, F. Ferrandi, P. Luca Lanzi and D. Sciuto, "A Multi-Objective Genetic Algorithm for Design Space Exploration in High-Level Synthesis," in Proc. ISVLSI 2008 - IEEE Computer Society Annual Symposium on VLSI, pp. 417-422, 2008.
    abstract bibtex
    Abstract: This paper presents a methodology for design space exploration (DSE) in high-level synthesis (HLS), based on a multi-objective genetic algorithm. Since all high-level synthesis sub-tasks are notoriously NP-complete and interdependent and the design objectives are in conflict for nature, most of the already proposed approaches are not efficient in the exploration of this design space and not effective in the identification of different trade-offs. For these reasons, evolutionary algorithms can be considered as good candidates to tackle such difficult explorations. Therefore, we will compare our proposed approach, using different solution encoding, with a publicly available HLS framework and we will show that this approach is able to obtain better optimization results, with respect to the design objectives (latency and area have been considered for optimization), in most of situations and our proposed encoding better approaches the situations when multi-modal functional units (e.g. Arithmetic Logic Units) could be used in the final design solutions.

    @inproceedings {PILATO2008:isvlsi,
      abstract = {This paper presents a methodology for design space exploration (DSE) in high-level synthesis (HLS), based on a multi-objective genetic algorithm. Since all high-level synthesis sub-tasks are notoriously NP-complete and interdependent and the design objectives are in conflict for nature, most of the already proposed approaches are not efficient in the exploration of this design space and not effective in the identification of different trade-offs. For these reasons, evolutionary algorithms can be considered as good candidates to tackle such difficult explorations. Therefore, we will compare our proposed approach, using different solution encoding, with a publicly available HLS framework and we will show that this approach is able to obtain better optimization results, with respect to the design objectives (latency and area have been considered for optimization), in most of situations and our proposed encoding better approaches the situations when multi-modal functional units (e.g. Arithmetic Logic Units) could be used in the final design solutions.},
      location = {Montpellier, France},
      year = {2008},
      month = apr # "~7--9",
      pages = {417--422},
      booktitle = {Proc. ISVLSI 2008 - IEEE Computer Society Annual Symposium on VLSI},
      title = {A Multi-Objective Genetic Algorithm for Design Space Exploration in High-Level Synthesis},
      yy = {2008},
      mm = {0},
      author = {Christian Pilato and Daniele Loiacono and Fabrizio Ferrandi and Pier Luca Lanzi and Donatella Sciuto} }
  • A. Tumeo, C. Pilato, F. Ferrandi, P. L. Lanzi and D. Sciuto, "Ant Colony Optimization for Mapping and Scheduling in Heterogeneous Multiprocessor Systems," in Proc. IEEE IC-SAMOS 2008 - Int. Conf. SAMOS VIII: Embedded Computer Systems: Architectures, MOdeling, and Simulation (IC-SAMOS 2008), pp. 142-149, 2008.
    abstract bibtex
    Abstract: Heterogeneous multiprocessor systems, assembled with off-the-shelf processors and augmented with reprogrammable devices, thanks to their performance, cost effectiveness and flexibility, have become a standard platform for embedded systems. To fully exploit the computational power offered by these systems, great care should be taken when deciding on which processing element (mapping) and when (scheduling) executing the program tasks. Unfortunately, both these problems are NP-complete, and, even if they are strictly interconnected, they are normally performed separately with exact or heuristic algorithms to simplify the search for the optimum points. In this paper we present an exploration algorithm based on Ant Colony Optimization (ACO) that tries to solve the two problems simultaneously. We propose an implementation of the algorithm that gradually constructs feasible solution instances and searches around them rather than exploring a structure that already considers all the possible solutions. We introduce a two-stage decision mechanism that simplifies the data structures but lets the ant perform correlated choices for both the mapping and the scheduling. We show that this algorithm provides better and more robust solutions in less time than the Simulated Annealing and the Tabu Search algorithms, extended to support the combined scheduling and mapping problems. In particular, our ACO formulation can find, on average, solutions between 64% and 55% better than Simulated Annealing and Tabu Search.

    @inproceedings {TumeoSamos2008,
      abstract = {Heterogeneous multiprocessor systems, assembled with off-the-shelf processors and augmented with reprogrammable devices, thanks to their performance, cost effectiveness and flexibility, have become a standard platform for embedded systems. To fully exploit the computational power offered by these systems, great care should be taken when deciding on which processing element (mapping) and when (scheduling) executing the program tasks. Unfortunately, both these problems are NP-complete, and, even if they are strictly interconnected, they are normally performed separately with exact or heuristic algorithms to simplify the search for the optimum points. In this paper we present an exploration algorithm based on Ant Colony Optimization (ACO) that tries to solve the two problems simultaneously. We propose an implementation of the algorithm that gradually constructs feasible solution instances and searches around them rather than exploring a structure that already considers all the possible solutions. We introduce a two-stage decision mechanism that simplifies the data structures but lets the ant perform correlated choices for both the mapping and the scheduling. We show that this algorithm provides better and more robust solutions in less time than the Simulated Annealing and the Tabu Search algorithms, extended to support the combined scheduling and mapping problems. In particular, our ACO formulation can find, on average, solutions between 64\% and 55\% better than Simulated Annealing and Tabu Search.},
      location = {Samos, Greece},
      year = {2008},
      month = jul # "~21--24",
      pages = {142--149},
      booktitle = {Proc. IEEE IC-SAMOS 2008 - Int. Conf. SAMOS VIII: Embedded Computer Systems: Architectures, MOdeling, and Simulation (IC-SAMOS 2008)},
      title = {Ant Colony Optimization for Mapping and Scheduling in Heterogeneous Multiprocessor Systems},
      yy = {2008},
      mm = {0},
      author = {Antonino Tumeo and Christian Pilato and Fabrizio Ferrandi and Pier Luca Lanzi and Donatella Sciuto} }

2007

  • F. Ferrandi, L. Fossati, M. Lattuada, G. Palermo, D. Sciuto and A. Tumeo, "Automatic parallelization of sequential specifications for symmetric MPSoCs," in Proc. IESS07 - International Embedded Systems Symposium 2007, pp. 179-192, 2007.
    abstract bibtex
    Abstract: This paper presents an embedded system design toolchain for automatic generation of parallel code runnable on symmetric multiprocessor systems from an initial sequential specification written using the C language. We show how the initial C specification is translated in a modified system dependence graph with feedback edges (FSDG) composing the intermediate representation which is manipulated by the algorithm. Then we describe how this graph is partitioned and optimized: at the end of the process each partition (cluster of nodes) represents a different task. The parallel C code produced is such that the tasks can be dynamically scheduled on the target architecture; this is obtained thanks to the introduction of start conditions for each task. We present the experimental results obtained by applying our flow on the sequential code of the ADPCM and JPEG algorithms and by running the parallel specification, produced by the toolchain, on the target platform: with respect to the sequential specification, speedups up to 70% and 42% were obtained for the two benchmarks respectively.

    @inproceedings {FERRANDI2007:iess,
      abstract = {This paper presents an embedded system design toolchain for automatic generation of parallel code runnable on symmetric multiprocessor systems from an initial sequential specification written using the C language. We show how the initial C specification is translated in a modified system dependence graph with feedback edges (FSDG) composing the intermediate representation which is manipulated by the algorithm. Then we describe how this graph is partitioned and optimized: at the end of the process each partition (cluster of nodes) represents a different task. The parallel C code produced is such that the tasks can be dynamically scheduled on the target architecture; this is obtained thanks to the introduction of start conditions for each task. We present the experimental results obtained by applying our flow on the sequential code of the ADPCM and JPEG algorithms and by running the parallel specification, produced by the toolchain, on the target platform: with respect to the sequential specification, speedups up to 70\% and 42\% were obtained for the two benchmarks respectively.},
      location = {Irvine, CA, USA},
      year = {2007},
      month = may # "~30--" # jun # "~1",
      pages = {179--192},
      booktitle = {Proc. IESS07 - International Embedded Systems Symposium 2007},
      title = {Automatic parallelization of sequential specifications for symmetric {MPSoCs}},
      yy = {2007},
      mm = {0},
      author = {Fabrizio Ferrandi and Luca Fossati and Marco Lattuada and Gianluca Palermo and Donatella Sciuto and Antonino Tumeo} }
  • F. Ferrandi, L. Fossati, M. Lattuada, G. Palermo, D. Sciuto and A. Tumeo, "Partitioning and Mapping for the hArtes European Project," in Workshop on Directions in FPGAs and Reconfigurable Systems: Design, Programming and Technologies for adaptive heterogeneous Systems on Chip and their European Dimensions, 2007.
    abstract bibtex
    Abstract: The hArtes - Holistic Approach to Reconfigurable real Time Embedded Systems - project has three main objectives: the development of a toolchain and a methodology supporting effective automatic or semi-automatic design of complex heterogeneous embedded systems, the design of a scalable heterogeneous and reconfigurable hardware platform and the validation of the tool chain on a set of innovative applications in the audio and video field. This paper presents the ongoing works related to hArtes at Politecnico di Milano. Our role consists in the development of innovative methodologies and algorithms for software partitioning and for initial mapping of the resulting partitions on reconfigurable multiprocessor platforms. The development of these methodologies was integrated in PandA, our framework for hardware-software codesign; several other related were developed as an aid for the testing of the implemented technologies.

    @inproceedings {FERRANDI2007:DATE,
      abstract = {The hArtes - Holistic Approach to Reconfigurable real Time Embedded Systems - project has three main objectives: the development of a toolchain and a methodology supporting effective automatic or semi-automatic design of complex heterogeneous embedded systems, the design of a scalable heterogeneous and reconfigurable hardware platform and the validation of the tool chain on a set of innovative applications in the audio and video field. This paper presents the ongoing works related to hArtes at Politecnico di Milano. Our role consists in the development of innovative methodologies and algorithms for software partitioning and for initial mapping of the resulting partitions on reconfigurable multiprocessor platforms. The development of these methodologies was integrated in PandA, our framework for hardware-software codesign; several other related were developed as an aid for the testing of the implemented technologies.},
      location = {held during DATE '07, Nice, France},
      year = {2007},
      month = apr # "~20",
      booktitle = {Workshop on Directions in FPGAs and Reconfigurable Systems: Design, Programming and Technologies for adaptive heterogeneous Systems on Chip and their European Dimensions},
      title = {Partitioning and Mapping for the {hArtes} {European} Project},
      yy = {2007},
      mm = {0},
      author = {Fabrizio Ferrandi and Luca Fossati and Marco Lattuada and Gianluca Palermo and Donatella Sciuto and Antonino Tumeo} }
  • F. Ferrandi, P. L. Lanzi, G. Palermo, C. Pilato, D. Sciuto and A. Tumeo, "An Evolutionary Approach to Area-Time Optimization of FPGA designs," in Proc. IEEE IC-SAMOS 2007 - Int. Conf. SAMOS VII: Embedded Computer Systems: Architectures, MOdeling, and Simulation, pp. 145-152, 2007.
    abstract bibtex
    Abstract: This paper presents a new methodology based on evolutionary multi-objective optimization (EMO) to synthesize multiple complex modules on programmable devices (FPGAs). It starts from a behavioral description written in a common high-level language (for instance C) to automatically produce the register-transfer level (RTL) design in a hardware description language (e.g. Verilog). Since all high-level synthesis problems (scheduling, allocation and binding) are notoriously NP-complete and interdependent, the three problems should be considered simultaneously. This drives to a wide design space, that needs to be thoroughly explored to obtain solutions able to satisfy the design constraints. Evolutionary algorithms are good candidates to tackle such complex explorations. In this paper we provide a solution based on the Non-dominated Sorting Genetic Algorithm (NSGA-II) to explore the design space in order to obtain the best solutions in terms of performance given the area constraints of a target FPGA device. Moreover, it has been integrated a good cost estimation model to guarantee the quality of the solutions found without requiring a complete synthesis for the validation of each generation, an impractical and time consuming operation. We show on the JPEG case study that the proposed approach provides good results in terms of trade-off between total area occupied and execution time.

    @inproceedings {FERRANDI2007:samos,
      abstract = {This paper presents a new methodology based on evolutionary multi-objective optimization (EMO) to synthesize multiple complex modules on programmable devices (FPGAs). It starts from a behavioral description written in a common high-level language (for instance C) to automatically produce the register-transfer level (RTL) design in a hardware description language (e.g. Verilog). Since all high-level synthesis problems (scheduling, allocation and binding) are notoriously NP-complete and interdependent, the three problems should be considered simultaneously. This drives to a wide design space, that needs to be thoroughly explored to obtain solutions able to satisfy the design constraints. Evolutionary algorithms are good candidates to tackle such complex explorations. In this paper we provide a solution based on the Non-dominated Sorting Genetic Algorithm (NSGA-II) to explore the design space in order to obtain the best solutions in terms of performance given the area constraints of a target FPGA device. Moreover, it has been integrated a good cost estimation model to guarantee the quality of the solutions found without requiring a complete synthesis for the validation of each generation, an impractical and time consuming operation. We show on the JPEG case study that the proposed approach provides good results in terms of trade-off between total area occupied and execution time.},
      isbn = {1-4244-1058-4},
      location = {Samos, Greece},
      publisher = {IEEE},
      year = {2007},
      month = jul # "~16--19",
      pages = {145--152},
      booktitle = {Proc. IEEE IC-SAMOS 2007 - Int. Conf. SAMOS VII: Embedded Computer Systems: Architectures, MOdeling, and Simulation},
      title = {An Evolutionary Approach to Area-Time Optimization of {FPGA} designs},
      yy = {2007},
      mm = {0},
      author = {Fabrizio Ferrandi and Pier Luca Lanzi and Gianluca Palermo and Christian Pilato and Donatella Sciuto and Antonino Tumeo} }
  • C. Pilato, G. Palermo, A. Tumeo, F. Ferrandi, D. Sciuto and P. Luca Lanzi, "Fitness inheritance in evolutionary and multi-objective high-level synthesis," in Proc. IEEE CEC 2007 - Congress on Evolutionary Computation, pp. 3459-3466, 2007.
    abstract bibtex
    Abstract: The high-level synthesis process allows the automatic design and implementation of digital circuits starting from a behavioral description. Evolutionary algorithms are very widely adopted to approach this problem or just part of it. Nevertheless, some concerns regarding execution times exist. In evolutionary high-level synthesis, design solutions have to be evaluated to extract information about some figures of merit (such as performance, area, etc.) and to allow the genetic algorithm to evolve and converge to Pareto-optimal solutions. Since the execution time of such evaluations increases with the complexity of the specification, the overall methodology could lead to unacceptable execution time. This paper presents a model to exploit fitness inheritance in a multi-objective optimization algorithm (i.e. NSGA-II) by substituting the expensive real evaluations with estimations based on closeness in an hypothetical design space. The estimations are based on the measure of the distance between individuals and a weighted average of the fitnesses of the closest ones. The results show that the Pareto-optimal set obtained by applying the proposed model well approximates the set obtained without fitness inheritance. Moreover, the overall execution time is reduced up to the 25% in average.

    @inproceedings {PILATO2007:cec,
      abstract = {The high-level synthesis process allows the automatic design and implementation of digital circuits starting from a behavioral description. Evolutionary algorithms are very widely adopted to approach this problem or just part of it. Nevertheless, some concerns regarding execution times exist. In evolutionary high-level synthesis, design solutions have to be evaluated to extract information about some figures of merit (such as performance, area, etc.) and to allow the genetic algorithm to evolve and converge to Pareto-optimal solutions. Since the execution time of such evaluations increases with the complexity of the specification, the overall methodology could lead to unacceptable execution time. This paper presents a model to exploit fitness inheritance in a multi-objective optimization algorithm (i.e. NSGA-II) by substituting the expensive real evaluations with estimations based on closeness in an hypothetical design space. The estimations are based on the measure of the distance between individuals and a weighted average of the fitnesses of the closest ones. The results show that the Pareto-optimal set obtained by applying the proposed model well approximates the set obtained without fitness inheritance. Moreover, the overall execution time is reduced up to the 25\% in average.},
      location = {Singapore},
      year = {2007},
      month = sep # "~25--28",
      pages = {3459--3466},
      booktitle = {Proc. IEEE CEC 2007 - Congress on Evolutionary Computation},
      title = {Fitness inheritance in evolutionary and multi-objective high-level synthesis},
      yy = {2007},
      mm = {0},
      author = {Christian Pilato and Gianluca Palermo and Antonino Tumeo and Fabrizio Ferrandi and Donatella Sciuto and Pier Luca Lanzi} }
  • A. P. Rosiello, F. Ferrandi, D. Pandini and D. Sciuto, "A Hash-based Approach for Functional Regularity Extraction During Logic Synthesis," in Proc. ISVLSI 2007 - IEEE Computer Society Annual Symposium on VLSI, pp. 92-97, 2007.
    abstract bibtex
    Abstract: Performance, power, and functionality, yield and manufacturability are rapidly becoming additional critical factors that must be considered at higher levels of abstraction. A possible solution to improve yield and manufacturability is based on the detection of regularity at logic level. This paper focuses its attention on regularity extraction, after technology independent logic synthesis, to detect recurring functionalities during logic synthesis and thus constraining the physical design phase to exploit the regular netlist produced. A fast heuristic to the template identification is proposed and analyzed on a standard set of benchmarks both sequential and combinational.

    @inproceedings{ROSIELLO2007:ISVLSI,
      author    = {Angelo P. E. Rosiello and Fabrizio Ferrandi and Davide Pandini and Donatella Sciuto},
      title     = {A Hash-based Approach for Functional Regularity Extraction During Logic Synthesis},
      booktitle = {Proc. ISVLSI 2007 - IEEE Computer Society Annual Symposium on VLSI},
      location  = {Porto Alegre, Brazil},
      publisher = {IEEE Computer Society},
      address   = {Washington, DC, USA},
      pages     = {92--97},
      year      = {2007},
      month     = may # "~9--11",
      isbn      = {0-7695-2896-1},
      yy        = {2007},
      mm        = {0},
      abstract  = {Performance, power, and functionality, yield and manufacturability are rapidly becoming additional critical factors that must be considered at higher levels of abstraction. A possible solution to improve yield and manufacturability is based on the detection of regularity at logic level. This paper focuses its attention on regularity extraction, after technology independent logic synthesis, to detect recurring functionalities during logic synthesis and thus constraining the physical design phase to exploit the regular netlist produced. A fast heuristic to the template identification is proposed and analyzed on a standard set of benchmarks both sequential and combinational.},
    }

2006

  • F. Bruschi and F. Ferrandi, "A SystemC based framework for the early evaluation of communication architectures," in Proc. FDL'06 -- Forum on Specification & Design Languages, pp. 319-326, 2006. bibtex
    @inproceedings{bruschi06fdl06,
      author    = {F. Bruschi and F. Ferrandi},
      title     = {A {SystemC} based framework for the early evaluation of communication architectures},
      booktitle = {Proc. FDL'06 -- Forum on Specification \& Design Languages},
      location  = {Darmstadt, Germany},
      pages     = {319--326},
      year      = {2006},
      month     = sep # "~19--22",
      yy        = {2006},
      mm        = {0},
    }
  • R. Cordone, F. Ferrandi, M. Santambrogio, G. Palermo and D. Sciuto, "Using speculative computation and parallelizing techniques to improve scheduling of control based designs," in Proc. IEEE ASP-DAC '06 - Conference on Asia South Pacific design automation, pp. 898-904, 2006.
    abstract bibtex
    Abstract: Recent research results have seen the application of parallelizing techniques to high-level synthesis. In particular, the effect of speculative code transformations on mixed control-data flow designs has demonstrated effective results on schedule lengths. In this paper we first analyze the use of the control and data dependence graph as an intermediate representation that provides the possibility of extracting the maximum parallelism. Then we analyze the scheduling problem by formulating an approach based on Integer Linear Programming (ILP) to minimize the number of control steps given the amount of resources. We improve the already proposed ILP scheduling approaches by introducing a new conditional resource sharing constraint which is then extended to the case of speculative computation. The ILP formulation has been solved by using a Branch and Cut framework which provides better results than standard branch and bound techniques.

    @inproceedings{Cordone2006,
      author    = {R. Cordone and F. Ferrandi and M. D. Santambrogio and G. Palermo and D. Sciuto},
      title     = {Using speculative computation and parallelizing techniques to improve scheduling of control based designs},
      booktitle = {Proc. IEEE ASP-DAC '06 - Conference on Asia South Pacific design automation},
      location  = {Yokohama, Japan},
      publisher = {IEEE Press},
      address   = {Piscataway, NJ, USA},
      pages     = {898--904},
      year      = {2006},
      month     = jan # "~24--27",
      isbn      = {0-7803-9451-8},
      yy        = {2006},
      mm        = {0},
      abstract  = {Recent research results have seen the application of parallelizing techniques to high-level synthesis. In particular, the effect of speculative code transformations on mixed control-data flow designs has demonstrated effective results on schedule lengths. In this paper we first analyze the use of the control and data dependence graph as an intermediate representation that provides the possibility of extracting the maximum parallelism. Then we analyze the scheduling problem by formulating an approach based on Integer Linear Programming (ILP) to minimize the number of control steps given the amount of resources. We improve the already proposed ILP scheduling approaches by introducing a new conditional resource sharing constraint which is then extended to the case of speculative computation. The ILP formulation has been solved by using a Branch and Cut framework which provides better results than standard branch and bound techniques.},
    }

2004

  • F. Ferrandi, P. Lanzi, D. Sciuto and M. Tanelli, "System-level metrics for hardware/software architectural mapping," in Proc. DELTA 2004 -- Second IEEE International Workshop on Electronic Design, Test and Applications, pp. 231-236, 2004. bibtex
    @inproceedings{Ferrandi2004,
      author    = {F. Ferrandi and P. Lanzi and D. Sciuto and M. Tanelli},
      title     = {System-level metrics for hardware/software architectural mapping},
      booktitle = {Proc. DELTA 2004 -- Second IEEE International Workshop on Electronic Design, Test and Applications},
      location  = {Perth, Australia},
      pages     = {231--236},
      year      = {2004},
      month     = jan # "~28--30",
      yy        = {2004},
      mm        = {0},
    }
  • F. Ferrandi, P. L. Lanzi and D. Sciuto, "System Level Hardware--Software Design Exploration with XCS," in Proc. GECCO-2004 -- Genetic and Evolutionary Computation, LNCS series, pp. 763-773, 2004. bibtex
    @inproceedings{lanzi:2004:gecco:codesign,
      author    = {Fabrizio Ferrandi and Pier Luca Lanzi and Donatella Sciuto},
      title     = {System Level Hardware--Software Design Exploration with {XCS}},
      booktitle = {Proc. GECCO-2004 -- Genetic and Evolutionary Computation},
      series    = {Lecture Notes in Computer Science},
      location  = {Seattle, WA, USA},
      publisher = {Springer-Verlag},
      pages     = {763--773},
      year      = {2004},
      month     = jun # "~26--30",
      yy        = {2004},
      mm        = {0},
    }

2003

  • F. Ferrandi, P. Lanzi and D. Sciuto, "Mining interesting patterns from hardware-software codesign data with the learning classifier system XCS," in Proc. IEEE CEC 2003 - Congress on Evolutionary Computation, Vol. 2, pp. 1486-1492, 2003. bibtex
    @inproceedings{Ferrandi2003a,
      author    = {F. Ferrandi and P. L. Lanzi and D. Sciuto},
      title     = {Mining interesting patterns from hardware-software codesign data with the learning classifier system {XCS}},
      booktitle = {Proc. IEEE CEC 2003 - Congress on Evolutionary Computation},
      volume    = {2},
      pages     = {1486--1492},
      year      = {2003},
      month     = dec # "~8--12",
      yy        = {2003},
      mm        = {0},
    }

Leave a Reply