ansaurus

Question

Answer 1

+1 A:

It looks pretty lean to me. The only thing I can think of would be to parallelize this. If I get a chance I will try to work something out and edit my answer.

Here is how to limit the depth.

// Sketch of the depth limit: every recursive call passes depth + 1, and the
// method returns immediately once depth exceeds MAX_DEPTH.
private static void ConvertToUpper(object entity, Hashtable visited, int depth)
{
  // Stop descending once the maximum depth has been exceeded.
  if (depth > MAX_DEPTH) return;

  // Omitted code for brevity.

  // Example usage here: recurse one level deeper.
  ConvertToUpper(..., ..., depth + 1);
}

Brian Gideon 2010-07-09 16:35:13

Answer 2

+1 A:

Here is a block of code that should apply the max-depth limit that Brian Gideon mentioned and also parallelize things a bit. It's not perfect and could be refined, since I split the string properties and the other non-value-type properties into two LINQ queries.

/// <summary>
/// Upper-cases the writable string properties of <paramref name="entity"/> and of the
/// objects reachable from it, processing the properties of each object in parallel.
/// </summary>
/// <param name="entity">Root object to process; null is ignored.</param>
/// <param name="visited">Objects already processed; shared by all parallel branches.</param>
/// <param name="depth">Current recursion depth; recursion stops once it exceeds MAX_DEPTH.</param>
private static void ConvertToUpper(object entity, Hashtable visited, int depth)
{
    if (entity == null || depth > MAX_DEPTH)
    {
        return;
    }

    // The table is written from several Parallel.ForEach branches, so the
    // check-and-add must be one atomic step.
    lock (visited.SyncRoot)
    {
        if (visited.ContainsKey(entity))
        {
            return;
        }

        visited.Add(entity, entity);
    }

    // One query for both strings and nested objects. Indexers are skipped because
    // they cannot be read with GetValue(entity, null).
    var properties = (from p in entity.GetType().GetProperties()
                      where p.CanRead &&
                            p.CanWrite &&
                            p.GetIndexParameters().Length == 0 &&
                            !p.PropertyType.IsValueType &&
                            !p.Name.Contains("password")
                      select p).ToList();

    Parallel.ForEach(properties, p =>
    {
        object value = p.GetValue(entity, null);

        if (value == null)
        {
            return;
        }

        if (p.PropertyType == typeof(string))
        {
            p.SetValue(entity, ((string)value).ToUpper(), null);
            return;
        }

        // A string held in a property that is not declared as string is left
        // untouched; walking its characters would achieve nothing.
        if (value is string)
        {
            return;
        }

        IEnumerable items = value as IEnumerable;

        if (items != null)
        {
            foreach (object item in items)
            {
                ConvertToUpper(item, visited, depth + 1);
            }
        }
        else
        {
            // Recurse into the property's value, not into the PropertyInfo itself.
            ConvertToUpper(value, visited, depth + 1);
        }
    });
}

JamesEggers 2010-07-09 16:52:31

+1: Very cool. That saved me the trouble!

Brian Gideon 2010-07-09 16:55:59

Answer 3

+1 A:

Sam 2010-07-09 17:09:17

Answer 4

+1 A:

There are a couple of immediate issues:

There is repeated evaluation of property information for what I am assuming are the same types.
Reflection is comparatively slow.

Issue 1. can be solved by memoizing property information about types and caching it so it does not have to be re-calculated for each recurring type we see.

Performance of issue 2. can be helped out by using IL code generation and dynamic methods. I grabbed code from here to implement dynamically (and also memoized from point 1.) generated and highly efficient calls for getting and setting property values. Basically IL code is dynamically generated to call set and get for a property and encapsulated in a method wrapper - this bypasses all the reflection steps (and some security checks...). Where the following code refers to "DynamicProperty" I have used the code from the previous link.

This method can also be parallelized as suggested by others, just ensure the "visited" cache and calculated properties cache are synchronized.

// Per-type cache of the property wrappers built by GetMatchingProperties, so each
// type is reflected over only once. This is a plain Dictionary with no locking.
private static readonly Dictionary<Type, List<ProperyInfoWrapper>> _typePropertyCache = new Dictionary<Type, List<ProperyInfoWrapper>>();

// Cached description of one property that ConvertToUpper has to look at.
private class ProperyInfoWrapper
{
    // Delegate created by DynamicProperty.CreateSetMethod for this property.
    public GenericSetter PropertySetter { get; set; }
    // Delegate created by DynamicProperty.CreateGetMethod for this property.
    public GenericGetter PropertyGetter { get; set; }
    // True when the property's value is to be upper-cased as a string.
    public bool IsString { get; set; }
    // True when the property's value is to be walked item by item rather than as a single object.
    public bool IsEnumerable { get; set; }
}

/// <summary>
/// Upper-cases the string properties of <paramref name="entity"/> and recurses into the
/// objects and collections its other cached properties refer to.
/// </summary>
/// <param name="entity">Object to process; null is ignored.</param>
/// <param name="visited">Objects already processed, used to stop at cycles and repeats.</param>
private static void ConvertToUpper(object entity, Hashtable visited)
{
    if (entity == null || visited.Contains(entity))
    {
        return;
    }

    visited.Add(entity, entity);

    foreach (ProperyInfoWrapper wrapper in GetMatchingProperties(entity))
    {
        object propertyValue = wrapper.PropertyGetter(entity);

        if (propertyValue == null)
        {
            continue;
        }

        if (wrapper.IsString)
        {
            wrapper.PropertySetter(entity, ((string)propertyValue).ToUpper());
            continue;
        }

        // Decide per value, not per type: the wrappers are cached per type, but
        // whether this particular value is a collection is only known at run time.
        // Trusting a cached flag could cast a non-collection to IEnumerable, or
        // recurse into the collection object itself instead of its items.
        IEnumerable items = propertyValue as IEnumerable;

        if (items == null)
        {
            ConvertToUpper(propertyValue, visited);
            continue;
        }

        foreach (object item in items)
        {
            ConvertToUpper(item, visited);
        }
    }
}

/// <summary>
/// Returns the property wrappers for the runtime type of <paramref name="entity"/>,
/// building and caching them in _typePropertyCache on first use of that type.
/// </summary>
/// <param name="entity">Instance whose type is inspected; must not be null.</param>
/// <returns>
/// One wrapper per readable and writable, non-indexed property that is either a
/// string whose name does not contain "password" or another reference type.
/// </returns>
private static IEnumerable<ProperyInfoWrapper> GetMatchingProperties(object entity)
{
    Type entityType = entity.GetType();
    List<ProperyInfoWrapper> matchingProperties;

    if (_typePropertyCache.TryGetValue(entityType, out matchingProperties))
    {
        return matchingProperties;
    }

    matchingProperties = new List<ProperyInfoWrapper>();

    foreach (PropertyInfo propertyInfo in entityType.GetProperties())
    {
        if (!propertyInfo.CanRead || !propertyInfo.CanWrite)
            continue;

        // Indexers need arguments and cannot be read or written as plain properties.
        if (propertyInfo.GetIndexParameters().Length > 0)
            continue;

        Type propertyType = propertyInfo.PropertyType;

        if (propertyType.IsValueType)
            continue;

        bool isString = propertyType == typeof(string);

        // Password strings are skipped entirely. They must not fall through and be
        // cached as a non-string wrapper.
        // NOTE(review): the match is case-sensitive, so a property named "Password"
        // is not excluded - confirm that this is intended.
        if (isString && propertyInfo.Name.Contains("password"))
            continue;

        // The cache is per type, so the flag is derived from the declared type and
        // not from whatever value the first instance happens to hold.
        bool isEnumerable = !isString && typeof(IEnumerable).IsAssignableFrom(propertyType);

        ProperyInfoWrapper wrapper = new ProperyInfoWrapper
        {
            PropertySetter = DynamicProperty.CreateSetMethod(propertyInfo),
            PropertyGetter = DynamicProperty.CreateGetMethod(propertyInfo),
            IsString = isString,
            IsEnumerable = isEnumerable
        };

        matchingProperties.Add(wrapper);
    }

    _typePropertyCache.Add(entityType, matchingProperties);

    return matchingProperties;
}

chibacity 2010-07-09 18:25:22

Answer 5

+1 A:

While your question is about the performance of the code, there is another problem that others seem to miss: Maintainability.

While you might think this is not as important as the performance problem you are having, having code that is more readable and maintainable will make it easier to solve problems with it.

Here is an example of what your code might look like after a few refactorings:

/// <summary>
/// Walks an object graph through reflection and upper-cases the string values of its
/// readable and writable properties. Each object is processed at most once.
/// </summary>
class HierarchyUpperCaseConverter
{
    // Objects that have already been processed during this conversion.
    private readonly HashSet<object> visited = new HashSet<object>();

    /// <summary>
    /// Upper-cases the string properties of <paramref name="entity"/> and of everything
    /// reachable from it.
    /// </summary>
    /// <param name="entity">Root of the graph; null is ignored.</param>
    public static void ConvertToUpper(object entity)
    {
        new HierarchyUpperCaseConverter().ProcessEntity(entity);
    }

    // Entry point of the recursion: filters out nulls and objects seen before.
    private void ProcessEntity(object entity)
    {
        // Don't process null references.
        if (entity == null)
        {
            return;
        }

        // Add returns false when the object is already in the set, which prevents
        // processing the same object twice and stops at cycles.
        if (!this.visited.Add(entity))
        {
            return;
        }

        this.ProcessProperties(entity);
    }

    // Processes every eligible property of an entity that has not been seen before.
    private void ProcessProperties(object entity)
    {
        var properties =
            this.GetProcessableProperties(entity.GetType());

        foreach (var property in properties)
        {
            this.ProcessEntityProperty(entity, property);
        }
    }

    // Selects the readable and writable, non-indexed reference-type properties,
    // leaving out string properties whose name contains "password".
    private IEnumerable<PropertyInfo> GetProcessableProperties(Type type)
    {
        var properties =
            from property in type.GetProperties()
            where property.CanRead && property.CanWrite
            // Indexers cannot be read with GetValue(entity, null).
            where property.GetIndexParameters().Length == 0
            where !property.PropertyType.IsValueType
            where !(property.Name.Contains("password") &&
                property.PropertyType == typeof(string))
            select property;

        return properties;
    }

    // Dispatches on the runtime value of a single property.
    private void ProcessEntityProperty(object entity, PropertyInfo property)
    {
        object value = property.GetValue(entity, null);

        if (value == null)
        {
            return;
        }

        // The string test must come first: string implements IEnumerable, so testing
        // IEnumerable first would walk the characters and never upper-case the value.
        string text = value as string;

        if (text != null)
        {
            this.ProcessStringProperty(entity, property, text);
            return;
        }

        IEnumerable collection = value as IEnumerable;

        if (collection != null)
        {
            this.ProcessCollectionProperty(collection);
        }
        else
        {
            this.ProcessEntity(value);
        }
    }

    // Processes every item of a collection-valued property.
    private void ProcessCollectionProperty(IEnumerable value)
    {
        foreach (object item in value)
        {
            // Make a recursive call.
            this.ProcessEntity(item);
        }
    }

    // Writes the upper-cased string back to the property.
    private void ProcessStringProperty(object entity, PropertyInfo property, string value)
    {
        string upperCaseValue = ConvertToUpperCase(value);

        property.SetValue(entity, upperCaseValue, null);
    }

    private string ConvertToUpperCase(string value)
    {
        // TODO: ToUpper is culture sensitive.
        // Shouldn't we use ToUpperInvariant?
        return value.ToUpper();
    }
}

While this code is more than twice as long as your code snippet, it is more maintainable. In the process of refactoring your code I even found a possible bug in your code. This bug is a lot harder to spot in your code. In your code you try to convert all string values to upper case but you don't convert string values that are stored in object properties. Look for instance at the following code.

// Example type whose property is declared as object rather than string.
class A
{
    public object Value { get; set; }
}

// The property holds a string at run time even though its declared type is object.
var a = new A() { Value = "Hello" };

Perhaps this is exactly what you wanted, but the string "Hello" is not converted to "HELLO" in your code.

Another thing I'd like to note is that, although all I tried to do was make your code more readable, my refactoring seems to be about 20% faster.

After I refactored the code I tried to improve performance of it, but I found out that it is particularly hard to improve it. While others try to parallelize the code I have to warn about this. Parallelizing the code isn't as easy as others might let you think. There is some synchronization going on between threads (in the form of the 'visited' collection). Don't forget that writing to a collection is not thread-safe. Using a thread-safe version or locking on it might degrade performance again. You will have to test this.

I also found out that the real performance bottleneck is all the reflection (especially the reading of all the property values). The only way to really speed this up is by hard coding the code operations for each and every type, or as others suggested lightweight code generation. However, this is pretty hard and it is questionable whether it is worth the trouble.

I hope you find my refactorings useful and wish you good luck with improving performance.

Steven 2010-07-09 21:26:37

@Steve Using code generation technique described in my answer gave me a 600% perf increase. Couple that with the link to code generation code that is ready to go. Not too hard in this case.

chibacity 2010-07-10 00:13:36

@Chibacity: I did a little test by using the `CreateSetMethod` and `CreateGetMethod` (and caching the creation of them) and got a 300% improvement over my refactored example right away. That's nice indeed. What I was thinking of was generating methods based on a type, instead of a single property. This would be even faster, but is also a lot harder. Of course it depends on how much performance you need to squeeze out of it and how much trouble you're willing to go through.

Steven 2010-07-11 10:19:50

@Steve You should read my answer. I do exactly that.

chibacity 2010-07-11 16:26:39

@Chibacity: Not exactly. Your code example iterates a list of properties per type and does casting of the type and the property. I'm talking about going even one step further: Generating a method on a per type basis that calls those properties directly. The generated code could look something like this: `void MyTypeToUpper(MyType entity) { entity.StringProp1 = ToUpper(entity.StringProp1); entity.StringProp2 = ToUpper(entity.StringProp2); entity.StringProp3 = ToUpper(entity.StringProp3); ProcessSubCollection(entity.Children); ProcessEntity(entity.Parent); }`

Steven 2010-07-11 19:13:37

Answer 6

+2 A:

I didn't profile the following code, but it must be very performant on complex structures.

1) Uses dynamic code generation.

2) Uses type-based cache for generated dynamic delegates.

/// <summary>
/// Upper-cases the string properties of an object graph using visitor delegates that
/// are compiled from expression trees once per type and then cached. The manager is
/// itself the set of objects that have already been visited.
/// </summary>
public class VisitorManager : HashSet<object>
{
  // Compiled per-type routine: processes all eligible properties of one entity.
  delegate void Visitor(VisitorManager manager, object entity);

  // Type -> compiled visitor. A null entry means the type has nothing to process.
  Dictionary<Type, Visitor> _visitors = new Dictionary<Type, Visitor>();

  // Called from the generated code for collection-valued properties.
  void ConvertToUpperEnum(IEnumerable entity)
  {
    // TODO: this can be parallelized, but then we should thread-safe lock the cache 
    foreach (var obj in entity)
      ConvertToUpper(obj);
  }

  /// <summary>
  /// Upper-cases the string properties of <paramref name="entity"/> and of everything
  /// reachable from it. Null and already-visited objects are ignored.
  /// </summary>
  public void ConvertToUpper(object entity)
  {
    if (entity == null || Contains(entity))
      return;

    Add(entity);

    var visitor = GetCachedVisitor(entity.GetType());

    if (visitor != null)
      visitor(this, entity);
  }

  // One-entry cache in front of the dictionary, for runs of same-typed objects.
  Type _lastType;
  Visitor _lastVisitor;

  Visitor GetCachedVisitor(Type type)
  {
    if (type == _lastType)
      return _lastVisitor;

    _lastType = type;

    return _lastVisitor = GetVisitor(type);
  }

  Visitor GetVisitor(Type type)
  {
    Visitor result;

    if (!_visitors.TryGetValue(type, out result))
      _visitors[type] = result = BuildVisitor(type);

    return result;
  }

  // These lookups are by name, so the methods above must keep their names.
  static MethodInfo _toUpper = typeof(string).GetMethod("ToUpper", new Type[0]);
  static MethodInfo _convertToUpper = typeof(VisitorManager).GetMethod("ConvertToUpper", BindingFlags.Instance | BindingFlags.Public);
  static MethodInfo _convertToUpperEnum = typeof(VisitorManager).GetMethod("ConvertToUpperEnum", BindingFlags.Instance | BindingFlags.NonPublic);

  // Builds and compiles the visitor for one type.
  // Returns null when the type has no property that needs processing.
  Visitor BuildVisitor(Type type)
  {
    var visitorManager = Expression.Parameter(typeof(VisitorManager), "manager");
    var entityParam = Expression.Parameter(typeof(object), "entity");

    var entityVar = Expression.Variable(type, "e");
    var cast = Expression.Assign(entityVar, Expression.Convert(entityParam, type));  // T e = (T)entity;

    var statements = new List<Expression>() { cast };

    foreach (var prop in type.GetProperties())
    {
      // if cannot read or cannot write - ignore property
      if (!prop.CanRead || !prop.CanWrite) continue;

      // indexers need arguments and cannot be accessed as e.Prop - ignore property
      if (prop.GetIndexParameters().Length > 0) continue;

      var propType = prop.PropertyType;

      // if property is value type - ignore property
      if (propType.IsValueType) continue;

      var isString = propType == typeof(string);

      // if string type with "password" in the property name - ignore property
      // (all other strings are the ones that must be upper-cased)
      if (isString && prop.Name.Contains("password"))
        continue;

      // e.Prop
      var propAccess = Expression.Property(entityVar, prop);

      // T value = e.Prop
      var value = Expression.Variable(propType, "value");
      var assignValue = Expression.Assign(value, propAccess);

      if (isString)
      {
        // if (value != null) e.Prop = value.ToUpper();
        var ifThen = Expression.IfThen(Expression.NotEqual(value, Expression.Constant(null, typeof(string))),
           Expression.Assign(propAccess, Expression.Call(value, _toUpper)));

        statements.Add(Expression.Block(new[] { value }, assignValue, ifThen));
      }
      else
      {
        // var i = value as IEnumerable;
        var enumerable = Expression.Variable(typeof(IEnumerable), "i");

        var assignEnum = Expression.Assign(enumerable, Expression.TypeAs(value, enumerable.Type));

        // if (i != null) manager.ConvertToUpperEnum(i); else manager.ConvertToUpper(value);
        var ifThenElse = Expression.IfThenElse(Expression.NotEqual(enumerable, Expression.Constant(null)),
         Expression.Call(visitorManager, _convertToUpperEnum, enumerable),
         Expression.Call(visitorManager, _convertToUpper, value));

        statements.Add(Expression.Block(new[] { value, enumerable }, assignValue, assignEnum, ifThenElse));
      }
    }

    // only the cast was emitted - nothing to do for this type
    if (statements.Count <= 1)
      return null;

    return Expression.Lambda<Visitor>(Expression.Block(new[] { entityVar }, statements), visitorManager, entityParam).Compile();
  }
}

LexL 2010-07-10 11:45:15

Please note that `Expression.Assign` is new in .NET 4.0.

Steven 2010-07-14 06:15:23

ansaurus

tags:

views:

answers:

How to Optimize this method

related questions