Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/EPPlus/FormulaParsing/Excel/Functions/BuiltInFunctions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ public BuiltInFunctions()
Functions["usdollar"] = new UsDollar();
Functions["encodeurl"] = new EncodeUrl();
Functions["code"] = new CodeFunction();
Functions["regextest"] = new RegexTest();
Functions["regexextract"] = new RegexExtract();
Functions["regexreplace"] = new RegexReplace();
Functions["textsplit"] = new TextSplit();
Functions["textbefore"] = new TextBefore(DelimiterFunction.TextBefore);
Functions["textafter"] = new TextAfter(DelimiterFunction.TextAfter);
Expand Down
148 changes: 148 additions & 0 deletions src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexExtract.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
using OfficeOpenXml.FormulaParsing.Excel.Functions.MathFunctions;
using OfficeOpenXml.FormulaParsing.Excel.Functions.RefAndLookup;
using OfficeOpenXml.FormulaParsing.FormulaExpressions;
using OfficeOpenXml.FormulaParsing.Ranges;
using OfficeOpenXml.RichData.IndexRelations;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
using System.Text.RegularExpressions;

namespace OfficeOpenXml.FormulaParsing.Excel.Functions.Text
{
/// <summary>
/// Implements the REGEXEXTRACT worksheet function: extracts the text matched by a
/// regular expression.
/// Arguments: text, pattern, [return_mode], [case_sensitivity].
/// return_mode 0 returns the first match, 1 returns all matches, 2 returns the capture
/// groups of the first match. case_sensitivity 0 is case sensitive, 1 ignores case.
/// </summary>
internal class RegexExtract : RegexFunctionBase
{
    private const int ReturnAllMatches = 1;
    private const int ReturnCaptureGroups = 2;

    public override int ArgumentMinLength => 2;

    public override string NamespacePrefix => "_xlfn.";

    /// <summary>
    /// Evaluates the function. When neither text nor pattern is a range a single result
    /// (or a one-row array for return_mode 1 and 2) is produced. Otherwise the arguments
    /// are expanded against each other and one value per cell is returned.
    /// </summary>
    /// <param name="arguments">The function arguments (text, pattern, optional return_mode and case_sensitivity).</param>
    /// <param name="context">The parsing context (not used by this function).</param>
    /// <returns>
    /// The extracted text, a dynamic array result, #N/A when an input is missing or nothing
    /// matches, or #VALUE! when an option is out of range or the pattern is invalid.
    /// </returns>
    public override CompileResult Execute(IList<FunctionArgument> arguments, ParsingContext context)
    {
        bool textIsRange = arguments[0].IsExcelRange;
        bool patternIsRange = arguments[1].IsExcelRange;
        int returnMode = arguments.Count > 2 ? ArgToInt(arguments, 2, 0) : 0;
        int caseSensitivity = arguments.Count > 3 ? ArgToInt(arguments, 3, 0) : 0;

        // Validate once, with the same rule for the scalar and the array path.
        bool optionsValid = returnMode >= 0 && returnMode <= ReturnCaptureGroups
                            && caseSensitivity >= 0 && caseSensitivity <= 1;
        // Only meaningful when optionsValid is true; never cast an unchecked int to RegexOptions.
        var options = caseSensitivity == 1 ? RegexOptions.IgnoreCase : RegexOptions.None;

        if (!textIsRange && !patternIsRange)
        {
            var text = arguments[0].Value?.ToString();
            var pattern = arguments[1].Value?.ToString();

            if (text == null || pattern == null)
            {
                return ToErrorResult(eErrorType.NA);
            }
            if (!optionsValid)
            {
                return ToErrorResult(eErrorType.Value);
            }

            return ExtractScalar(text, pattern, returnMode, options);
        }

        var texts = textIsRange ? arguments[0].ValueAsRangeInfo : null;
        var patterns = patternIsRange ? arguments[1].ValueAsRangeInfo : null;

        int textRows = texts != null ? texts.Size.NumberOfRows : 1;
        int textCols = texts != null ? texts.Size.NumberOfCols : 1;
        int patternRows = patterns != null ? patterns.Size.NumberOfRows : 1;
        int patternCols = patterns != null ? patterns.Size.NumberOfCols : 1;

        var nRows = ExpandedSize(textRows, patternRows);
        var nCols = ExpandedSize(textCols, patternCols);

        var result = new InMemoryRange(nRows, nCols);

        for (int row = 0; row < nRows; row++)
        {
            for (int col = 0; col < nCols; col++)
            {
                var textValue = GetValue(texts, arguments[0], textRows, textCols, row, col);
                var patternValue = GetValue(patterns, arguments[1], patternRows, patternCols, row, col);

                object cellValue;
                if (textValue == null || patternValue == null)
                {
                    cellValue = ExcelErrorValue.Create(eErrorType.NA);
                }
                else if (!optionsValid)
                {
                    cellValue = ExcelErrorValue.Create(eErrorType.Value);
                }
                else
                {
                    // A cell can only hold one value: the first match (modes 0 and 1)
                    // or the first capture group of the first match (mode 2).
                    cellValue = ExtractFirst(textValue, patternValue, options, returnMode == ReturnCaptureGroups);
                }

                result.SetValue(row, col, cellValue);
            }
        }

        return CreateDynamicArrayResult(result, DataType.ExcelRange);
    }

    /// <summary>
    /// Evaluates a single text/pattern pair according to <paramref name="returnMode"/>.
    /// </summary>
    private CompileResult ExtractScalar(string text, string pattern, int returnMode, RegexOptions options)
    {
        try
        {
            if (returnMode == ReturnAllMatches)
            {
                var matches = GetMatches(text, pattern, options);
                return matches.Length == 0 ? ToErrorResult(eErrorType.NA) : ToRowResult(matches);
            }

            var match = Regex.Match(text, pattern, options);
            if (!match.Success)
            {
                // No match is #N/A in every mode (previously mode 0 returned an empty string).
                return ToErrorResult(eErrorType.NA);
            }

            if (returnMode == ReturnCaptureGroups)
            {
                if (match.Groups.Count <= 1)
                {
                    return ToErrorResult(eErrorType.NA);
                }

                // Group 0 is the whole match; only the explicit capture groups are returned.
                var groups = match.Groups
                    .Cast<Group>()
                    .Skip(1)
                    .Select(g => g.Value)
                    .ToArray();
                return ToRowResult(groups);
            }

            return CreateResult(match.Value, DataType.String);
        }
        catch (ArgumentException)
        {
            // The pattern could not be parsed as a regular expression.
            return ToErrorResult(eErrorType.Value);
        }
    }

    /// <summary>
    /// Returns the value for one cell in array mode: the first match, or its first capture
    /// group when <paramref name="firstGroup"/> is true. Yields #N/A when there is nothing
    /// to return and #VALUE! when the pattern is invalid.
    /// </summary>
    private static object ExtractFirst(string text, string pattern, RegexOptions options, bool firstGroup)
    {
        try
        {
            var match = Regex.Match(text, pattern, options);
            if (!match.Success)
            {
                return ExcelErrorValue.Create(eErrorType.NA);
            }
            if (!firstGroup)
            {
                return match.Value;
            }
            if (match.Groups.Count <= 1)
            {
                return ExcelErrorValue.Create(eErrorType.NA);
            }
            return match.Groups[1].Value;
        }
        catch (ArgumentException)
        {
            return ExcelErrorValue.Create(eErrorType.Value);
        }
    }

    /// <summary>
    /// Returns the text of every match of <paramref name="pattern"/> in <paramref name="text"/>.
    /// </summary>
    private static string[] GetMatches(string text, string pattern, RegexOptions options)
    {
        return Regex.Matches(text, pattern, options)
            .Cast<System.Text.RegularExpressions.Match>()
            .Select(m => m.Value)
            .ToArray();
    }

    /// <summary>
    /// Wraps the values in a one-row dynamic array result.
    /// </summary>
    private CompileResult ToRowResult(string[] values)
    {
        var arr = new InMemoryRange((short)1, (short)values.Length);
        for (int i = 0; i < values.Length; i++)
        {
            arr.SetValue(0, i, values[i]);
        }

        return CreateDynamicArrayResult(arr, DataType.ExcelRange);
    }

    /// <summary>
    /// Creates an error result of the given type.
    /// </summary>
    private CompileResult ToErrorResult(eErrorType errorType)
    {
        return CreateResult(ExcelErrorValue.Create(errorType), DataType.ExcelError);
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace OfficeOpenXml.FormulaParsing.Excel.Functions.Text
{
/// <summary>
/// Shared helpers for the regular expression worksheet functions.
/// </summary>
internal abstract class RegexFunctionBase : ExcelFunction
{
    /// <summary>
    /// Reads the string value an argument contributes to the result cell at
    /// (<paramref name="row"/>, <paramref name="col"/>).
    /// </summary>
    /// <param name="range">The argument as a range, or null when the argument is not a range.</param>
    /// <param name="scalar">The argument itself; its value is used when <paramref name="range"/> is null.</param>
    /// <param name="nRows">Number of rows in the argument.</param>
    /// <param name="nCols">Number of columns in the argument.</param>
    /// <param name="row">Zero-based row of the result cell.</param>
    /// <param name="col">Zero-based column of the result cell.</param>
    /// <returns>
    /// The value as a string, or null when the value is null or the position lies outside the argument.
    /// </returns>
    protected static string GetValue(
        IRangeInfo range,
        FunctionArgument scalar,
        int nRows, int nCols,
        int row, int col)
    {
        if (range != null)
        {
            // A dimension of size one is repeated along that axis, so it always reads index 0.
            var rowIndex = nRows != 1 ? row : 0;
            var colIndex = nCols != 1 ? col : 0;

            var isInside = rowIndex < nRows && colIndex < nCols;
            return isInside ? range.GetOffset(rowIndex, colIndex)?.ToString() : null;
        }

        return scalar.Value?.ToString();
    }

    /// <summary>
    /// Combines the sizes of two arguments along one axis: a size of one yields the other
    /// size, otherwise the larger of the two is used.
    /// </summary>
    /// <param name="a">Size of the first argument along the axis.</param>
    /// <param name="b">Size of the second argument along the axis.</param>
    /// <returns>The combined size, cast to <see cref="short"/>.</returns>
    protected static short ExpandedSize(int a, int b)
    {
        int size;
        if (a == 1)
        {
            size = b;
        }
        else if (b == 1)
        {
            size = a;
        }
        else
        {
            size = Math.Max(a, b);
        }

        return (short)size;
    }
}
}
156 changes: 156 additions & 0 deletions src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexReplace.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
using OfficeOpenXml.FormulaParsing.FormulaExpressions;
using OfficeOpenXml.FormulaParsing.Ranges;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
using System.Text.RegularExpressions;


namespace OfficeOpenXml.FormulaParsing.Excel.Functions.Text
{
/// <summary>
/// Implements the REGEXREPLACE worksheet function: replaces text matched by a regular
/// expression.
/// Arguments: text, pattern, replacement, [occurrence], [case_sensitivity].
/// occurrence 0 replaces every match, a positive value replaces the n-th match and a
/// negative value counts from the last match. case_sensitivity 0 is case sensitive,
/// 1 ignores case.
/// </summary>
internal class RegexReplace : RegexFunctionBase
{
    // Matches $<digits> group references that are not preceded by another '$'.
    private static readonly Regex BackreferencePattern = new Regex(@"(?<!\$)\$(\d+)");

    public override int ArgumentMinLength => 3;

    public override string NamespacePrefix => "_xlfn.";

    /// <summary>
    /// Evaluates the function. When none of text, pattern and replacement is a range a
    /// single string is produced; otherwise one value per cell of the expanded result.
    /// </summary>
    /// <param name="arguments">The function arguments (text, pattern, replacement, optional occurrence and case_sensitivity).</param>
    /// <param name="context">The parsing context (not used by this function).</param>
    /// <returns>
    /// The replaced text, a dynamic array result, #N/A when the text position lies outside
    /// the text argument, or #VALUE! when the pattern is empty or invalid, the replacement
    /// references a missing group, or case_sensitivity is out of range.
    /// </returns>
    public override CompileResult Execute(IList<FunctionArgument> arguments, ParsingContext context)
    {
        bool textIsRange = arguments[0].IsExcelRange;
        bool patternIsRange = arguments[1].IsExcelRange;
        bool replacementIsRange = arguments[2].IsExcelRange;

        int occurrence = arguments.Count > 3 ? ArgToInt(arguments, 3, 0) : 0;
        int caseSensitive = arguments.Count > 4 ? ArgToInt(arguments, 4, 0) : 0;

        bool caseValid = caseSensitive == 0 || caseSensitive == 1;
        // Only meaningful when caseValid is true; never cast an unchecked int to RegexOptions.
        var options = caseSensitive == 1 ? RegexOptions.IgnoreCase : RegexOptions.None;

        if (!textIsRange && !patternIsRange && !replacementIsRange)
        {
            var text = arguments[0].Value?.ToString() ?? string.Empty;
            var pattern = arguments[1].Value?.ToString() ?? string.Empty;
            var replacement = arguments[2].Value?.ToString() ?? string.Empty;

            if (!caseValid || pattern.Length == 0)
            {
                return CreateResult(ExcelErrorValue.Create(eErrorType.Value), DataType.ExcelError);
            }

            var res = GetRegexReplaced(text, pattern, replacement, occurrence, options);
            if (res == null)
            {
                return CreateResult(ExcelErrorValue.Create(eErrorType.Value), DataType.ExcelError);
            }
            return CreateResult(res, DataType.String);
        }

        var texts = textIsRange ? arguments[0].ValueAsRangeInfo : null;
        var patterns = patternIsRange ? arguments[1].ValueAsRangeInfo : null;
        var replacements = replacementIsRange ? arguments[2].ValueAsRangeInfo : null;

        int textRows = texts != null ? texts.Size.NumberOfRows : 1;
        int textCols = texts != null ? texts.Size.NumberOfCols : 1;
        int patternRows = patterns != null ? patterns.Size.NumberOfRows : 1;
        int patternCols = patterns != null ? patterns.Size.NumberOfCols : 1;
        int replacementsRows = replacements != null ? replacements.Size.NumberOfRows : 1;
        int replacementsCols = replacements != null ? replacements.Size.NumberOfCols : 1;

        var nRows = ExpandedSizeRegexReplace(textRows, patternRows, replacementsRows);
        var nCols = ExpandedSizeRegexReplace(textCols, patternCols, replacementsCols);

        var result = new InMemoryRange(nRows, nCols);

        for (int row = 0; row < nRows; row++)
        {
            for (int col = 0; col < nCols; col++)
            {
                var textValue = GetRegexReplaceValue(texts, arguments[0], textRows, textCols, row, col);
                var patternValue = GetRegexReplaceValue(patterns, arguments[1], patternRows, patternCols, row, col);
                var replacementValue = GetRegexReplaceValue(replacements, arguments[2], replacementsRows, replacementsCols, row, col) ?? string.Empty;

                object cellValue;
                if (textValue == null)
                {
                    // The position lies outside the text argument.
                    cellValue = ExcelErrorValue.Create(eErrorType.NA);
                }
                else if (string.IsNullOrEmpty(patternValue) || !caseValid)
                {
                    // A missing or empty pattern is rejected here exactly as in the scalar path.
                    cellValue = ExcelErrorValue.Create(eErrorType.Value);
                }
                else
                {
                    var replaced = GetRegexReplaced(textValue, patternValue, replacementValue, occurrence, options);
                    cellValue = replaced == null
                        ? (object)ExcelErrorValue.Create(eErrorType.Value)
                        : replaced;
                }

                result.SetValue(row, col, cellValue);
            }
        }

        return CreateDynamicArrayResult(result, DataType.ExcelRange);
    }

    /// <summary>
    /// Returns the largest of the three argument sizes along one axis.
    /// </summary>
    private short ExpandedSizeRegexReplace(int a, int b, int c)
    {
        return (short)Math.Max(a, Math.Max(b, c));
    }

    /// <summary>
    /// Performs the replacement on a single text value.
    /// </summary>
    /// <param name="text">The text to search.</param>
    /// <param name="pattern">The regular expression.</param>
    /// <param name="replacement">The replacement pattern; may contain $n group references.</param>
    /// <param name="occurrence">0 for all matches, n for the n-th match, -n for the n-th match from the end.</param>
    /// <param name="options">Regex options derived from the case_sensitivity argument.</param>
    /// <returns>
    /// The resulting text; the unchanged text when the requested occurrence does not exist;
    /// null when the pattern is invalid or the replacement references a group the pattern
    /// does not define.
    /// </returns>
    private static string GetRegexReplaced(string text, string pattern, string replacement, int occurrence, RegexOptions options)
    {
        try
        {
            // Compile the pattern once and reuse it for validation, matching and replacing.
            var regex = new Regex(pattern, options);

            if (HasInvalidBackreference(regex, replacement))
            {
                return null;
            }

            if (occurrence == 0)
            {
                return regex.Replace(text, replacement);
            }

            var matches = regex.Matches(text);
            var targetIndex = occurrence > 0
                ? occurrence - 1
                : matches.Count + occurrence; // negative: count from the last match
            if (targetIndex < 0 || targetIndex >= matches.Count)
            {
                return text;
            }

            var target = matches[targetIndex];
            return text.Substring(0, target.Index)
                   + target.Result(replacement)
                   + text.Substring(target.Index + target.Length);
        }
        catch (ArgumentException)
        {
            // The pattern could not be parsed as a regular expression.
            return null;
        }
    }

    /// <summary>
    /// Reads the string value an argument contributes to the result cell at
    /// (<paramref name="row"/>, <paramref name="col"/>). Null values become an empty
    /// string; null is returned only when the position lies outside the range.
    /// </summary>
    private static string GetRegexReplaceValue(
        IRangeInfo range,
        FunctionArgument scalar,
        int argRows, int argCols,
        int row, int col)
    {
        if (range == null)
        {
            return scalar.Value?.ToString() ?? string.Empty;
        }

        // A dimension of size one is repeated along that axis, so it always reads index 0.
        int r = argRows == 1 ? 0 : row;
        int c = argCols == 1 ? 0 : col;

        if (r >= argRows || c >= argCols)
        {
            return null;
        }

        return range.GetOffset(r, c)?.ToString() ?? string.Empty;
    }

    /// <summary>
    /// Checks whether <paramref name="replacement"/> contains a numbered group reference
    /// ($n) greater than the highest group number defined by <paramref name="regex"/>.
    /// </summary>
    private static bool HasInvalidBackreference(Regex regex, string replacement)
    {
        if (string.IsNullOrEmpty(replacement))
        {
            return false;
        }

        int maxGroup = regex.GetGroupNumbers().Max();

        foreach (Match m in BackreferencePattern.Matches(replacement))
        {
            if (int.TryParse(m.Groups[1].Value, out int refNum) && refNum > maxGroup)
            {
                return true;
            }
        }
        return false;
    }
}
}
Loading
Loading